diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 3af8ba86153a1..bf6e580ace8cf 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -7,3 +7,7 @@ e66bfa5dd32f93e76068c00ad882c1fc839c5af8
 100a741e7ab38c91d48cc929bb001afc8e09261f
 # whitespace: replace tabs => space
 b03e8ab9c7bd3e001add519571858fa04d6a249b
+# whitespace: replace 2-space => 4-space for indentation
+f1b567507731129f90ca0dffc8fbc0ed98b6a15d
+# whitespace: replace multiple spaces after period with a single space
+f942c29bb0d02cc24f19712c642ac72ffc85a26b
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000000..2ad7fdc1efa0a
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 100
+    labels:
+      - "dependencies"
+      - "github-actions"
+      - "domain:ci"
diff --git a/.github/workflows/LabelCheck.yml b/.github/workflows/LabelCheck.yml
index 194b0c92065c9..c966e478e3fe0 100644
--- a/.github/workflows/LabelCheck.yml
+++ b/.github/workflows/LabelCheck.yml
@@ -11,9 +11,9 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 2
     steps:
-    - uses: yogevbd/enforce-label-action@2.2.2
+    - uses: yogevbd/enforce-label-action@a3c219da6b8fa73f6ba62b68ff09c469b3a1c024 # 2.2.2
       with:
         # REQUIRED_LABELS_ANY: "bug,enhancement,skip-changelog"
         # REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['bug','enhancement','skip-changelog']"
-        BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,DO NOT MERGE"
-        BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `DO NOT MERGE` labels"
+        BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,needs decision,DO NOT MERGE,status:DO NOT MERGE"
+        BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `status:DO NOT MERGE` labels"
diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml
index da5a6a550abe8..6c9eeacc21800 100644
--- a/.github/workflows/Typos.yml
+++ b/.github/workflows/Typos.yml
@@ -11,11 +11,11 @@ jobs:
     timeout-minutes: 5
     steps:
       - name: Checkout the JuliaLang/julia repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           persist-credentials: false
       - name: Check spelling with typos
-        #uses: crate-ci/typos@master
+        #uses: crate-ci/typos@c7af4712eda24dd1ef54bd8212973888489eb0ce # v1.23.5
         env:
           GH_TOKEN: "${{ github.token }}"
         run: |
diff --git a/.github/workflows/Whitespace.yml b/.github/workflows/Whitespace.yml
new file mode 100644
index 0000000000000..5706f6148dc33
--- /dev/null
+++ b/.github/workflows/Whitespace.yml
@@ -0,0 +1,23 @@
+name: Whitespace
+
+permissions: {}
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  whitespace:
+    name: Check whitespace
+    runs-on: ubuntu-latest
+    timeout-minutes: 2
+    steps:
+      - name: Checkout the JuliaLang/julia repository
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          persist-credentials: false
+      - name: Check whitespace
+        run: |
+          contrib/check-whitespace.jl
diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml
index 751476865ae4c..4c9debb246f3f 100644
--- a/.github/workflows/cffconvert.yml
+++ b/.github/workflows/cffconvert.yml
@@ -23,11 +23,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out a copy of the repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           persist-credentials: false
 
       - name: Check whether the citation metadata from CITATION.cff is valid
-        uses: citation-file-format/cffconvert-github-action@2.0.0
+        uses: citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084 # 2.0.0
         with:
           args: "--validate"
diff --git a/.mailmap b/.mailmap
index e91501651d065..e278160d7381b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -284,9 +284,9 @@ Daniel Karrasch <Daniel.Karrasch@gmx.de> <daniel.karrasch@posteo.de>
 Roger Luo <rogerluo.rl18@gmail.com> <rogerluo.rl18@gmail.com>
 Roger Luo <rogerluo.rl18@gmail.com> <hiroger@qq.com>
 
-Frames Catherine White <me@oxinabox.net> <oxinabox@ucc.asn.au>
-Frames Catherine White <me@oxinabox.net> <lyndon.white@invenialabs.co.uk>
-Frames Catherine White <me@oxinabox.net> <lyndon.white@research.uwa.edu.au>
+Frames White <me@oxinabox.net> <oxinabox@ucc.asn.au>
+Frames White <me@oxinabox.net> <lyndon.white@invenialabs.co.uk>
+Frames White <me@oxinabox.net> <lyndon.white@research.uwa.edu.au>
 
 Claire Foster <aka.c42f@gmail.com> <chris42f@gmail.com>
 
@@ -295,3 +295,16 @@ Jishnu Bhattacharya <jishnub.github@gmail.com> <jishnub@users.noreply.github.com
 
 Shuhei Kadowaki <aviatesk@gmail.com> <aviatesk@gmail.com>
 Shuhei Kadowaki <aviatesk@gmail.com> <40514306+aviatesk@users.noreply.github.com>
+
+inky <git@wo-class.cn>
+inky <git@wo-class.cn> <inkydragon@users.noreply.github.com>
+
+Lilith Orion Hafner <lilithhafner@gmail.com> <Lilith.Hafner@gmail.com>
+Lilith Orion Hafner <lilithhafner@gmail.com> <60898866+LilithHafner@users.noreply.github.com>
+
+Timothy <git@tecosaur.net>
+
+Bhuminjay Soni <soni5happy@gmail.com>
+Bhuminjay Soni <soni5happy@gmail.com> <76656712+11happy@users.noreply.github.com>
+
+Florian Atteneder <florian.atteneder@gmail.com>
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e028cf486c0f2..fd7f1c89420d6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -164,7 +164,7 @@ To run doctests you need to run `make -C doc doctest=true` from the root directo
 
 #### News-worthy changes
 
-For new functionality and other substantial changes, add a brief summary to `NEWS.md`. The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number.  We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits.
+For new functionality and other substantial changes, add a brief summary to `NEWS.md`. The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number. We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits.
 
 #### Annotations for new features, deprecations and behavior changes
 
diff --git a/HISTORY.md b/HISTORY.md
index 3eaec3c6d0774..7fb01c7e9a0e9 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -59,7 +59,7 @@ Compiler/Runtime improvements
 Command-line option changes
 ---------------------------
 
-* The entry point for Julia has been standardized to `Main.main(ARGS)`. This must be explicitly opted into using the `@main` macro
+* The entry point for Julia has been standardized to `Main.main(Base.ARGS)`. This must be explicitly opted into using the `@main` macro
 (see the docstring for further details). When opted-in, and julia is invoked to run a script or expression
 (i.e. using `julia script.jl` or `julia -e expr`), julia will subsequently run the `Main.main` function automatically.
 This is intended to unify script and compilation workflows, where code loading may happen
@@ -68,16 +68,19 @@ difference between defining a `main` function and executing the code directly at
 * The `--compiled-modules` and `--pkgimages` flags can now be set to `existing`, which will
   cause Julia to consider loading existing cache files, but not to create new ones ([#50586]
   and [#52573]).
+* The `--project` argument now accepts `@script` to give a path to a directory with a Project.toml relative to the passed script file. `--project=@script/foo` for the `foo` subdirectory. If no path is given after (i.e. `--project=@script`) then (like `--project=@.`) the directory and its parents are searched for a Project.toml ([#50864] and [#53352])
 
 Multi-threading changes
 -----------------------
 
 * `Threads.@threads` now supports the `:greedy` scheduler, intended for non-uniform workloads ([#52096]).
-* A new exported struct `Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]).
+* A new public (but unexported) struct `Base.Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]).
 
 Build system changes
 --------------------
 
+* There is a new `Makefile` to build Julia and LLVM using the profile-guided and link-time optimizations (PGO and LTO) strategies, see `contrib/pgo-lto/Makefile` ([#45641]).
+
 New library functions
 ---------------------
 
@@ -86,7 +89,6 @@ New library functions
 * `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
 * `eachrsplit(string, pattern)` iterates split substrings right to left.
 * `Sys.username()` can be used to return the current user's username ([#51897]).
-* `wrap(Array, m::Union{MemoryRef{T}, Memory{T}}, dims)` is the safe counterpart to `unsafe_wrap` ([#52049]).
 * `GC.logging_enabled()` can be used to test whether GC logging has been enabled via `GC.enable_logging` ([#51647]).
 * `IdSet` is now exported from Base and considered public ([#53262]).
 
@@ -128,12 +130,6 @@ Standard library changes
 * The new `@styled_str` string macro provides a convenient way of creating a
   `AnnotatedString` with various faces or other attributes applied ([#49586]).
 
-#### JuliaSyntaxHighlighting
-
-* A new standard library for applying syntax highlighting to Julia code, this
-  uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function
-  that creates an `AnnotatedString` with syntax highlighting applied.
-
 #### Package Manager
 
 #### LinearAlgebra
@@ -420,6 +416,7 @@ Deprecated or removed
 [#44247]: https://github.com/JuliaLang/julia/issues/44247
 [#45164]: https://github.com/JuliaLang/julia/issues/45164
 [#45396]: https://github.com/JuliaLang/julia/issues/45396
+[#45641]: https://github.com/JuliaLang/julia/issues/45641
 [#45962]: https://github.com/JuliaLang/julia/issues/45962
 [#46196]: https://github.com/JuliaLang/julia/issues/46196
 [#46372]: https://github.com/JuliaLang/julia/issues/46372
@@ -1112,7 +1109,7 @@ Standard library changes
 * `lpad/rpad` are now defined in terms of `textwidth` ([#39044]).
 * `Test.@test` now accepts `broken` and `skip` boolean keyword arguments, which
   mimic `Test.@test_broken` and `Test.@test_skip` behavior, but allows skipping
-  tests failing only under certain conditions.  For example
+  tests failing only under certain conditions. For example
   ```julia
   if T == Float64
       @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
@@ -1536,7 +1533,7 @@ Standard library changes
 * The `Pkg.BinaryPlatforms` module has been moved into `Base` as `Base.BinaryPlatforms` and heavily reworked.
   Applications that want to be compatible with the old API should continue to import `Pkg.BinaryPlatforms`,
   however new users should use `Base.BinaryPlatforms` directly ([#37320]).
-* The `Pkg.Artifacts` module has been imported as a separate standard library.  It is still available as
+* The `Pkg.Artifacts` module has been imported as a separate standard library. It is still available as
   `Pkg.Artifacts`, however starting from Julia v1.6+, packages may import simply `Artifacts` without importing
   all of `Pkg` alongside ([#37320]).
 
@@ -1576,7 +1573,7 @@ Standard library changes
 * The `AbstractMenu` extension interface of `REPL.TerminalMenus` has been extensively
   overhauled. The new interface does not rely on global configuration variables, is more
   consistent in delegating printing of the navigation/selection markers, and provides
-  improved support for dynamic menus.  These changes are compatible with the previous
+  improved support for dynamic menus. These changes are compatible with the previous
   (deprecated) interface, so are non-breaking.
 
   The new API offers several enhancements:
@@ -1831,8 +1828,8 @@ New library functions
   `Base.Experimental.show_error_hints` from their `showerror` method ([#35094]).
 * The `@ccall` macro has been added to Base. It is a near drop-in replacement for `ccall` with more Julia-like syntax. It also wraps the new `foreigncall` API for varargs of different types, though it lacks the capability to specify an LLVM calling convention ([#32748]).
 * New functions `mergewith` and `mergewith!` supersede `merge` and `merge!` with `combine`
-  argument.  They don't have the restriction for `combine` to be a `Function` and also
-  provide one-argument method that returns a closure.  The old methods of `merge` and
+  argument. They don't have the restriction for `combine` to be a `Function` and also
+  provide one-argument method that returns a closure. The old methods of `merge` and
   `merge!` are still available for backward compatibility ([#34296]).
 * The new `isdisjoint` function indicates whether two collections are disjoint ([#34427]).
 * Add function `ismutable` and deprecate `isimmutable` to check whether something is mutable ([#34652]).
@@ -2712,7 +2709,7 @@ Standard Library Changes
 
 * The `Libdl` module's methods `dlopen()` and `dlsym()` have gained a
   `throw_error` keyword argument, replacing the now-deprecated `dlopen_e()`
-  and `dlsym_e()` methods.  When `throw_error` is `false`, failure to locate
+  and `dlsym_e()` methods. When `throw_error` is `false`, failure to locate
   a shared library or symbol will return `nothing` rather than `C_NULL`.
   ([#28888])
 
@@ -2974,7 +2971,7 @@ This section lists changes that do not have deprecation warnings.
     "Code Loading" and "Pkg" for documentation.
 
   * `replace(s::AbstractString, pat=>repl)` for function `repl` arguments formerly
-    passed a substring to `repl` in all cases.  It now passes substrings for
+    passed a substring to `repl` in all cases. It now passes substrings for
     string patterns `pat`, but a `Char` for character patterns (when `pat` is a
     `Char`, collection of `Char`, or a character predicate) ([#25815]).
 
@@ -3159,7 +3156,7 @@ This section lists changes that do not have deprecation warnings.
 
   * The logging system has been redesigned - `info` and `warn` are deprecated
     and replaced with the logging macros `@info`, `@warn`, `@debug` and
-    `@error`.  The `logging` function is also deprecated and replaced with
+    `@error`. The `logging` function is also deprecated and replaced with
     `AbstractLogger` and the functions from the new standard `Logging` library.
     ([#24490])
 
@@ -3315,7 +3312,7 @@ Library improvements
     For example, `x^-1` is now essentially a synonym for `inv(x)`, and works
     in a type-stable way even if `typeof(x) != typeof(inv(x))` ([#24240]).
 
-  * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]).  Iterator
+  * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]). Iterator
     types `T` can implement `start` etc. for `Iterators.Reverse{T}` to support this.
 
   * The functions `nextind` and `prevind` now accept `nchar` argument that indicates
@@ -3454,7 +3451,7 @@ Library improvements
     cartesian indices to linear indices using the normal indexing operation.
     ([#24715], [#26775]).
 
-  * `IdDict{K,V}` replaces `ObjectIdDict`.  It has type parameters
+  * `IdDict{K,V}` replaces `ObjectIdDict`. It has type parameters
     like other `AbstractDict` subtypes and its constructors mirror the
     ones of `Dict`. ([#25210])
 
@@ -3665,8 +3662,8 @@ Deprecated or removed
     should add offset axis support to the function `f` directly ([#26733]).
 
   * The functions `ones` and `zeros` used to accept any objects as dimensional arguments,
-    implicitly converting them to `Int`s.  This is now deprecated; only `Integer`s or
-    `AbstractUnitRange`s are accepted as arguments.  Instead, convert the arguments before
+    implicitly converting them to `Int`s. This is now deprecated; only `Integer`s or
+    `AbstractUnitRange`s are accepted as arguments. Instead, convert the arguments before
     calling `ones` or `zeros` ([#26733]).
 
   * The variadic `size(A, dim1, dim2, dims...)` method to return a tuple of multiple
@@ -4531,8 +4528,8 @@ Language changes
     Previously, this syntax parsed as an implicit multiplication ([#18690]).
 
   * For every binary operator `⨳`, `a .⨳ b` is now automatically equivalent to
-    the `broadcast` call `(⨳).(a, b)`.  Hence, one no longer defines methods
-    for `.*` etcetera.  This also means that "dot operations" automatically
+    the `broadcast` call `(⨳).(a, b)`. Hence, one no longer defines methods
+    for `.*` etcetera. This also means that "dot operations" automatically
     fuse into a single loop, along with other dot calls `f.(x)` ([#17623]).
     Similarly for unary operators ([#20249]).
 
@@ -4585,11 +4582,11 @@ This section lists changes that do not have deprecation warnings.
     or an array as a "scalar" ([#16986]).
 
   * `broadcast` now produces a `BitArray` instead of `Array{Bool}` for
-    functions yielding a boolean result.  If you want `Array{Bool}`, use
+    functions yielding a boolean result. If you want `Array{Bool}`, use
     `broadcast!` or `.=` ([#17623]).
 
   * Broadcast `A[I...] .= X` with entirely scalar indices `I` is deprecated as
-    its behavior will change in the future.  Use `A[I...] = X` instead.
+    its behavior will change in the future. Use `A[I...] = X` instead.
 
   * Operations like `.+` and `.*` on `Range` objects are now generic
     `broadcast` calls (see [above](#language-changes)) and produce an `Array`.
@@ -4635,7 +4632,7 @@ This section lists changes that do not have deprecation warnings.
     now tab-completes to U+03B5 (greek small letter epsilon) ([#19464]).
 
   * `retry` now inputs the keyword arguments `delays` and `check` instead of
-    `n` and `max_delay`.  The previous functionality can be achieved setting
+    `n` and `max_delay`. The previous functionality can be achieved setting
     `delays` to `ExponentialBackOff` ([#19331]).
 
   * `transpose(::AbstractVector)` now always returns a `RowVector` view of the input (which is a
@@ -4676,7 +4673,7 @@ This section lists changes that do not have deprecation warnings.
       using the values and types of `a` and `step` as given, whereas
       `range(a, step, len)` will attempt to match inputs `a::FloatNN`
       and `step::FloatNN` to rationals and construct a `StepRangeLen`
-      that internally uses twice-precision arithmetic.  These two
+      that internally uses twice-precision arithmetic. These two
       outcomes exhibit differences in both precision and speed.
 
   * `A=>B` expressions are now parsed as calls instead of using `=>` as the
@@ -4696,7 +4693,7 @@ This section lists changes that do not have deprecation warnings.
     trigamma, and polygamma special functions have been moved from Base to
     the
     [SpecialFunctions.jl package](https://github.com/JuliaMath/SpecialFunctions.jl)
-    ([#20427]).  Note that `airy`, `airyx` and `airyprime` have been deprecated
+    ([#20427]). Note that `airy`, `airyx` and `airyprime` have been deprecated
     in favor of more specific functions (`airyai`, `airybi`, `airyaiprime`,
     `airybiprimex`, `airyaix`, `airybix`, `airyaiprimex`, `airybiprimex`)
     ([#18050]).
@@ -4781,7 +4778,7 @@ Library improvements
     for more information.
 
   * The default color for info messages has been changed from blue to cyan
-    ([#18442]), and for warning messages from red to yellow ([#18453]).  This
+    ([#18442]), and for warning messages from red to yellow ([#18453]). This
     can be changed back to the original colors by setting the environment
     variables `JULIA_INFO_COLOR` to `"blue"` and `JULIA_WARN_COLOR` to `"red"`.
 
@@ -5125,10 +5122,10 @@ New language features
   * Function return type syntax `function f()::T` has been added ([#1090]). Values returned
     from a function with such a declaration will be converted to the specified type `T`.
 
-  * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]).  However,
+  * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]). However,
     users are discouraged from overloading these, since they are mainly parsed
     in order to implement backwards compatibility with planned automatic
-    broadcasting of dot operators in Julia 0.6 ([#16285]).  Explicitly qualified
+    broadcasting of dot operators in Julia 0.6 ([#16285]). Explicitly qualified
     operator names like `Base.≤` should now use `Base.:≤` (prefixed by `@compat`
     if you need 0.4 compatibility via the `Compat` package).
 
@@ -5261,7 +5258,7 @@ Library improvements
   * Strings ([#16107]):
 
     * The `UTF8String` and `ASCIIString` types have been merged into a single
-      `String` type ([#16058]).  Use `isascii(s)` to check whether
+      `String` type ([#16058]). Use `isascii(s)` to check whether
       a string contains only ASCII characters. The `ascii(s)` function now
       converts `s` to `String`, raising an `ArgumentError` exception if `s` is
       not pure ASCII.
@@ -5583,7 +5580,7 @@ New language features
 
   * Function call overloading: for arbitrary objects `x` (not of type
     `Function`), `x(...)` is transformed into `call(x, ...)`, and `call`
-    can be overloaded as desired.  Constructors are now a special case of
+    can be overloaded as desired. Constructors are now a special case of
     this mechanism, which allows e.g. constructors for abstract types.
     `T(...)` falls back to `convert(T, x)`, so all `convert` methods implicitly
     define a constructor ([#8712], [#2403]).
@@ -5611,13 +5608,13 @@ New language features
     `~/.julia/lib/v0.4` ([#8745]).
 
       * See manual section on `Module initialization and precompilation` (under `Modules`) for
-        details and errata.  In particular, to be safely precompilable a module may need an
+        details and errata. In particular, to be safely precompilable a module may need an
         `__init__` function to separate code that must be executed at runtime rather than precompile
-        time.  Modules that are *not* precompilable should call `__precompile__(false)`.
+        time. Modules that are *not* precompilable should call `__precompile__(false)`.
 
       * The precompiled `.ji` file includes a list of dependencies (modules and files that
         were imported/included at precompile-time), and the module is automatically recompiled
-        upon `import` when any of its dependencies have changed.  Explicit dependencies
+        upon `import` when any of its dependencies have changed. Explicit dependencies
         on other files can be declared with `include_dependency(path)` ([#12458]).
 
       * New option `--output-incremental={yes|no}` added to invoke the equivalent of `Base.compilecache`
@@ -5821,7 +5818,7 @@ Library improvements
     * New `vecdot` function, analogous to `vecnorm`, for Euclidean inner products over any iterable container ([#11067]).
 
     * `p = plan_fft(x)` and similar functions now return a `Base.DFT.Plan` object, rather
-    than an anonymous function.  Calling it via `p(x)` is deprecated in favor of
+    than an anonymous function. Calling it via `p(x)` is deprecated in favor of
     `p * x` or `p \ x` (for the inverse), and it can also be used with `A_mul_B!`
     to employ pre-allocated output arrays ([#12087]).
 
@@ -6311,7 +6308,7 @@ Library improvements
     * New string type, `UTF16String` ([#4930]), constructed by
       `utf16(s)` from another string, a `Uint16` array or pointer, or
       a byte array (possibly prefixed by a byte-order marker to
-      indicate endian-ness).  Its data is internally `NULL`-terminated
+      indicate endian-ness). Its data is internally `NULL`-terminated
       for passing to C ([#7016]).
 
     * `CharString` is renamed to `UTF32String` ([#4943]), and its data
@@ -6346,7 +6343,7 @@ Library improvements
 
       * New `vecnorm(itr, p=2)` function that computes the norm of
         any iterable collection of numbers as if it were a vector of
-        the same length.  This generalizes and replaces `normfro` ([#6057]),
+        the same length. This generalizes and replaces `normfro` ([#6057]),
         and `norm` is now type-stable ([#6056]).
 
       * New `UniformScaling` matrix type and identity `I` constant ([#5810]).
diff --git a/LICENSE.md b/LICENSE.md
index d4125f4fba221..da8b6920491cc 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
+Copyright (c) 2009-2024: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/Make.inc b/Make.inc
index 5d58454c80333..f078a0c84f806 100644
--- a/Make.inc
+++ b/Make.inc
@@ -92,6 +92,9 @@ WITH_DTRACE := 0
 # Enable ITTAPI integration
 WITH_ITTAPI := 0
 
+# Enable NVTX integration
+WITH_NVTX := 0
+
 # Enable Tracy support
 WITH_TRACY := 0
 WITH_TRACY_CALLSTACKS := 0
@@ -482,7 +485,7 @@ endif
 
 FC := $(CROSS_COMPILE)gfortran
 
-# Note: Supporting only macOS Yosemite and above
+# Note: Supporting only macOS Mojave and above
 ifeq ($(OS), Darwin)
 APPLE_ARCH := $(shell uname -m)
 ifneq ($(APPLE_ARCH),arm64)
@@ -492,7 +495,7 @@ MACOSX_VERSION_MIN := 11.0
 endif
 endif
 
-JCFLAGS_COMMON    := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
+JCFLAGS_COMMON    := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -Wformat -Wformat-security
 JCFLAGS_CLANG     := $(JCFLAGS_COMMON)
 JCFLAGS_GCC       := $(JCFLAGS_COMMON) -fno-gnu-unique
 
@@ -501,7 +504,7 @@ JCPPFLAGS_COMMON  := -fasynchronous-unwind-tables
 JCPPFLAGS_CLANG   := $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0
 JCPPFLAGS_GCC     := $(JCPPFLAGS_COMMON) -fno-tree-tail-merge
 
-JCXXFLAGS_COMMON  := -pipe $(fPIC) -fno-rtti -std=c++17
+JCXXFLAGS_COMMON  := -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security
 JCXXFLAGS_CLANG   := $(JCXXFLAGS_COMMON) -pedantic
 JCXXFLAGS_GCC     := $(JCXXFLAGS_COMMON) -fno-gnu-unique
 
@@ -513,6 +516,11 @@ SHIPFLAGS_COMMON  := -O3
 SHIPFLAGS_CLANG   := $(SHIPFLAGS_COMMON) -g
 SHIPFLAGS_GCC     := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions
 
+BOLT_LDFLAGS :=
+
+BOLT_CFLAGS_GCC    :=
+BOLT_CFLAGS_CLANG  :=
+
 ifeq ($(OS), Darwin)
 JCPPFLAGS_CLANG   += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1
 endif
@@ -529,7 +537,8 @@ JCFLAGS    := $(JCFLAGS_GCC)
 JCPPFLAGS  := $(JCPPFLAGS_GCC)
 JCXXFLAGS  := $(JCXXFLAGS_GCC)
 DEBUGFLAGS := $(DEBUGFLAGS_GCC)
-SHIPFLAGS  := $(SHIPFLAGS_GCC)
+SHIPFLAGS  := $(SHIPFLAGS_GCC) $(BOLT_CFLAGS_GCC)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_GCC)
 endif
 
 ifeq ($(USECLANG),1)
@@ -539,7 +548,8 @@ JCFLAGS    := $(JCFLAGS_CLANG)
 JCPPFLAGS  := $(JCPPFLAGS_CLANG)
 JCXXFLAGS  := $(JCXXFLAGS_CLANG)
 DEBUGFLAGS := $(DEBUGFLAGS_CLANG)
-SHIPFLAGS  := $(SHIPFLAGS_CLANG)
+SHIPFLAGS  := $(SHIPFLAGS_CLANG) $(BOLT_CFLAGS_CLANG)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_CLANG)
 
 ifeq ($(OS), Darwin)
 CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
@@ -550,7 +560,17 @@ export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN)
 endif
 endif
 
-JLDFLAGS :=
+# Conditional setting of RELRO flag for enhanced security on Linux builds.
+# RELRO (Read-Only Relocations) is a security feature that marks certain sections
+# of the binary as read-only to prevent exploitation techniques like
+# GOT (Global Offset Table) overwriting attacks.
+ifeq ($(OS),Linux)
+    RELRO_FLAG := -Wl,-z,relro
+else
+    RELRO_FLAG :=
+endif
+
+JLDFLAGS := $(RELRO_FLAG)
 
 ifeq ($(USECCACHE), 1)
 # Expand CC, CXX and FC here already because we want the original definition and not the ccache version.
@@ -670,7 +690,7 @@ JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR)
 endif
 endif
 
-ifeq ($(OS), FreeBSD)
+ifneq ($(findstring $(OS),FreeBSD OpenBSD),)
 LOCALBASE ?= /usr/local
 else
 LOCALBASE ?= /usr
@@ -726,7 +746,7 @@ SANITIZE_LDFLAGS :=
 ifeq ($(SANITIZE_MEMORY),1)
 SANITIZE_OPTS += -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer
 SANITIZE_LDFLAGS += $(SANITIZE_OPTS)
-ifneq ($(findstring $(OS),Linux FreeBSD),)
+ifneq ($(findstring $(OS),Linux FreeBSD OpenBSD),)
 SANITIZE_LDFLAGS += -Wl,--warn-unresolved-symbols
 endif # OS Linux or FreeBSD
 endif # SANITIZE_MEMORY=1
@@ -798,6 +818,11 @@ JCXXFLAGS += -DUSE_TIMING_COUNTS
 JCFLAGS += -DUSE_TIMING_COUNTS
 endif
 
+ifeq ($(WITH_NVTX), 1)
+JCXXFLAGS += -DUSE_NVTX
+JCFLAGS += -DUSE_NVTX
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -930,6 +955,15 @@ BINARY:=64
 MARCH=
 endif
 
+# Allow Clang to use CRC instructions (only applicable on AArch64)
+ifneq (,$(findstring aarch64,$(ARCH)))
+ifeq ($(USECLANG),1)
+ifeq (,$(MARCH))
+JCFLAGS += -mcrc
+endif
+endif
+endif
+
 # If we are running on powerpc64 or ppc64, fail out dramatically
 ifneq (,$(filter $(ARCH), powerpc64 ppc64))
 $(error Big-endian PPC64 is not supported, to ignore this error, set ARCH=ppc64le)
@@ -1061,21 +1095,14 @@ LIBUNWIND:=
 else ifneq ($(DISABLE_LIBUNWIND), 0)
 LIBUNWIND:=
 else
-ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
-ifneq ($(OS),Darwin)
 LIBUNWIND:=-lunwind
-# Only for linux since we want to use not yet released libunwind features
+ifneq ($(findstring $(OS),Darwin OpenBSD),)
+JCPPFLAGS+=-DLLVMLIBUNWIND
+else ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
+# Only for linux and freebsd since we want to use not yet released gnu libunwind features
 JCFLAGS+=-DSYSTEM_LIBUNWIND
 JCPPFLAGS+=-DSYSTEM_LIBUNWIND
 endif
-else
-ifeq ($(OS),Darwin)
-LIBUNWIND:=-lunwind
-JCPPFLAGS+=-DLLVMLIBUNWIND
-else
-LIBUNWIND:=-lunwind
-endif
-endif
 endif
 
 ifeq ($(origin LLVM_CONFIG), undefined)
@@ -1268,7 +1295,7 @@ CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.33|GLIBCXX_3\.5\.|GLIBCXX_4\.
 # Note: we explicitly _do not_ define `CSL` here, since it requires some more
 # advanced techniques to decide whether it should be installed from a BB source
 # or not.  See `deps/csl.mk` for more detail.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT BOLT
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1374,12 +1401,32 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic
 # make it loaded first to
 # prevent from linking to outdated system libs.
 # See #21788
+# TODO: Determine whether the condition here on AArch64 (added in #55089) should actually
+# be `ifneq ($(USE_BINARYBUILDER),0)`. We vendor a correctly versioned libgcc_s when using
+# BinaryBuilder which we want to link in early as noted above, but it could be the case
+# that without BinaryBuilder, regardless of architecture, we need to delay linking libgcc_s
+# to avoid getting the system one.
+ifeq (,$(findstring aarch64,$(ARCH)))
 OSLIBS += -lgcc_s
+endif
 
-OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
+OSLIBS += -Wl,--export-dynamic -Wl,--undefined-version -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
 	$(NO_WHOLE_ARCHIVE)
 endif
 
+ifeq ($(OS), OpenBSD)
+JLDFLAGS += -Wl,--Bdynamic
+ifneq ($(SANITIZE),1)
+JLDFLAGS += -Wl,-no-undefined
+endif
+
+JLIBLDFLAGS += -Wl,-Bsymbolic-functions
+
+OSLIBS += -Wl,--no-as-needed -lpthread -lm -lc++abi -lc
+OSLIBS += -Wl,--whole-archive -lcompiler_rt -Wl,--no-whole-archive
+OSLIBS += -Wl,--export-dynamic,--as-needed,--version-script=$(BUILDROOT)/src/julia.expmap
+endif
+
 ifeq ($(OS), Darwin)
 SHLIB_EXT := dylib
 OSLIBS += -framework CoreFoundation
@@ -1393,7 +1440,7 @@ ifeq ($(OS), WINNT)
 HAVE_SSP := 1
 OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
 	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic -lole32
-JLDFLAGS += -Wl,--stack,8388608
+JLDFLAGS += -Wl,--stack,8388608 --disable-auto-import --disable-runtime-pseudo-reloc
 ifeq ($(ARCH),i686)
 JLDFLAGS += -Wl,--large-address-aware
 endif
@@ -1487,7 +1534,7 @@ endif
 CLANGSA_FLAGS :=
 CLANGSA_CXXFLAGS :=
 ifeq ($(OS), Darwin) # on new XCode, the files are hidden
-CLANGSA_FLAGS += -isysroot $(shell xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+ CLANGSA_FLAGS += -isysroot $(shell xcrun --show-sdk-path -sdk macosx)
 endif
 ifeq ($(USEGCC),1)
 # try to help clang find the c++ files for CC by guessing the value for --prefix
diff --git a/Makefile b/Makefile
index 89fe35b470094..735d342a79eb5 100644
--- a/Makefile
+++ b/Makefile
@@ -61,6 +61,10 @@ $(foreach link,base $(JULIAHOME)/test,$(eval $(call symlink_target,$(link),$$(bu
 julia_flisp.boot.inc.phony: julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src julia_flisp.boot.inc.phony
 
+# Build the HTML docs (skipped if already exists, notably in tarballs)
+$(BUILDROOT)/doc/_build/html/en/index.html: $(shell find $(BUILDROOT)/base $(BUILDROOT)/doc \( -path $(BUILDROOT)/doc/_build -o -path $(BUILDROOT)/doc/deps -o -name *_constants.jl -o -name *_h.jl -o -name version_git.jl \) -prune -o -type f -print)
+	@$(MAKE) docs
+
 julia-symlink: julia-cli-$(JULIA_BUILD_MODE)
 ifeq ($(OS),WINNT)
 	echo '@"%~dp0/'"$$(echo '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')"'" %*' | tr / '\\' > $(BUILDROOT)/julia.bat
@@ -112,7 +116,7 @@ julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink j
                                       julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest julia-base-cache
 
 stdlibs-cache-release stdlibs-cache-debug : stdlibs-cache-% : julia-%
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk all-$*
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk $*
 
 debug release : % : julia-% stdlibs-cache-%
 
@@ -202,8 +206,13 @@ JL_PRIVATE_LIBS-0 += libjulia-internal libjulia-codegen
 else ifeq ($(JULIA_BUILD_MODE),debug)
 JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug
 endif
+# BSD-3-Clause
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libcamd libccolamd libcolamd libsuitesparseconfig
+# LGPL-2.1+
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libbtf libklu libldl
 ifeq ($(USE_GPL_LIBS), 1)
-JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
+# GPL-2.0+
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libcholmod librbio libspqr libumfpack
 endif
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline
 JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8
@@ -272,7 +281,7 @@ define stringreplace
 endef
 
 
-install: $(build_depsbindir)/stringreplace docs
+install: $(build_depsbindir)/stringreplace $(BUILDROOT)/doc/_build/html/en/index.html
 	@$(MAKE) $(QUIET_MAKE) $(JULIA_BUILD_MODE)
 	@for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(private_libexecdir); do \
 		mkdir -p $(DESTDIR)$$subdir; \
@@ -289,6 +298,7 @@ else ifeq ($(JULIA_BUILD_MODE),debug)
 	-$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/
 endif
 	-$(INSTALL_M) $(wildcard $(build_private_libdir)/*.a) $(DESTDIR)$(private_libdir)/
+	-rm -f $(DESTDIR)$(private_libdir)/sys-o.a
 
 	# We have a single exception; we want 7z.dll to live in private_libexecdir,
 	# not bindir, so that 7z.exe can find it.
@@ -372,6 +382,11 @@ endif
 	cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base
 	cp -R -L $(JULIAHOME)/test/* $(DESTDIR)$(datarootdir)/julia/test
 	cp -R -L $(build_datarootdir)/julia/* $(DESTDIR)$(datarootdir)/julia
+
+	# Set .jl sources as read-only to match package directories
+	find $(DESTDIR)$(datarootdir)/julia/base -type f -name \*.jl -exec chmod 0444 '{}' \;
+	find $(DESTDIR)$(datarootdir)/julia/test -type f -name \*.jl -exec chmod 0444 '{}' \;
+
 	# Copy documentation
 	cp -R -L $(BUILDROOT)/doc/_build/html $(DESTDIR)$(docdir)/
 	# Remove various files which should not be installed
@@ -391,8 +406,12 @@ endif
 	mkdir -p $(DESTDIR)$(datarootdir)/applications/
 	$(INSTALL_F) $(JULIAHOME)/contrib/julia.desktop $(DESTDIR)$(datarootdir)/applications/
 	# Install appdata file
-	mkdir -p $(DESTDIR)$(datarootdir)/appdata/
-	$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/appdata/
+	mkdir -p $(DESTDIR)$(datarootdir)/metainfo/
+	$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/
+	# Install terminal info database
+ifneq ($(WITH_TERMINFO),0)
+	cp -R -L $(build_datarootdir)/terminfo $(DESTDIR)$(datarootdir)
+endif
 
 	# Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel)
 ifneq ($(private_libdir_rel),$(build_private_libdir_rel))
@@ -449,7 +468,6 @@ endif
 ifeq ($(OS), Linux)
 	-$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT)
 endif
-
 ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS))
 	# Next, overwrite relative path to libjulia-internal in our loader if $$(LOADER_BUILD_DEP_LIBS) != $$(LOADER_INSTALL_DEP_LIBS)
 ifeq ($(JULIA_BUILD_MODE),release)
@@ -529,7 +547,7 @@ app:
 darwinframework:
 	$(MAKE) -C $(JULIAHOME)/contrib/mac/framework
 
-light-source-dist.tmp: docs
+light-source-dist.tmp: $(BUILDROOT)/doc/_build/html/en/index.html
 ifneq ($(BUILDROOT),$(JULIAHOME))
 	$(error make light-source-dist does not work in out-of-tree builds)
 endif
@@ -587,6 +605,7 @@ clean: | $(CLEAN_TARGETS)
 	@-$(MAKE) -C $(BUILDROOT)/cli clean
 	@-$(MAKE) -C $(BUILDROOT)/test clean
 	@-$(MAKE) -C $(BUILDROOT)/stdlib clean
+	@-$(MAKE) -C $(BUILDROOT) -f pkgimage.mk clean
 	-rm -f $(BUILDROOT)/julia
 	-rm -f $(BUILDROOT)/*.tar.gz
 	-rm -f $(build_depsbindir)/stringreplace \
@@ -651,7 +670,7 @@ win-extras:
 ifeq ($(USE_SYSTEM_LLVM), 1)
 LLVM_SIZE := llvm-size$(EXE)
 else
-LLVM_SIZE := $(build_depsbindir)/llvm-size$(EXE)
+LLVM_SIZE := PATH=$(build_bindir):$$PATH; $(build_depsbindir)/llvm-size$(EXE)
 endif
 build-stats:
 ifeq ($(USE_BINARYBUILDER_LLVM),1)
@@ -660,7 +679,14 @@ endif
 	@printf $(JULCOLOR)' ==> ./julia binary sizes\n'$(ENDCOLOR)
 	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_private_libdir)/sys.$(SHLIB_EXT)) \
 		$(call cygpath_w,$(build_shlibdir)/libjulia.$(SHLIB_EXT)) \
+		$(call cygpath_w,$(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)) \
+		$(call cygpath_w,$(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT)) \
 		$(call cygpath_w,$(build_bindir)/julia$(EXE)))
+ifeq ($(OS),Darwin)
+	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/libLLVM.$(SHLIB_EXT)))
+else
+	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/$(LLVM_SHARED_LIB_NAME).$(SHLIB_EXT)))
+endif
 	@printf $(JULCOLOR)' ==> ./julia launch speedtest\n'$(ENDCOLOR)
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
diff --git a/NEWS.md b/NEWS.md
index 4d312c28ce34e..c12cc3c64300c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,18 @@ Julia v1.12 Release Notes
 New language features
 ---------------------
 
+- A new keyword argument `usings::Bool` has been added to `names`. By using this, we can now
+  find all the names available in module `A` by `names(A; all=true, imported=true, usings=true)`. ([#54609])
+- the `@atomic(...)` macro family supports now the reference assignment syntax, e.g.
+  `@atomic :monotonic v[3] += 4` modifies `v[3]` atomically with monotonic ordering semantics. ([#54707])
+  The supported syntax allows
+  - atomic fetch (`x = @atomic v[3]`),
+  - atomic set (`@atomic v[3] = 4`),
+  - atomic modify (`@atomic v[3] += 2`),
+  - atomic set once (`@atomiconce v[3] = 2`),
+  - atomic swap (`x = @atomicswap v[3] = 2`), and
+  - atomic replace (`x = @atomicreplace v[3] 2=>5`).
+
 Language changes
 ----------------
 
@@ -17,14 +29,33 @@ Language changes
    may pave the way for inference to be able to intelligently re-use the old
    results, once the new method is deleted. ([#53415])
 
+ - Macro expansion will no longer eagerly recurse into into `Expr(:toplevel)`
+   expressions returned from macros. Instead, macro expansion of `:toplevel`
+   expressions will be delayed until evaluation time. This allows a later
+   expression within a given `:toplevel` expression to make use of macros
+   defined earlier in the same `:toplevel` expression. ([#53515])
+
 Compiler/Runtime improvements
 -----------------------------
 
+- Generated LLVM IR now uses actual pointer types instead of passing pointers as integers.
+  This affects `llvmcall`: Inline LLVM IR should be updated to use `i8*` or `ptr` instead of
+  `i32` or `i64`, and remove unneeded `ptrtoint`/`inttoptr` conversions. For compatibility,
+  IR with integer pointers is still supported, but generates a deprecation warning. ([#53687])
+
+- A new exception `FieldError` is now introduced to raise/handle `getfield` exceptions. Previously `getfield` exception was captured by fallback generic exception `ErrorException`. Now that `FieldError` is more specific `getfield` related exceptions that can occur should use `FieldError` exception instead. ([#54504])
+
 Command-line option changes
 ---------------------------
 
 * The `-m/--module` flag can be passed to run the `main` function inside a package with a set of arguments.
   This `main` function should be declared using `@main` to indicate that it is an entry point.
+* Enabling or disabling color text in Julia can now be controlled with the
+[`NO_COLOR`](https://no-color.org/) or [`FORCE_COLOR`](https://force-color.org/) environment
+variables. ([#53742]).
+* `--project=@temp` starts Julia with a temporary environment.
+* New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took
+  to compile, in ms. ([#54662])
 
 Multi-threading changes
 -----------------------
@@ -32,39 +63,99 @@ Multi-threading changes
 Build system changes
 --------------------
 
+* There are new `Makefile`s to build Julia and LLVM using the Binary Optimization and Layout Tool (BOLT), see  `contrib/bolt` and `contrib/pgo-lto-bolt` ([#54107]).
+
 New library functions
 ---------------------
 
 * `logrange(start, stop; length)` makes a range of constant ratio, instead of constant step ([#39071])
 * The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block. ([#53159])
+* `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)` which wait multiple tasks at once ([#53341]).
+* `uuid7()` creates an RFC 9652 compliant UUID with version 7 ([#54834]).
+* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims`
+* The new `Fix` type is a generalization of `Fix1/Fix2` for fixing a single argument ([#54653]).
 
 New library features
 --------------------
 
+* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]).
+* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]).
+* `replace(string, pattern...)` now supports an optional `IO` argument to
+  write the output to a stream rather than returning a string ([#48625]).
+* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]).
+* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]).
+* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]).
+* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as
+  expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is
+  no longer required there for correctness to avoid data races ([#52461]).
+* After a process exits, `closewrite` will no longer be automatically called on
+  the stream passed to it. Call `wait` on the process instead to ensure the
+  content is fully written, then call `closewrite` manually to avoid
+  data-races. Or use the callback form of `open` to have all that handled
+  automatically.
+* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889])
+* `escape_string` takes additional keyword arguments `ascii=true` (to escape all
+  non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers
+  for u/U escapes, e.g. for C compatibility) [#55099]).
+* `filter` can now act on a `NamedTuple` ([#50795]).
 * `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in
   the uniquing checking ([#53474])
+* `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988])
+* `Lockable` is now exported ([#54595])
+* New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351])
 
 Standard library changes
 ------------------------
 
+* `gcdx(0, 0)` now returns `(0, 0, 0)` instead of `(0, 1, 0)` ([#40989]).
+* `fd` returns a `RawFD` instead of an `Int` ([#55080]).
+
 #### StyledStrings
 
 #### JuliaSyntaxHighlighting
 
+* A new standard library for applying syntax highlighting to Julia code, this
+  uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function
+  that creates an `AnnotatedString` with syntax highlighting applied.
+
 #### Package Manager
 
 #### LinearAlgebra
 
+* `rank` can now take a `QRPivoted` matrix to allow rank estimation via QR factorization ([#54283]).
+* Added keyword argument `alg` to `eigen`, `eigen!`, `eigvals` and `eigvals!` for self-adjoint
+  matrix types (i.e., the type union `RealHermSymComplexHerm`) that allows one to switch
+  between different eigendecomposition algorithms ([#49355]).
+* Added a generic version of the (unblocked) pivoted Cholesky decomposition
+  (callable via `cholesky[!](A, RowMaximum())`) ([#54619]).
+* The number of default BLAS threads now respects process affinity, instead of
+  using total number of logical threads available on the system ([#55574]).
+
 #### Logging
 
 #### Printf
 
 #### Profile
 
+* `Profile.take_heap_snapshot` takes a new keyword argument, `redact_data::Bool`,
+  that is `true` by default. When set, the contents of Julia objects are not emitted
+  in the heap snapshot. This currently only applies to strings. ([#55326])
+* `Profile.print()` now colors Base/Core/Package modules similarly to how they are in stacktraces.
+  Also paths, even if truncated, are now clickable in terminals that support URI links
+  to take you to the specified `JULIA_EDITOR` for the given file & line number. ([#55335])
+
 #### Random
 
 #### REPL
 
+- Using the new `usings=true` feature of the `names()` function, REPL completions can now
+  complete names that have been explicitly `using`-ed. ([#54610])
+- REPL completions can now complete input lines like `[import|using] Mod: xxx|` e.g.
+  complete `using Base.Experimental: @op` to `using Base.Experimental: @opaque`. ([#54719])
+- the REPL will now warn if it detects a name is being accessed from a module which does not define it (nor has a submodule which defines it),
+  and for which the name is not public in that module. For example, `map` is defined in Base, and executing `LinearAlgebra.map`
+  in the REPL will now issue a warning the first time occurs. ([#54872])
+
 #### SuiteSparse
 
 #### SparseArrays
@@ -89,6 +180,10 @@ Deprecated or removed
 External dependencies
 ---------------------
 
+- The terminal info database, `terminfo`, is now vendored by default, providing a better
+  REPL user experience when `terminfo` is not available on the system. Julia can be built
+  without vendoring the database using the Makefile option `WITH_TERMINFO=0`. ([#55411])
+
 Tooling Improvements
 --------------------
 
diff --git a/README.md b/README.md
index c5c4f3f2730ac..465adcf049922 100644
--- a/README.md
+++ b/README.md
@@ -35,8 +35,8 @@
 ## The Julia Language
 
 Julia is a high-level, high-performance dynamic language for technical
-computing.  The main homepage for Julia can be found at
-[julialang.org](https://julialang.org/).  This is the GitHub
+computing. The main homepage for Julia can be found at
+[julialang.org](https://julialang.org/). This is the GitHub
 repository of Julia source code, including instructions for compiling
 and installing Julia, below.
 
@@ -57,10 +57,8 @@ New developers may find the notes in
 [CONTRIBUTING](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md)
 helpful to start contributing to the Julia codebase.
 
-### External Resources
+### Learning Julia
 
-- [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang)
-- [**Twitter**](https://twitter.com/JuliaLanguage)
 - [**Learning resources**](https://julialang.org/learning/)
 
 ## Binary Installation
@@ -74,7 +72,7 @@ for OS and platform combinations.
 
 If everything works correctly, you will see a Julia banner and an
 interactive prompt into which you can enter expressions for
-evaluation.  You can read about [getting
+evaluation. You can read about [getting
 started](https://docs.julialang.org/en/v1/manual/getting-started/) in the manual.
 
 **Note**: Although some OS package managers provide Julia, such
@@ -94,7 +92,7 @@ and then use the command prompt to change into the resulting julia directory. By
 Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by running:
 
-    git checkout v1.10.0
+    git checkout v1.10.5
 
 To build the `julia` executable, run `make` from within the julia directory.
 
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
index 89d1ce3de3d97..30f53727c50ab 100644
--- a/THIRDPARTY.md
+++ b/THIRDPARTY.md
@@ -42,7 +42,19 @@ Julia's `stdlib` uses the following external libraries, which have their own lic
 - [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
 - [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
 - [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of BSD-3-Clause, LGPL2.1+ and GPL2+; see individual module licenses]
+  - [`libamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/AMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libcamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libccolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CCOLAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libcolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/COLAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libsuitesparseconfig`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SuiteSparse_config/README.txt) [BSD-3-Clause]
+  - [`libbtf`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/BTF/Doc/License.txt) [LGPL-2.1+]
+  - [`libklu`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/KLU/Doc/License.txt) [LGPL-2.1+]
+  - [`libldl`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/LDL/Doc/License.txt) [LGPL-2.1+]
+  - [`libcholmod`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CHOLMOD/Doc/License.txt) [LGPL-2.1+ and GPL-2.0+]
+  - [`librbio`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/RBio/Doc/License.txt) [GPL-2.0+]
+  - [`libspqr`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SPQR/Doc/License.txt) [GPL-2.0+]
+  - [`libumfpack`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/UMFPACK/Doc/License.txt) [GPL-2.0+]
 - [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT]
 - [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT]
 
diff --git a/base/Base.jl b/base/Base.jl
index 30bc0b91411b8..10a8dd1532f92 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -53,6 +53,9 @@ function setproperty!(x, f::Symbol, v)
     return setfield!(x, f, val)
 end
 
+typeof(function getproperty end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+typeof(function setproperty! end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+
 dotgetproperty(x, f) = getproperty(x, f)
 
 getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
@@ -152,7 +155,8 @@ end
 """
     time_ns() -> UInt64
 
-Get the time in nanoseconds. The time corresponding to 0 is undefined, and wraps every 5.8 years.
+Get the time in nanoseconds relative to some arbitrary time in the past. The primary use is for measuring the elapsed time
+between two moments in time.
 """
 time_ns() = ccall(:jl_hrtime, UInt64, ())
 
@@ -223,7 +227,7 @@ delete_method(which(Pair{Any,Any}, (Any, Any)))
 end
 
 # The REPL stdlib hooks into Base using this Ref
-const REPL_MODULE_REF = Ref{Module}()
+const REPL_MODULE_REF = Ref{Module}(Base)
 
 include("checked.jl")
 using .Checked
@@ -264,8 +268,28 @@ function strcat(x::String, y::String)
     end
     return out
 end
-include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
-include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
+
+BUILDROOT::String = ""
+
+baremodule BuildSettings
+end
+
+let i = 1
+    global BUILDROOT
+    while i <= length(Core.ARGS)
+        if Core.ARGS[i] == "--buildsettings"
+            include(BuildSettings, ARGS[i+1])
+            i += 1
+        else
+            BUILDROOT = Core.ARGS[i]
+        end
+        i += 1
+    end
+end
+
+include(strcat(BUILDROOT, "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
+include(strcat(BUILDROOT, "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
+
 # Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
 # a slightly more verbose fashion than usual, because we're running so early.
 const DL_LOAD_PATH = String[]
@@ -400,10 +424,12 @@ include("weakkeydict.jl")
 
 # ScopedValues
 include("scopedvalues.jl")
-using .ScopedValues
+
+# metaprogramming
+include("meta.jl")
 
 # Logging
-include("logging.jl")
+include("logging/logging.jl")
 using .CoreLogging
 
 include("env.jl")
@@ -495,9 +521,6 @@ include("irrationals.jl")
 include("mathconstants.jl")
 using .MathConstants: ℯ, π, pi
 
-# metaprogramming
-include("meta.jl")
-
 # Stack frames and traces
 include("stacktraces.jl")
 using .StackTraces
@@ -543,6 +566,8 @@ if isdefined(Core, :Compiler) && is_primary_base_module
     Docs.loaddocs(Core.Compiler.CoreDocs.DOCS)
 end
 
+include("precompilation.jl")
+
 # finally, now make `include` point to the full version
 for m in methods(include)
     delete_method(m)
@@ -559,68 +584,13 @@ include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexp
 
 # External libraries vendored into Base
 Core.println("JuliaSyntax/src/JuliaSyntax.jl")
-include(@__MODULE__, string((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl)
+include(@__MODULE__, string(BUILDROOT, "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl)
 
 end_base_include = time_ns()
 
 const _sysimage_modules = PkgId[]
 in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules
 
-# Precompiles for Revise and other packages
-# TODO: move these to contrib/generate_precompile.jl
-# The problem is they don't work there
-for match = _methods(+, (Int, Int), -1, get_world_counter())
-    m = match.method
-    delete!(push!(Set{Method}(), m), m)
-    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt)))
-
-    empty!(Set())
-    push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
-    (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
-    (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
-    (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
-    (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
-    (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
-    Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
-    Dict(Base => [:(1+1)])[Base]
-    Dict(:one => [1])[:one]
-    Dict("abc" => Set())["abc"]
-    pushfirst!([], sum)
-    get(Base.pkgorigins, Base.PkgId(Base), nothing)
-    sort!([1,2,3])
-    unique!([1,2,3])
-    cumsum([1,2,3])
-    append!(Int[], BitSet())
-    isempty(BitSet())
-    delete!(BitSet([1,2]), 3)
-    deleteat!(Int32[1,2,3], [1,3])
-    deleteat!(Any[1,2,3], [1,3])
-    Core.svec(1, 2) == Core.svec(3, 4)
-    any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
-
-    # Code loading uses this
-    sortperm(mtime.(readdir(".")), rev=true)
-    # JLLWrappers uses these
-    Dict{UUID,Set{String}}()[UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}()
-    get!(Set{String}, Dict{UUID,Set{String}}(), UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210"))
-    eachindex(IndexLinear(), Expr[])
-    push!(Expr[], Expr(:return, false))
-    vcat(String[], String[])
-    k, v = (:hello => nothing)
-    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
-    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
-    # Preferences uses these
-    precompile(get_preferences, (UUID,))
-    precompile(record_compiletime_preference, (UUID, String))
-    get(Dict{String,Any}(), "missing", nothing)
-    delete!(Dict{String,Any}(), "missing")
-    for (k, v) in Dict{String,Any}()
-        println(k)
-    end
-
-    break   # only actually need to do this once
-end
-
 if is_primary_base_module
 
 # Profiling helper
@@ -646,6 +616,25 @@ function profile_printing_listener(cond::Base.AsyncCondition)
     nothing
 end
 
+function start_profile_listener()
+    cond = Base.AsyncCondition()
+    Base.uv_unref(cond.handle)
+    t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
+    atexit() do
+        # destroy this callback when exiting
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+        # this will prompt any ongoing or pending event to flush also
+        close(cond)
+        # error-propagation is not needed, since the errormonitor will handle printing that better
+        t === current_task() || _wait(t)
+    end
+    finalizer(cond) do c
+        # if something goes south, still make sure we aren't keeping a reference in C to this
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+    end
+    ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
+end
+
 function __init__()
     # Base library init
     global _atexit_hooks_finished = false
@@ -658,28 +647,17 @@ function __init__()
     init_active_project()
     append!(empty!(_sysimage_modules), keys(loaded_modules))
     empty!(explicit_loaded_modules)
+    empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty
+    for (mod, key) in module_keys
+        loaded_precompiles[key => module_build_id(mod)] = mod
+    end
     if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES")
         MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"])
     end
     # Profiling helper
     @static if !Sys.iswindows()
         # triggering a profile via signals is not implemented on windows
-        cond = Base.AsyncCondition()
-        Base.uv_unref(cond.handle)
-        t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
-        atexit() do
-            # destroy this callback when exiting
-            ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
-            # this will prompt any ongoing or pending event to flush also
-            close(cond)
-            # error-propagation is not needed, since the errormonitor will handle printing that better
-            _wait(t)
-        end
-        finalizer(cond) do c
-            # if something goes south, still make sure we aren't keeping a reference in C to this
-            ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
-        end
-        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
+        start_profile_listener()
     end
     _require_world_age[] = get_world_counter()
     # Prevent spawned Julia process from getting stuck waiting on Tracy to connect.
@@ -687,6 +665,8 @@ function __init__()
     if get_bool_env("JULIA_USE_FLISP_PARSER", false) === false
         JuliaSyntax.enable_in_core!()
     end
+
+    CoreLogging.global_logger(CoreLogging.ConsoleLogger())
     nothing
 end
 
diff --git a/base/Enums.jl b/base/Enums.jl
index 45a1b66753484..d4094945853ec 100644
--- a/base/Enums.jl
+++ b/base/Enums.jl
@@ -44,7 +44,7 @@ Base.print(io::IO, x::Enum) = print(io, _symbol(x))
 function Base.show(io::IO, x::Enum)
     sym = _symbol(x)
     if !(get(io, :compact, false)::Bool)
-        from = get(io, :module, Base.active_module())
+        from = get(io, :module, Main)
         def = parentmodule(typeof(x))
         if from === nothing || !Base.isvisible(sym, def, from)
             show(io, def)
@@ -90,7 +90,7 @@ end
 # give Enum types scalar behavior in broadcasting
 Base.broadcastable(x::Enum) = Ref(x)
 
-@noinline enum_argument_error(typename, x) = throw(ArgumentError(string("invalid value for Enum $(typename): $x")))
+@noinline enum_argument_error(typename, x) = throw(ArgumentError(LazyString("invalid value for Enum ", typename, ": ", x)))
 
 """
     @enum EnumName[::BaseType] value1[=x] value2[=y]
@@ -143,7 +143,7 @@ julia> Symbol(apple)
 """
 macro enum(T::Union{Symbol,Expr}, syms...)
     if isempty(syms)
-        throw(ArgumentError("no arguments given for Enum $T"))
+        throw(ArgumentError(LazyString("no arguments given for Enum ", T)))
     end
     basetype = Int32
     typename = T
@@ -151,10 +151,11 @@ macro enum(T::Union{Symbol,Expr}, syms...)
         typename = T.args[1]
         basetype = Core.eval(__module__, T.args[2])
         if !isa(basetype, DataType) || !(basetype <: Integer) || !isbitstype(basetype)
-            throw(ArgumentError("invalid base type for Enum $typename, $T=::$basetype; base type must be an integer primitive type"))
+            throw(ArgumentError(
+                LazyString("invalid base type for Enum ", typename, ", ", T, "=::", basetype, "; base type must be an integer primitive type")))
         end
     elseif !isa(T, Symbol)
-        throw(ArgumentError("invalid type expression for enum $T"))
+        throw(ArgumentError(LazyString("invalid type expression for enum ", T)))
     end
     values = Vector{basetype}()
     seen = Set{Symbol}()
@@ -169,32 +170,32 @@ macro enum(T::Union{Symbol,Expr}, syms...)
         s isa LineNumberNode && continue
         if isa(s, Symbol)
             if i == typemin(basetype) && !isempty(values)
-                throw(ArgumentError("overflow in value \"$s\" of Enum $typename"))
+                throw(ArgumentError(LazyString("overflow in value \"", s, "\" of Enum ", typename)))
             end
         elseif isa(s, Expr) &&
                (s.head === :(=) || s.head === :kw) &&
                length(s.args) == 2 && isa(s.args[1], Symbol)
             i = Core.eval(__module__, s.args[2]) # allow exprs, e.g. uint128"1"
             if !isa(i, Integer)
-                throw(ArgumentError("invalid value for Enum $typename, $s; values must be integers"))
+                throw(ArgumentError(LazyString("invalid value for Enum ", typename, ", ", s, "; values must be integers")))
             end
             i = convert(basetype, i)
             s = s.args[1]
             hasexpr = true
         else
-            throw(ArgumentError(string("invalid argument for Enum ", typename, ": ", s)))
+            throw(ArgumentError(LazyString("invalid argument for Enum ", typename, ": ", s)))
         end
         s = s::Symbol
         if !Base.isidentifier(s)
-            throw(ArgumentError("invalid name for Enum $typename; \"$s\" is not a valid identifier"))
+            throw(ArgumentError(LazyString("invalid name for Enum ", typename, "; \"", s, "\" is not a valid identifier")))
         end
         if hasexpr && haskey(namemap, i)
-            throw(ArgumentError("both $s and $(namemap[i]) have value $i in Enum $typename; values must be unique"))
+            throw(ArgumentError(LazyString("both ", s, " and ", namemap[i], " have value ", i, " in Enum ", typename, "; values must be unique")))
         end
         namemap[i] = s
         push!(values, i)
         if s in seen
-            throw(ArgumentError("name \"$s\" in Enum $typename is not unique"))
+            throw(ArgumentError(LazyString("name \"", s, "\" in Enum ", typename, " is not unique")))
         end
         push!(seen, s)
         if length(values) == 1
diff --git a/base/Makefile b/base/Makefile
index 9a6c0d0d03833..febee53a9ddfc 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -249,12 +249,12 @@ endif
 ifneq (,$(LIBGFORTRAN_VERSION))
 $(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION)))
 endif
-$(eval $(call symlink_system_library,CSL,libquadmath,0))
 $(eval $(call symlink_system_library,CSL,libstdc++,6))
-# We allow libssp, libatomic and libgomp to fail as they are not available on all systems
+# We allow libssp, libatomic, libgomp and libquadmath to fail as they are not available on all systems
 $(eval $(call symlink_system_library,CSL,libssp,0,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libquadmath,0,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,PCRE,libpcre2-8))
 $(eval $(call symlink_system_library,DSFMT,libdSFMT))
 $(eval $(call symlink_system_library,LIBBLASTRAMPOLINE,libblastrampoline))
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index a9431197932dd..754ab20660ab8 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -748,7 +748,7 @@ julia> checkindex(Bool, 1:20, 21)
 false
 ```
 """
-checkindex(::Type{Bool}, inds, i) = throw(ArgumentError("unable to check bounds for indices of type $(typeof(i))"))
+checkindex(::Type{Bool}, inds, i) = throw(ArgumentError(LazyString("unable to check bounds for indices of type ", typeof(i))))
 checkindex(::Type{Bool}, inds::AbstractUnitRange, i::Real) = (first(inds) <= i) & (i <= last(inds))
 checkindex(::Type{Bool}, inds::IdentityUnitRange, i::Real) = checkindex(Bool, inds.indices, i)
 checkindex(::Type{Bool}, inds::OneTo{T}, i::T) where {T<:BitInteger} = unsigned(i - one(i)) < unsigned(last(inds))
@@ -896,13 +896,18 @@ emptymutable(itr, ::Type{U}) where {U} = Vector{U}()
 In-place [`copy`](@ref) of `src` into `dst`, discarding any pre-existing
 elements in `dst`.
 If `dst` and `src` are of the same type, `dst == src` should hold after
-the call. If `dst` and `src` are multidimensional arrays, they must have
+the call. If `dst` and `src` are vector types, they must have equal
+offset. If `dst` and `src` are multidimensional arrays, they must have
 equal [`axes`](@ref).
 
 $(_DOCS_ALIASING_WARNING)
 
 See also [`copyto!`](@ref).
 
+!!! note
+    When operating on vector types, if `dst` and `src` are not of the
+    same length, `dst` is resized to `length(src)` prior to the `copy`.
+
 !!! compat "Julia 1.1"
     This method requires at least Julia 1.1. In Julia 1.0 this method
     is available from the `Future` standard library as `Future.copy!`.
@@ -1078,45 +1083,28 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle:
         if srcstyle isa IndexLinear
             # Single-index implementation
             @inbounds for i in srcinds
-                if isassigned(src, i)
-                    dest[i + Δi] = src[i]
-                else
-                    _unsetindex!(dest, i + Δi)
-                end
+                dest[i + Δi] = src[i]
             end
         else
             # Dual-index implementation
             i = idf - 1
-            @inbounds for a in eachindex(src)
-                i += 1
-                if isassigned(src, a)
-                    dest[i] = src[a]
-                else
-                    _unsetindex!(dest, i)
-                end
+            @inbounds for a in src
+                dest[i+=1] = a
             end
         end
     else
         iterdest, itersrc = eachindex(dest), eachindex(src)
         if iterdest == itersrc
             # Shared-iterator implementation
-            @inbounds for I in iterdest
-                if isassigned(src, I)
-                    dest[I] = src[I]
-                else
-                    _unsetindex!(dest, I)
-                end
+            for I in iterdest
+                @inbounds dest[I] = src[I]
             end
         else
             # Dual-iterator implementation
             ret = iterate(iterdest)
-            @inbounds for a in itersrc
+            @inbounds for a in src
                 idx, state = ret::NTuple{2,Any}
-                if isassigned(src, a)
-                    dest[idx] = src[a]
-                else
-                    _unsetindex!(dest, idx)
-                end
+                dest[idx] = a
                 ret = iterate(iterdest, state)
             end
         end
@@ -1145,11 +1133,7 @@ function copyto!(dest::AbstractArray, dstart::Integer,
     (checkbounds(Bool, srcinds, sstart)  && checkbounds(Bool, srcinds, sstart+n-1))  || throw(BoundsError(src,  sstart:sstart+n-1))
     src′ = unalias(dest, src)
     @inbounds for i = 0:n-1
-        if isassigned(src′, sstart+i)
-            dest[dstart+i] = src′[sstart+i]
-        else
-            _unsetindex!(dest, dstart+i)
-        end
+        dest[dstart+i] = src′[sstart+i]
     end
     return dest
 end
@@ -1160,7 +1144,7 @@ function copy(a::AbstractArray)
 end
 
 function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int},
-                 A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S}
+               A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S}
     if length(ir_dest) != length(ir_src)
         throw(ArgumentError(LazyString("source and destination must have same size (got ",
             length(ir_src)," and ",length(ir_dest),")")))
@@ -1218,8 +1202,26 @@ end
 copymutable(itr) = collect(itr)
 
 zero(x::AbstractArray{T}) where {T<:Number} = fill!(similar(x, typeof(zero(T))), zero(T))
+zero(x::AbstractArray{S}) where {S<:Union{Missing, Number}} = fill!(similar(x, typeof(zero(S))), zero(S))
 zero(x::AbstractArray) = map(zero, x)
 
+function _one(unit::T, mat::AbstractMatrix) where {T}
+    (rows, cols) = axes(mat)
+    (length(rows) == length(cols)) ||
+      throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
+    zer = zero(unit)::T
+    require_one_based_indexing(mat)
+    I = similar(mat, T)
+    fill!(I, zer)
+    for i ∈ rows
+        I[i, i] = unit
+    end
+    I
+end
+
+one(x::AbstractMatrix{T}) where {T} = _one(one(T), x)
+oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x)
+
 ## iteration support for arrays by iterating over `eachindex` in the array ##
 # Allows fast iteration by default for both IndexLinear and IndexCartesian arrays
 
@@ -1262,6 +1264,10 @@ function _memory_offset(x::AbstractArray, I::Vararg{Any,N}) where {N}
     return sum(map((i, s, o)->s*(i-o), J, strides(x), Tuple(first(CartesianIndices(x)))))*elsize(x)
 end
 
+## Special constprop heuristics for getindex/setindex
+typename(typeof(function getindex end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+typename(typeof(function setindex! end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+
 ## Approach:
 # We only define one fallback method on getindex for all argument types.
 # That dispatches to an (inlined) internal _getindex function, where the goal is
@@ -1472,20 +1478,7 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe
     r
 end
 
-function _unsetindex!(A::AbstractArray, i::Integer...)
-    @_propagate_inbounds_meta
-    _unsetindex!(A, map(to_index, i)...)
-end
-
-function _unsetindex!(A::AbstractArray{T}, i::Int...) where T
-    # this provides a fallback method which is a no-op if the element is already unassigned
-    # such that copying into an uninitialized object generally always will work,
-    # even if the specific custom array type has not implemented `_unsetindex!`
-    @inline
-    @boundscheck checkbounds(A, i...)
-    allocatedinline(T) || @inbounds(!isassigned(A, i...)) || throw(MethodError(_unsetindex!, (A, i...)))
-    return A
-end
+_unsetindex!(A::AbstractArray, i::Integer) = _unsetindex!(A, to_index(i))
 
 """
     parent(A)
@@ -1552,12 +1545,14 @@ much more common case where aliasing does not occur. By default,
 unaliascopy(A::Array) = copy(A)
 unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A)))
 _unaliascopy(A::T, C::T) where {T} = C
-_unaliascopy(A, C) = throw(ArgumentError("""
-    an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument
-    and must make a preventative copy of itself in order to maintain consistent semantics,
-    but `copy(::$(typeof(A)))` returns a new array of type `$(typeof(C))`.
-    To fix, implement:
-        `Base.unaliascopy(A::$(typename(typeof(A)).wrapper))::typeof(A)`"""))
+function _unaliascopy(A, C)
+    Aw = typename(typeof(A)).wrapper
+    throw(ArgumentError(LazyString("an array of type `", Aw, "` shares memory with another argument ",
+    "and must make a preventative copy of itself in order to maintain consistent semantics, ",
+    "but `copy(::", typeof(A), ")` returns a new array of type `", typeof(C), "`.\n",
+    """To fix, implement:
+        `Base.unaliascopy(A::""", Aw, ")::typeof(A)`")))
+end
 unaliascopy(A) = A
 
 """
@@ -1649,13 +1644,15 @@ eltypeof(x::AbstractArray) = eltype(x)
 
 promote_eltypeof() = error()
 promote_eltypeof(v1) = eltypeof(v1)
-promote_eltypeof(v1, vs...) = promote_type(eltypeof(v1), promote_eltypeof(vs...))
+promote_eltypeof(v1, v2) = promote_type(eltypeof(v1), eltypeof(v2))
+promote_eltypeof(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltypeof(y)), promote_eltypeof(v1, v2), vs...))
 promote_eltypeof(v1::T, vs::T...) where {T} = eltypeof(v1)
 promote_eltypeof(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T
 
 promote_eltype() = error()
 promote_eltype(v1) = eltype(v1)
-promote_eltype(v1, vs...) = promote_type(eltype(v1), promote_eltype(vs...))
+promote_eltype(v1, v2) = promote_type(eltype(v1), eltype(v2))
+promote_eltype(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltype(y)), promote_eltype(v1, v2), vs...))
 promote_eltype(v1::T, vs::T...) where {T} = eltype(T)
 promote_eltype(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T
 
@@ -1666,10 +1663,10 @@ typed_vcat(::Type{T}) where {T} = Vector{T}()
 typed_hcat(::Type{T}) where {T} = Vector{T}()
 
 ## cat: special cases
-vcat(X::T...) where {T}         = T[ X[i] for i=1:length(X) ]
-vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ]
-hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=1:length(X) ]
-hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ]
+vcat(X::T...) where {T}         = T[ X[i] for i=eachindex(X) ]
+vcat(X::T...) where {T<:Number} = T[ X[i] for i=eachindex(X) ]
+hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=eachindex(X) ]
+hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=eachindex(X) ]
 
 vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X)
 hcat(X::Number...) = hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
@@ -1924,7 +1921,7 @@ julia> vcat(range(1, 2, length=3))  # collects lazy ranges
  2.0
 
 julia> two = ([10, 20, 30]', Float64[4 5 6; 7 8 9])  # row vector and a matrix
-([10 20 30], [4.0 5.0 6.0; 7.0 8.0 9.0])
+(adjoint([10, 20, 30]), [4.0 5.0 6.0; 7.0 8.0 9.0])
 
 julia> vcat(two...)
 3×3 Matrix{Float64}:
@@ -3014,7 +3011,7 @@ end
 @inline function _stack_size_check(x, ax1::Tuple)
     if _iterator_axes(x) != ax1
         uax1 = map(UnitRange, ax1)
-        uaxN = map(UnitRange, axes(x))
+        uaxN = map(UnitRange, _iterator_axes(x))
         throw(DimensionMismatch(
             LazyString("stack expects uniform slices, got axes(x) == ", uaxN, " while first had ", uax1)))
     end
@@ -3294,7 +3291,7 @@ one *without* a colon in the slice. This is `view(A,:,i,:)`, whereas
 `mapslices(f, A; dims=(1,3))` uses `A[:,i,:]`. The function `f` may mutate
 values in the slice without affecting `A`.
 """
-function mapslices(f, A::AbstractArray; dims)
+@constprop :aggressive function mapslices(f, A::AbstractArray; dims)
     isempty(dims) && return map(f, A)
 
     for d in dims
@@ -3411,6 +3408,8 @@ mapany(f, itr) = Any[f(x) for x in itr]
 Transform collection `c` by applying `f` to each element. For multiple collection arguments,
 apply `f` elementwise, and stop when any of them is exhausted.
 
+The element type of the result is determined in the same manner as in [`collect`](@ref).
+
 See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).
 
 # Examples
@@ -3526,6 +3525,36 @@ julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is
 """
 map(f, it, iters...) = collect(Generator(f, it, iters...))
 
+# Generic versions of push! for AbstractVector
+# These are specialized further for Vector for faster resizing and setindexing
+function push!(a::AbstractVector{T}, item) where T
+    # convert first so we don't grow the array if the assignment won't work
+    itemT = item isa T ? item : convert(T, item)::T
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = itemT
+    return a
+end
+
+# specialize and optimize the single argument case
+function push!(a::AbstractVector{Any}, @nospecialize x)
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = x
+    return a
+end
+function push!(a::AbstractVector{Any}, @nospecialize x...)
+    @_terminates_locally_meta
+    na = length(a)
+    nx = length(x)
+    resize!(a, na + nx)
+    e = lastindex(a) - nx
+    for i = 1:nx
+        a[e+i] = x[i]
+    end
+    return a
+end
+
 # multi-item push!, pushfirst! (built on top of type-specific 1-item version)
 # (note: must not cause a dispatch loop when 1-item case is not defined)
 push!(A, a, b) = push!(push!(A, a), b)
@@ -3533,6 +3562,9 @@ push!(A, a, b, c...) = push!(push!(A, a, b), c...)
 pushfirst!(A, a, b) = pushfirst!(pushfirst!(A, b), a)
 pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b)
 
+# sizehint! does not nothing by default
+sizehint!(a::AbstractVector, _) = a
+
 ## hashing AbstractArray ##
 
 const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index a9efc2b87bee4..0f028a0f66729 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -93,6 +93,70 @@ function _dropdims(A::AbstractArray, dims::Dims)
 end
 _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
+
+"""
+    insertdims(A; dims)
+
+Inverse of [`dropdims`](@ref); return an array with new singleton dimensions
+at every dimension in `dims`.
+
+Repeated dimensions are forbidden and the largest entry in `dims` must be
+less than or equal than `ndims(A) + length(dims)`.
+
+The result shares the same underlying data as `A`, such that the
+result is mutable if and only if `A` is mutable, and setting elements of one
+alters the values of the other.
+
+See also: [`dropdims`](@ref), [`reshape`](@ref), [`vec`](@ref).
+# Examples
+```jldoctest
+julia> x = [1 2 3; 4 5 6]
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=3)
+2×3×1 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=(1,2,5)) == reshape(x, 1, 1, 2, 3, 1)
+true
+
+julia> dropdims(insertdims(x, dims=(1,2,5)), dims=(1,2,5))
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+```
+
+!!! compat "Julia 1.12"
+    Requires Julia 1.12 or later.
+"""
+insertdims(A; dims) = _insertdims(A, dims)
+function _insertdims(A::AbstractArray{T, N}, dims::NTuple{M, Int}) where {T, N, M}
+    for i in eachindex(dims)
+        1 ≤ dims[i] || throw(ArgumentError("the smallest entry in dims must be ≥ 1."))
+        dims[i] ≤ N+M || throw(ArgumentError("the largest entry in dims must be not larger than the dimension of the array and the length of dims added"))
+        for j = 1:i-1
+            dims[j] == dims[i] && throw(ArgumentError("inserted dims must be unique"))
+        end
+    end
+
+    # acc is a tuple, where the first entry is the final shape
+    # the second entry off acc is a counter for the axes of A
+    inds= Base._foldoneto((acc, i) ->
+                            i ∈ dims
+                                ? ((acc[1]..., Base.OneTo(1)), acc[2])
+                                : ((acc[1]..., axes(A, acc[2])), acc[2] + 1),
+                            ((), 1), Val(N+M))
+    new_shape = inds[1]
+    return reshape(A, new_shape)
+end
+_insertdims(A::AbstractArray, dim::Integer) = _insertdims(A, (Int(dim),))
+
+
+
 ## Unary operators ##
 
 """
diff --git a/base/accumulate.jl b/base/accumulate.jl
index a2d8a1d368d86..2748a4da481fa 100644
--- a/base/accumulate.jl
+++ b/base/accumulate.jl
@@ -33,7 +33,7 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
 end
 
 function accumulate_pairwise(op, v::AbstractVector{T}) where T
-    out = similar(v, promote_op(op, T, T))
+    out = similar(v, _accumulate_promote_op(op, v))
     return accumulate_pairwise!(op, out, v)
 end
 
@@ -111,8 +111,8 @@ julia> cumsum(a, dims=2)
     widening happens and integer overflow results in `Int8[100, -128]`.
 """
 function cumsum(A::AbstractArray{T}; dims::Integer) where T
-    out = similar(A, promote_op(add_sum, T, T))
-    cumsum!(out, A, dims=dims)
+    out = similar(A, _accumulate_promote_op(add_sum, A))
+    return cumsum!(out, A, dims=dims)
 end
 
 """
@@ -280,14 +280,13 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
         # This branch takes care of the cases not handled by `_accumulate!`.
         return collect(Iterators.accumulate(op, A; kw...))
     end
+
     nt = values(kw)
-    if isempty(kw)
-        out = similar(A, promote_op(op, eltype(A), eltype(A)))
-    elseif keys(nt) === (:init,)
-        out = similar(A, promote_op(op, typeof(nt.init), eltype(A)))
-    else
+    if !(isempty(kw) || keys(nt) === (:init,))
         throw(ArgumentError("accumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
     end
+
+    out = similar(A, _accumulate_promote_op(op, A; kw...))
     accumulate!(op, out, A; dims=dims, kw...)
 end
 
@@ -442,3 +441,42 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
     end
     return B
 end
+
+# Internal function used to identify the widest possible eltype required for accumulate results
+function _accumulate_promote_op(op, v; init=nothing)
+    # Nested mock functions used to infer the widest necessary eltype
+    # NOTE: We are just passing this to promote_op for inference and should never be run.
+
+    # Initialization function used to identify initial type of `r`
+    # NOTE: reduce_first may have a different return type than calling `op`
+    function f(op, v, init)
+        val = first(something(iterate(v)))
+        return isnothing(init) ? Base.reduce_first(op, val) : op(init, val)
+    end
+
+    # Infer iteration type independent of the initialization type
+    # If `op` fails then this will return `Union{}` as `k` will be undefined.
+    # Returning `Union{}` is desirable as it won't break the `promote_type` call in the
+    # outer scope below
+    function g(op, v, r)
+        local k
+        for val in v
+            k = op(r, val)
+        end
+        return k
+    end
+
+    # Finally loop again with the two types promoted together
+    # If the `op` fails and reduce_first was used then then this will still just
+    # return the initial type, allowing the `op` to error during execution.
+    function h(op, v, r)
+        for val in v
+            r = op(r, val)
+        end
+        return r
+    end
+
+    R = Base.promote_op(f, typeof(op), typeof(v), typeof(init))
+    K = Base.promote_op(g, typeof(op), typeof(v), R)
+    return Base.promote_op(h, typeof(op), typeof(v), Base.promote_type(R, K))
+end
diff --git a/base/array.jl b/base/array.jl
index 4fcf29af69b4b..648fedd5036e1 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -216,16 +216,11 @@ isbitsunion(u::Type) = u isa Union && allocatedinline(u)
 function _unsetindex!(A::Array, i::Int)
     @inline
     @boundscheck checkbounds(A, i)
-    @inbounds _unsetindex!(GenericMemoryRef(A.ref, i))
-    return A
-end
-function _unsetindex!(A::Array, i::Int...)
-    @inline
-    @boundscheck checkbounds(A, i...)
-    @inbounds _unsetindex!(A, _to_linear_index(A, i...))
+    @inbounds _unsetindex!(memoryref(A.ref, i))
     return A
 end
 
+
 # TODO: deprecate this (aligned_sizeof and/or elsize and/or sizeof(Some{T}) are more correct)
 elsize(::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
 function elsize(::Type{Ptr{T}}) where T
@@ -244,14 +239,14 @@ function isassigned(a::Array, i::Int...)
     @_noub_if_noinbounds_meta
     @boundscheck checkbounds(Bool, a, i...) || return false
     ii = _sub2ind(size(a), i...)
-    return @inbounds isassigned(memoryref(a.ref, ii, false))
+    return @inbounds isassigned(memoryrefnew(a.ref, ii, false))
 end
 
 function isassigned(a::Vector, i::Int) # slight compiler simplification for the most common case
     @inline
     @_noub_if_noinbounds_meta
     @boundscheck checkbounds(Bool, a, i) || return false
-    return @inbounds isassigned(memoryref(a.ref, i, false))
+    return @inbounds isassigned(memoryrefnew(a.ref, i, false))
 end
 
 
@@ -275,26 +270,26 @@ function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T
 end
 
 """
-    unsafe_copyto!(dest::Array, do, src::Array, so, N)
+    unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n)
 
-Copy `N` elements from a source array to a destination, starting at the linear index `so` in the
-source and `do` in the destination (1-indexed).
+Copy `n` elements from a source array to a destination, starting at the linear index `soffs` in the
+source and `doffs` in the destination (1-indexed).
 
 The `unsafe` prefix on this function indicates that no validation is performed to ensure
-that N is inbounds on either array. Incorrect usage may corrupt or segfault your program, in
+that n is inbounds on either array. Incorrect usage may corrupt or segfault your program, in
 the same manner as C.
 """
 function unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n)
     n == 0 && return dest
-    unsafe_copyto!(GenericMemoryRef(dest.ref, doffs), GenericMemoryRef(src.ref, soffs), n)
+    unsafe_copyto!(memoryref(dest.ref, doffs), memoryref(src.ref, soffs), n)
     return dest
 end
 
 """
-    copyto!(dest, do, src, so, N)
+    copyto!(dest, doffs, src, soffs, n)
 
-Copy `N` elements from collection `src` starting at the linear index `so`, to array `dest` starting at
-the index `do`. Return `dest`.
+Copy `n` elements from collection `src` starting at the linear index `soffs`, to array `dest` starting at
+the index `doffs`. Return `dest`.
 """
 copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n)
 copyto!(dest::Array, doffs::Integer, src::Memory, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n)
@@ -308,8 +303,8 @@ function _copyto_impl!(dest::Union{Array,Memory}, doffs::Integer, src::Union{Arr
     n > 0 || _throw_argerror("Number of elements to copy must be non-negative.")
     @boundscheck checkbounds(dest, doffs:doffs+n-1)
     @boundscheck checkbounds(src, soffs:soffs+n-1)
-    @inbounds let dest = GenericMemoryRef(dest isa Array ? getfield(dest, :ref) : dest, doffs)
-                  src = GenericMemoryRef(src isa Array ? getfield(src, :ref) : src, soffs)
+    @inbounds let dest = memoryref(dest isa Array ? getfield(dest, :ref) : dest, doffs),
+                  src = memoryref(src isa Array ? getfield(src, :ref) : src, soffs)
         unsafe_copyto!(dest, src, n)
     end
     return dest
@@ -353,7 +348,7 @@ copy
     @_nothrow_meta
     ref = a.ref
     newmem = ccall(:jl_genericmemory_copy_slice, Ref{Memory{T}}, (Any, Ptr{Cvoid}, Int), ref.mem, ref.ptr_or_offset, length(a))
-    return $(Expr(:new, :(typeof(a)), :(Core.memoryref(newmem)), :(a.size)))
+    return $(Expr(:new, :(typeof(a)), :(memoryref(newmem)), :(a.size)))
 end
 
 ## Constructors ##
@@ -529,6 +524,7 @@ function fill end
 fill(v, dims::DimOrInd...) = fill(v, dims)
 fill(v, dims::NTuple{N, Union{Integer, OneTo}}) where {N} = fill(v, map(to_dim, dims))
 fill(v, dims::NTuple{N, Integer}) where {N} = (a=Array{typeof(v),N}(undef, dims); fill!(a, v); a)
+fill(v, dims::NTuple{N, DimOrInd}) where {N} = (a=similar(Array{typeof(v),N}, dims); fill!(a, v); a)
 fill(v, dims::Tuple{}) = (a=Array{typeof(v),0}(undef, dims); fill!(a, v); a)
 
 """
@@ -589,24 +585,14 @@ for (fname, felt) in ((:zeros, :zero), (:ones, :one))
             fill!(a, $felt(T))
             return a
         end
+        function $fname(::Type{T}, dims::NTuple{N, DimOrInd}) where {T,N}
+            a = similar(Array{T,N}, dims)
+            fill!(a, $felt(T))
+            return a
+        end
     end
 end
 
-function _one(unit::T, x::AbstractMatrix) where T
-    require_one_based_indexing(x)
-    m,n = size(x)
-    m==n || throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
-    # Matrix{T}(I, m, m)
-    I = zeros(T, m, m)
-    for i in 1:m
-        I[i,i] = unit
-    end
-    I
-end
-
-one(x::AbstractMatrix{T}) where {T} = _one(one(T), x)
-oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x)
-
 ## Conversions ##
 
 convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a)::T
@@ -674,7 +660,7 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it
 
 
 """
-    collect(collection)
+    collect(iterator)
 
 Return an `Array` of all items in a collection or iterator. For dictionaries, returns
 a `Vector` of `key=>value` [Pair](@ref Pair)s. If the argument is array-like or is an iterator
@@ -685,6 +671,9 @@ Used by [comprehensions](@ref man-comprehensions) to turn a [generator expressio
 into an `Array`. Thus, *on generators*, the square-brackets notation may be used instead of calling `collect`,
 see second example.
 
+The element type of the returned array is based on the types of the values collected. However, if the
+iterator is empty then the element type of the returned (empty) array is determined by type inference.
+
 # Examples
 
 Collect items from a `UnitRange{Int64}` collection:
@@ -706,6 +695,21 @@ julia> collect(x^2 for x in 1:3)
  4
  9
 ```
+
+Collecting an empty iterator where the result type depends on type inference:
+
+```jldoctest
+julia> [rand(Bool) ? 1 : missing for _ in []]
+Union{Missing, Int64}[]
+```
+
+When the iterator is non-empty, the result type depends only on values:
+
+```julia-repl
+julia> [rand(Bool) ? 1 : missing for _ in [""]]
+1-element Vector{Int64}:
+ 1
+```
 """
 collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr))
 
@@ -978,21 +982,21 @@ function setindex! end
 function setindex!(A::Array{T}, x, i::Int) where {T}
     @_noub_if_noinbounds_meta
     @boundscheck (i - 1)%UInt < length(A)%UInt || throw_boundserror(A, (i,))
-    memoryrefset!(memoryref(A.ref, i, false), x isa T ? x : convert(T,x)::T, :not_atomic, false)
+    memoryrefset!(memoryrefnew(A.ref, i, false), x isa T ? x : convert(T,x)::T, :not_atomic, false)
     return A
 end
 function setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T}
     @inline
     @_noub_if_noinbounds_meta
     @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking
-    memoryrefset!(memoryref(A.ref, _to_linear_index(A, i1, i2, I...), false), x isa T ? x : convert(T,x)::T, :not_atomic, false)
+    memoryrefset!(memoryrefnew(A.ref, _to_linear_index(A, i1, i2, I...), false), x isa T ? x : convert(T,x)::T, :not_atomic, false)
     return A
 end
 
 __safe_setindex!(A::Vector{Any}, @nospecialize(x), i::Int) = (@inline; @_nothrow_noub_meta;
-    memoryrefset!(memoryref(A.ref, i, false), x, :not_atomic, false); return A)
+    memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A)
 __safe_setindex!(A::Vector{T}, x::T, i::Int) where {T} = (@inline; @_nothrow_noub_meta;
-    memoryrefset!(memoryref(A.ref, i, false), x, :not_atomic, false); return A)
+    memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A)
 __safe_setindex!(A::Vector{T}, x,    i::Int) where {T} = (@inline;
     __safe_setindex!(A, convert(T, x)::T, i))
 
@@ -1064,10 +1068,13 @@ function _growbeg!(a::Vector, delta::Integer)
     setfield!(a, :size, (newlen,))
     # if offset is far enough advanced to fit data in existing memory without copying
     if delta <= offset - 1
-        setfield!(a, :ref, @inbounds GenericMemoryRef(ref, 1 - delta))
+        setfield!(a, :ref, @inbounds memoryref(ref, 1 - delta))
     else
         @noinline (function()
         memlen = length(mem)
+        if offset + len - 1 > memlen || offset < 1
+            throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks"))
+        end
         # since we will allocate the array in the middle of the memory we need at least 2*delta extra space
         # the +1 is because I didn't want to have an off by 1 error.
         newmemlen = max(overallocation(memlen), len + 2 * delta + 1)
@@ -1083,7 +1090,10 @@ function _growbeg!(a::Vector, delta::Integer)
             newmem = array_new_memory(mem, newmemlen)
         end
         unsafe_copyto!(newmem, newoffset + delta, mem, offset, len)
-        setfield!(a, :ref, @inbounds GenericMemoryRef(newmem, newoffset))
+        if ref !== a.ref
+            @noinline throw(ConcurrencyViolationError("Vector can not be resized concurrently"))
+        end
+        setfield!(a, :ref, @inbounds memoryref(newmem, newoffset))
         end)()
     end
     return
@@ -1103,6 +1113,10 @@ function _growend!(a::Vector, delta::Integer)
     newmemlen = offset + newlen - 1
     if memlen < newmemlen
         @noinline (function()
+        if offset + len - 1 > memlen || offset < 1
+            throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks"))
+        end
+
         if offset - 1 > div(5 * newlen, 4)
             # If the offset is far enough that we can copy without resizing
             # while maintaining proportional spacing on both ends of the array
@@ -1118,8 +1132,11 @@ function _growend!(a::Vector, delta::Integer)
             newmem = array_new_memory(mem, newmemlen2)
             newoffset = offset
         end
-        newref = @inbounds GenericMemoryRef(newmem, newoffset)
+        newref = @inbounds memoryref(newmem, newoffset)
         unsafe_copyto!(newref, ref, len)
+        if ref !== a.ref
+            @noinline throw(ConcurrencyViolationError("Vector can not be resized concurrently"))
+        end
         setfield!(a, :ref, newref)
         end)()
     end
@@ -1147,7 +1164,7 @@ function _growat!(a::Vector, i::Integer, delta::Integer)
     prefer_start = i <= div(len, 2)
     # if offset is far enough advanced to fit data in beginning of the memory
     if prefer_start && delta <= offset - 1
-        newref = @inbounds GenericMemoryRef(mem, offset - delta)
+        newref = @inbounds memoryref(mem, offset - delta)
         unsafe_copyto!(newref, ref, i)
         setfield!(a, :ref, newref)
         for j in i:i+delta-1
@@ -1164,7 +1181,7 @@ function _growat!(a::Vector, i::Integer, delta::Integer)
         newmemlen = max(overallocation(memlen), len+2*delta+1)
         newoffset = (newmemlen - newlen) ÷ 2 + 1
         newmem = array_new_memory(mem, newmemlen)
-        newref = @inbounds GenericMemoryRef(newmem, newoffset)
+        newref = @inbounds memoryref(newmem, newoffset)
         unsafe_copyto!(newref, ref, i-1)
         unsafe_copyto!(newmem, newoffset + delta + i - 1, mem, offset + i - 1, len - i + 1)
         setfield!(a, :ref, newref)
@@ -1181,7 +1198,7 @@ function _deletebeg!(a::Vector, delta::Integer)
     end
     newlen = len - delta
     if newlen != 0 # if newlen==0 we could accidentally index past the memory
-        newref = @inbounds GenericMemoryRef(a.ref, delta + 1)
+        newref = @inbounds memoryref(a.ref, delta + 1)
         setfield!(a, :ref, newref)
     end
     setfield!(a, :size, (newlen,))
@@ -1317,8 +1334,7 @@ end
 
 append!(a::AbstractVector, iter) = _append!(a, IteratorSize(iter), iter)
 push!(a::AbstractVector, iter...) = append!(a, iter)
-
-append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a)
+append!(a::AbstractVector, iter...) = (for v in iter; append!(a, v); end; return a)
 
 function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter)
     n = Int(length(iter))::Int
@@ -1377,10 +1393,9 @@ function prepend!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T
     return a
 end
 
-prepend!(a::Vector, iter) = _prepend!(a, IteratorSize(iter), iter)
-pushfirst!(a::Vector, iter...) = prepend!(a, iter)
-
-prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a)
+prepend!(a::AbstractVector, iter) = _prepend!(a, IteratorSize(iter), iter)
+pushfirst!(a::AbstractVector, iter...) = prepend!(a, iter)
+prepend!(a::AbstractVector, iter...) = (for v = reverse(iter); prepend!(a, v); end; return a)
 
 function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter)
     @_terminates_locally_meta
@@ -1498,16 +1513,16 @@ function sizehint!(a::Vector, sz::Integer; first::Bool=false, shrink::Bool=true)
         end
         newmem = array_new_memory(mem, sz)
         if first
-            newref = GenericMemoryRef(newmem, inc + 1)
+            newref = memoryref(newmem, inc + 1)
         else
-            newref = GenericMemoryRef(newmem)
+            newref = memoryref(newmem)
         end
         unsafe_copyto!(newref, ref, len)
         setfield!(a, :ref, newref)
     elseif first
         _growbeg!(a, inc)
         newref = getfield(a, :ref)
-        newref = GenericMemoryRef(newref, inc + 1)
+        newref = memoryref(newref, inc + 1)
         setfield!(a, :size, (len,)) # undo the size change from _growbeg!
         setfield!(a, :ref, newref) # undo the offset change from _growbeg!
     else # last
@@ -2622,7 +2637,7 @@ Dict{Symbol, Int64} with 3 entries:
   :B => -1
   :C => 0
 
-julia> findall(x -> x >= 0, d)
+julia> findall(≥(0), d)
 2-element Vector{Symbol}:
  :A
  :C
@@ -3084,13 +3099,15 @@ function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N}
     @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len)
     if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len)
         mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
-        ref = MemoryRef(mem)
+        ref = memoryref(mem)
     end
     return ref
 end
 
-@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(
-    "Attempted to wrap a MemoryRef of length $len with an Array of size dims=$dims, which is invalid because prod(dims) = $proddims > $len, so that the array would have more elements than the underlying memory can store."))
+@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(LazyString(
+    "Attempted to wrap a MemoryRef of length ", len, " with an Array of size dims=", dims,
+    " which is invalid because prod(dims) = ", proddims, " > ", len,
+    " so that the array would have more elements than the underlying memory can store.")))
 
 @eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N}
     dims = convert(Dims, dims)
@@ -3100,7 +3117,7 @@ end
 
 @eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N}
     dims = convert(Dims, dims)
-    ref = _wrap(MemoryRef(m), dims)
+    ref = _wrap(memoryref(m), dims)
     $(Expr(:new, :(Array{T, N}), :ref, :dims))
 end
 @eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T}
@@ -3110,11 +3127,11 @@ end
 end
 @eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T}
     dims = (Int(l),)
-    ref = _wrap(MemoryRef(m), (l,))
+    ref = _wrap(memoryref(m), (l,))
     $(Expr(:new, :(Array{T, 1}), :ref, :dims))
 end
 @eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}) where {T}
-    ref = MemoryRef(m)
+    ref = memoryref(m)
     dims = (length(m),)
     $(Expr(:new, :(Array{T, 1}), :ref, :dims))
 end
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index 3c782be10e194..c6cb3d3fa73bb 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -127,8 +127,11 @@ function _trywait(t::Union{Timer, AsyncCondition})
         t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release)
     else
         if !isopen(t)
-            close(t) # wait for the close to complete
-            return false
+            set = t.set
+            if !set
+                close(t) # wait for the close to complete
+                return false
+            end
         end
         iolock_begin()
         set = t.set
@@ -151,7 +154,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
         end
         iolock_end()
     end
-    @atomic :monotonic t.set = false
+    @atomic :monotonic t.set = false # if there are multiple waiters, an unspecified number may short-circuit past here
     return set
 end
 
@@ -161,14 +164,14 @@ function wait(t::Union{Timer, AsyncCondition})
 end
 
 
-isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL
+isopen(t::Union{Timer, AsyncCondition}) = @atomic :acquire t.isopen
 
 function close(t::Union{Timer, AsyncCondition})
-    t.handle == C_NULL && return # short-circuit path
+    t.handle == C_NULL && !t.isopen && return # short-circuit path, :monotonic
     iolock_begin()
     if t.handle != C_NULL
         if t.isopen
-            @atomic :monotonic t.isopen = false
+            @atomic :release t.isopen = false
             ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
         end
         # implement _trywait here without the auto-reset function, just waiting for the final close signal
@@ -186,6 +189,8 @@ function close(t::Union{Timer, AsyncCondition})
             unlock(t.cond)
             unpreserve_handle(t)
         end
+    elseif t.isopen
+        @atomic :release t.isopen = false
     end
     iolock_end()
     nothing
@@ -198,8 +203,8 @@ function uvfinalize(t::Union{Timer, AsyncCondition})
         if t.handle != C_NULL
             disassociate_julia_struct(t.handle) # not going to call the usual close hooks anymore
             if t.isopen
-                @atomic :monotonic t.isopen = false
-                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
+                @atomic :release t.isopen = false
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) # this will call Libc.free
             end
             @atomic :monotonic t.handle = C_NULL
             notify(t.cond, false)
@@ -214,8 +219,10 @@ end
 function _uv_hook_close(t::Union{Timer, AsyncCondition})
     lock(t.cond)
     try
-        @atomic :monotonic t.isopen = false
-        Libc.free(@atomicswap :monotonic t.handle = C_NULL)
+        handle = t.handle
+        @atomic :release t.isopen = false
+        @atomic :monotonic t.handle = C_NULL
+        Libc.free(handle)
         notify(t.cond, false)
     finally
         unlock(t.cond)
@@ -243,7 +250,7 @@ function uv_timercb(handle::Ptr{Cvoid})
         if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0
             # timer is stopped now
             if t.isopen
-                @atomic :monotonic t.isopen = false
+                @atomic :release t.isopen = false
                 ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
             end
         end
diff --git a/base/asyncmap.jl b/base/asyncmap.jl
index c81afbb7e9115..02e515d2e0c6c 100644
--- a/base/asyncmap.jl
+++ b/base/asyncmap.jl
@@ -9,6 +9,8 @@ Uses multiple concurrent tasks to map `f` over a collection (or multiple
 equal length collections). For multiple collection arguments, `f` is
 applied elementwise.
 
+The output is guaranteed to be the same order as the elements of the collection(s) `c`.
+
 `ntasks` specifies the number of tasks to run concurrently.
 Depending on the length of the collections, if `ntasks` is unspecified,
 up to 100 tasks will be used for concurrent mapping.
diff --git a/base/atomics.jl b/base/atomics.jl
index dde981b8c0e6a..e6f3a5654cbf7 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -364,13 +364,13 @@ for typ in atomictypes
     irt = "$ilt, $ilt*"
     @eval getindex(x::Atomic{$typ}) =
         GC.@preserve x llvmcall($"""
-                 %ptr = inttoptr i$WORD_SIZE %0 to $lt*
+                 %ptr = bitcast i8* %0 to $lt*
                  %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
         GC.@preserve x llvmcall($"""
-                 %ptr = inttoptr i$WORD_SIZE %0 to $lt*
+                 %ptr = bitcast i8* %0 to $lt*
                  store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
                  """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
@@ -379,7 +379,7 @@ for typ in atomictypes
     if typ <: Integer
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
             GC.@preserve x llvmcall($"""
-                     %ptr = inttoptr i$WORD_SIZE %0 to $lt*
+                     %ptr = bitcast i8* %0 to $lt*
                      %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
                      %rv = extractvalue { $lt, i1 } %rs, 0
                      ret $lt %rv
@@ -388,7 +388,7 @@ for typ in atomictypes
     else
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
             GC.@preserve x llvmcall($"""
-                     %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
+                     %iptr = bitcast i8* %0 to $ilt*
                      %icmp = bitcast $lt %1 to $ilt
                      %inew = bitcast $lt %2 to $ilt
                      %irs = cmpxchg $ilt* %iptr, $ilt %icmp, $ilt %inew acq_rel acquire
@@ -411,7 +411,7 @@ for typ in atomictypes
         if typ <: Integer
             @eval $fn(x::Atomic{$typ}, v::$typ) =
                 GC.@preserve x llvmcall($"""
-                         %ptr = inttoptr i$WORD_SIZE %0 to $lt*
+                         %ptr = bitcast i8* %0 to $lt*
                          %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
                          ret $lt %rv
                          """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
@@ -419,7 +419,7 @@ for typ in atomictypes
             rmwop === :xchg || continue
             @eval $fn(x::Atomic{$typ}, v::$typ) =
                 GC.@preserve x llvmcall($"""
-                         %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
+                         %iptr = bitcast i8* %0 to $ilt*
                          %ival = bitcast $lt %1 to $ilt
                          %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
                          %rv = bitcast $ilt %irv to $lt
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index 9e45275f02636..c8a55c99a5724 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -194,11 +194,11 @@ end
 function validate_tags(tags::Dict)
     throw_invalid_key(k) = throw(ArgumentError("Key \"$(k)\" cannot have value \"$(tags[k])\""))
     # Validate `arch`
-    if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le")
+    if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le", "riscv64")
         throw_invalid_key("arch")
     end
     # Validate `os`
-    if tags["os"] ∉ ("linux", "macos", "freebsd", "windows")
+    if tags["os"] ∉ ("linux", "macos", "freebsd", "openbsd", "windows")
         throw_invalid_key("os")
     end
     # Validate `os`/`arch` combination
@@ -375,8 +375,10 @@ function os()
         return "windows"
     elseif Sys.isapple()
         return "macos"
-    elseif Sys.isbsd()
+    elseif Sys.isfreebsd()
         return "freebsd"
+    elseif Sys.isopenbsd()
+        return "openbsd"
     else
         return "linux"
     end
@@ -422,6 +424,7 @@ const platform_names = Dict(
     "macos" => "macOS",
     "windows" => "Windows",
     "freebsd" => "FreeBSD",
+    "openbsd" => "OpenBSD",
     nothing => "Unknown",
 )
 
@@ -556,6 +559,8 @@ function os_str(p::AbstractPlatform)
         else
             return "-unknown-freebsd"
         end
+    elseif os(p) == "openbsd"
+        return "-unknown-openbsd"
     else
         return "-unknown"
     end
@@ -581,7 +586,8 @@ Sys.isapple(p::AbstractPlatform) = os(p) == "macos"
 Sys.islinux(p::AbstractPlatform) = os(p) == "linux"
 Sys.iswindows(p::AbstractPlatform) = os(p) == "windows"
 Sys.isfreebsd(p::AbstractPlatform) = os(p) == "freebsd"
-Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "macos")
+Sys.isopenbsd(p::AbstractPlatform) = os(p) == "openbsd"
+Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "openbsd", "macos")
 Sys.isunix(p::AbstractPlatform) = Sys.isbsd(p) || Sys.islinux(p)
 
 const arch_mapping = Dict(
@@ -591,6 +597,7 @@ const arch_mapping = Dict(
     "armv7l" => "arm(v7l)?", # if we just see `arm-linux-gnueabihf`, we assume it's `armv7l`
     "armv6l" => "armv6l",
     "powerpc64le" => "p(ower)?pc64le",
+    "riscv64" => "riscv64",
 )
 # Keep this in sync with `CPUID.ISAs_by_family`
 # These are the CPUID side of the microarchitectures targeted by GCC flags in BinaryBuilder.jl
@@ -626,12 +633,16 @@ const arch_march_isa_mapping = let
         ],
         "powerpc64le" => [
             "power8" => get_set("powerpc64le", "power8"),
-        ]
+        ],
+        "riscv64" => [
+            "riscv64" => get_set("riscv64", "riscv64"),
+        ],
     )
 end
 const os_mapping = Dict(
     "macos" => "-apple-darwin[\\d\\.]*",
     "freebsd" => "-(.*-)?freebsd[\\d\\.]*",
+    "openbsd" => "-(.*-)?openbsd[\\d\\.]*",
     "windows" => "-w64-mingw32",
     "linux" => "-(.*-)?linux",
 )
@@ -745,6 +756,9 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f
         if os == "freebsd"
             os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa)
         end
+        if os == "openbsd"
+            os_version = extract_os_version("openbsd", r".*openbsd([\d.]+)"sa)
+        end
         tags["os_version"] = os_version
 
         return Platform(arch, os, tags; validate_strict)
@@ -802,7 +816,7 @@ function parse_dl_name_version(path::String, os::String)
         # On OSX, libraries look like `libnettle.6.3.dylib`
         dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa
     else
-        # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0`
+        # On Linux and others BSD, libraries look like `libnettle.so.6.3.0`
         dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa
     end
 
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 4411fc9323826..f7eeafbb62231 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -404,6 +404,7 @@ falses(dims::DimOrInd...) = falses(dims)
 falses(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = falses(map(to_dim, dims))
 falses(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), false)
 falses(dims::Tuple{}) = fill!(BitArray(undef, dims), false)
+falses(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), false)
 
 """
     trues(dims)
@@ -422,6 +423,7 @@ trues(dims::DimOrInd...) = trues(dims)
 trues(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = trues(map(to_dim, dims))
 trues(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), true)
 trues(dims::Tuple{}) = fill!(BitArray(undef, dims), true)
+trues(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), true)
 
 function one(x::BitMatrix)
     m, n = size(x)
@@ -482,7 +484,7 @@ end
 reshape(B::BitArray, dims::Tuple{Vararg{Int}}) = _bitreshape(B, dims)
 function _bitreshape(B::BitArray, dims::NTuple{N,Int}) where N
     prod(dims) == length(B) ||
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(B))"))
+        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $(length(B))"))
     Br = BitArray{N}(undef, ntuple(i->0,Val(N))...)
     Br.chunks = B.chunks
     Br.len = prod(dims)
diff --git a/base/boot.jl b/base/boot.jl
index e041488cdb74c..608e273d4b514 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -64,7 +64,7 @@
 #   }
 #end
 
-# struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
+#struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
 #    mem::GenericMemory{kind, T, AS}
 #    data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid}
 #end
@@ -125,12 +125,13 @@
 #    file::Union{Symbol,Nothing}
 #end
 
-#struct LineInfoNode
-#    module::Module
-#    method::Any (Union{Symbol, Method, MethodInstance})
-#    file::Symbol
-#    line::Int32
-#    inlined_at::Int32
+#struct LegacyLineInfoNode end # only used internally during lowering
+
+#struct DebugInfo
+#    def::Any # (Union{Symbol, Method, MethodInstance})
+#    linetable::Any # (Union{Nothing,DebugInfo})
+#    edges::SimpleVector # Vector{DebugInfo}
+#    codelocs::String # compressed Vector{UInt8}
 #end
 
 #struct GotoNode
@@ -205,7 +206,7 @@ export
     InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
     OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError,
     TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
-    UndefKeywordError, ConcurrencyViolationError,
+    UndefKeywordError, ConcurrencyViolationError, FieldError,
     # AST representation
     Expr, QuoteNode, LineNumberNode, GlobalRef,
     # object model functions
@@ -282,7 +283,9 @@ macro _foldable_meta()
         #=:notaskstate=#true,
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 
 macro inline()   Expr(:meta, :inline)   end
@@ -296,6 +299,9 @@ TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub)
 TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub)
 UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t)
 
+const Memory{T} = GenericMemory{:not_atomic, T, CPU}
+const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU}
+
 # simple convert for use by constructors of types in Core
 # note that there is no actual conversion defined here,
 # so the methods and ccall's in Core aren't permitted to use convert
@@ -400,6 +406,11 @@ struct AssertionError <: Exception
 end
 AssertionError() = AssertionError("")
 
+struct FieldError <: Exception
+    type::DataType
+    field::Symbol
+end
+
 abstract type WrappedException <: Exception end
 
 struct LoadError <: WrappedException
@@ -466,8 +477,10 @@ eval(Core, quote
         isa(f, String) && (f = Symbol(f))
         return $(Expr(:new, :LineNumberNode, :l, :f))
     end
-    LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
-        $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
+    DebugInfo(def::Union{Method,MethodInstance,Symbol}, linetable::Union{Nothing,DebugInfo}, edges::SimpleVector, codelocs::String) =
+        $(Expr(:new, :DebugInfo, :def, :linetable, :edges, :codelocs))
+    DebugInfo(def::Union{Method,MethodInstance,Symbol}) =
+        $(Expr(:new, :DebugInfo, :def, nothing, Core.svec(), ""))
     SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
     PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
     PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
@@ -482,16 +495,27 @@ eval(Core, quote
     MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
 end)
 
+const NullDebugInfo = DebugInfo(:none)
+
+struct LineInfoNode # legacy support for aiding Serializer.deserialize of old IR
+    mod::Module
+    method
+    file::Symbol
+    line::Int32
+    inlined_at::Int32
+    LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = new(mod, method, file, line, inlined_at)
+end
+
+
 function CodeInstance(
     mi::MethodInstance, owner, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const),
     @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
-    ipo_effects::UInt32, effects::UInt32, @nospecialize(analysis_results),
-    relocatability::UInt8)
+    effects::UInt32, @nospecialize(analysis_results),
+    relocatability::UInt8, edges::Union{DebugInfo,Nothing})
     return ccall(:jl_new_codeinst, Ref{CodeInstance},
-        (Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
+        (Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, UInt8, Any),
         mi, owner, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world,
-        ipo_effects, effects, analysis_results,
-        relocatability)
+        effects, analysis_results, relocatability, edges)
 end
 GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s)
 Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
@@ -517,18 +541,13 @@ const undef = UndefInitializer()
 # empty vector constructor
 (self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0)
 
+memoryref(mem::GenericMemory) = memoryrefnew(mem)
+memoryref(mem::GenericMemory, i::Integer) = memoryrefnew(memoryrefnew(mem), Int(i), @_boundscheck)
+memoryref(ref::GenericMemoryRef, i::Integer) = memoryrefnew(ref, Int(i), @_boundscheck)
 GenericMemoryRef(mem::GenericMemory) = memoryref(mem)
-GenericMemoryRef(ref::GenericMemoryRef, i::Integer) = memoryref(ref, Int(i), @_boundscheck)
-GenericMemoryRef(mem::GenericMemory, i::Integer) = memoryref(memoryref(mem), Int(i), @_boundscheck)
-GenericMemoryRef{kind,<:Any,AS}(mem::GenericMemory{kind,<:Any,AS}) where {kind,AS} = memoryref(mem)
-GenericMemoryRef{kind,<:Any,AS}(ref::GenericMemoryRef{kind,<:Any,AS}, i::Integer) where {kind,AS} = memoryref(ref, Int(i), @_boundscheck)
-GenericMemoryRef{kind,<:Any,AS}(mem::GenericMemory{kind,<:Any,AS}, i::Integer) where {kind,AS}  = memoryref(memoryref(mem), Int(i), @_boundscheck)
-GenericMemoryRef{kind,T,AS}(mem::GenericMemory{kind,T,AS}) where {kind,T,AS}  = memoryref(mem)
-GenericMemoryRef{kind,T,AS}(ref::GenericMemoryRef{kind,T,AS}, i::Integer) where {kind,T,AS} = memoryref(ref, Int(i), @_boundscheck)
-GenericMemoryRef{kind,T,AS}(mem::GenericMemory{kind,T,AS}, i::Integer) where {kind,T,AS} = memoryref(memoryref(mem), Int(i), @_boundscheck)
+GenericMemoryRef(mem::GenericMemory, i::Integer) = memoryref(mem, i)
+GenericMemoryRef(mem::GenericMemoryRef, i::Integer) = memoryref(mem, i)
 
-const Memory{T} = GenericMemory{:not_atomic, T, CPU}
-const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU}
 const AtomicMemory{T} = GenericMemory{:atomic, T, CPU}
 const AtomicMemoryRef{T} = GenericMemoryRef{:atomic, T, CPU}
 
@@ -547,19 +566,22 @@ function _checked_mul_dims(m::Int, n::Int)
     return a, ovflw
 end
 function _checked_mul_dims(m::Int, d::Int...)
-   @_foldable_meta # the compiler needs to know this loop terminates
-   a = m
-   i = 1
-   ovflw = false
-   while Intrinsics.sle_int(i, nfields(d))
-     di = getfield(d, i)
-     b = Intrinsics.checked_smul_int(a, di)
-     ovflw = Intrinsics.or_int(ovflw, getfield(b, 2))
-     ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, di))
-     a = getfield(b, 1)
-     i = Intrinsics.add_int(i, 1)
+    @_foldable_meta # the compiler needs to know this loop terminates
+    a = m
+    i = 1
+    ovflw = false
+    neg = Intrinsics.ule_int(typemax_Int, m)
+    zero = false # if m==0 we won't have overflow since we go left to right
+    while Intrinsics.sle_int(i, nfields(d))
+        di = getfield(d, i)
+        b = Intrinsics.checked_smul_int(a, di)
+        zero = Intrinsics.or_int(zero, di === 0)
+        ovflw = Intrinsics.or_int(ovflw, getfield(b, 2))
+        neg = Intrinsics.or_int(neg, Intrinsics.ule_int(typemax_Int, di))
+        a = getfield(b, 1)
+        i = Intrinsics.add_int(i, 1)
    end
-   return a, ovflw
+   return a, Intrinsics.or_int(neg, Intrinsics.and_int(ovflw, Intrinsics.not_int(zero)))
 end
 
 # convert a set of dims to a length, with overflow checking
@@ -629,13 +651,13 @@ module IR
 
 export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
     NewvarNode, SSAValue, SlotNumber, Argument,
-    PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct, InterConditional, EnterNode
+    PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
+    Const, PartialStruct, InterConditional, EnterNode, memoryref
 
 using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
     NewvarNode, SSAValue, SlotNumber, Argument,
-    PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct, InterConditional, EnterNode
+    PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
+    Const, PartialStruct, InterConditional, EnterNode, memoryref
 
 end # module IR
 
@@ -729,6 +751,13 @@ function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospec
     end
 end
 
+# If the generator is a subtype of this trait, inference caches the generated unoptimized
+# code, sacrificing memory space to improve the performance of subsequent inferences.
+# This tradeoff is not appropriate in general cases (e.g., for `GeneratedFunctionStub`s
+# generated from the front end), but it can be justified for generators involving complex
+# code transformations, such as a Cassette-like system.
+abstract type CachedGenerator end
+
 NamedTuple() = NamedTuple{(),Tuple{}}(())
 
 eval(Core, :(NamedTuple{names}(args::Tuple) where {names} =
@@ -736,8 +765,8 @@ eval(Core, :(NamedTuple{names}(args::Tuple) where {names} =
 
 using .Intrinsics: sle_int, add_int
 
-eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} =
-             $(Expr(:splatnew, :(NamedTuple{names,T}), :args))))
+eval(Core, :((NT::Type{NamedTuple{names,T}})(args::T) where {names, T <: Tuple} =
+             $(Expr(:splatnew, :NT, :args))))
 
 # constructors for built-in types
 
@@ -754,7 +783,7 @@ function is_top_bit_set(x::Union{Int8,UInt8})
 end
 
 # n.b. This function exists for CUDA to overload to configure error behavior (see #48097)
-throw_inexacterror(args...) = throw(InexactError(args...))
+throw_inexacterror(func::Symbol, to, val) = throw(InexactError(func, to, val))
 
 function check_sign_bit(::Type{To}, x) where {To}
     @inline
@@ -984,6 +1013,35 @@ const check_top_bit = check_sign_bit
 EnterNode(old::EnterNode, new_dest::Int) = isdefined(old, :scope) ?
     EnterNode(new_dest, old.scope) : EnterNode(new_dest)
 
+# typename(_).constprop_heuristic
+const FORCE_CONST_PROP      = 0x1
+const ARRAY_INDEX_HEURISTIC = 0x2
+const ITERATE_HEURISTIC     = 0x3
+const SAMETYPE_HEURISTIC    = 0x4
+
+# `typename` has special tfunc support in inference to improve
+# the result for `Type{Union{...}}`. It is defined here, so that the Compiler
+# can look it up by value.
+struct TypeNameError <: Exception
+    a
+    TypeNameError(@nospecialize(a)) = new(a)
+end
+
+typename(a) = throw(TypeNameError(a))
+typename(a::DataType) = a.name
+function typename(a::Union)
+    ta = typename(a.a)
+    tb = typename(a.b)
+    ta === tb || throw(TypeNameError(a))
+    return tb
+end
+typename(union::UnionAll) = typename(union.body)
+
+# Special inference support to avoid execess specialization of these methods.
+# TODO: Replace this by a generic heuristic.
+(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+(!==)(@nospecialize(a), @nospecialize(b)) = Intrinsics.not_int(a === b)
+
 include(Core, "optimized_generics.jl")
 
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 90453ccf19aa3..927c946e53e02 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -196,14 +196,18 @@ const andand = AndAnd()
 broadcasted(::AndAnd, a, b) = broadcasted((a, b) -> a && b, a, b)
 function broadcasted(::AndAnd, a, bc::Broadcasted)
     bcf = flatten(bc)
-    broadcasted((a, args...) -> a && bcf.f(args...), a, bcf.args...)
+    # Vararg type signature to specialize on args count. This is necessary for performance
+    # and innexpensive because this should only ever get called with 1+N = length(bc.args)
+    broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a && bcf.f(args...), a, bcf.args...)
 end
 struct OrOr end
 const oror = OrOr()
 broadcasted(::OrOr, a, b) = broadcasted((a, b) -> a || b, a, b)
 function broadcasted(::OrOr, a, bc::Broadcasted)
     bcf = flatten(bc)
-    broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...)
+    # Vararg type signature to specialize on args count. This is necessary for performance
+    # and innexpensive because this should only ever get called with 1+N = length(bc.args)
+    broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a || bcf.f(args...), a, bcf.args...)
 end
 
 Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{<:Any,Axes,F,Args}) where {NewStyle,Axes,F,Args} =
@@ -222,12 +226,12 @@ end
 ## Allocating the output container
 Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc))
 Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} =
-    similar(Array{ElType}, dims)
+    similar(Array{ElType, length(dims)}, dims)
 Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N =
     similar(BitArray, dims)
 # In cases of conflict we fall back on Array
 Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType =
-    similar(Array{ElType}, dims)
+    similar(Array{ElType, length(dims)}, dims)
 Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) =
     similar(BitArray, dims)
 
@@ -338,13 +342,14 @@ function flatten(bc::Broadcasted)
     # concatenate the nested arguments into {a, b, c, d}
     args = cat_nested(bc)
     # build a tuple of functions `makeargs`. Its elements take
-    # the whole "flat" argument list and and generate the appropriate
+    # the whole "flat" argument list and generate the appropriate
     # input arguments for the broadcasted function `f`, e.g.,
     #          makeargs[1] = ((w, x, y, z)) -> w
     #          makeargs[2] = ((w, x, y, z)) -> g(x, y)
     #          makeargs[3] = ((w, x, y, z)) -> z
     makeargs = make_makeargs(bc.args)
     f = Base.maybeconstructor(bc.f)
+    # TODO: consider specializing on args... if performance problems emerge:
     newf = (args...) -> (@inline; f(prepare_args(makeargs, args)...))
     return Broadcasted(bc.style, newf, args, bc.axes)
 end
@@ -517,8 +522,8 @@ function _bcs(shape::Tuple, newshape::Tuple)
     return (_bcs1(shape[1], newshape[1]), _bcs(tail(shape), tail(newshape))...)
 end
 # _bcs1 handles the logic for a single dimension
-_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $b"))))
-_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $(length(b))")))
+_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", b)))))
+_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", length(b)))))
 _bcs1(a, b::Integer) = _bcs1(b, a)
 _bcs1(a, b) = _bcsm(b, a) ? axistype(b, a) : _bcsm(a, b) ? axistype(a, b) : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size: a has axes ", a, " and b has axes ", b)))
 # _bcsm tests whether the second index is consistent with the first
@@ -746,6 +751,7 @@ The resulting container type is established by the following rules:
  - All other combinations of arguments default to returning an `Array`, but
    custom container types can define their own implementation and promotion-like
    rules to customize the result when they appear as arguments.
+ - The element type is determined in the same manner as in [`collect`](@ref).
 
 A special syntax exists for broadcasting: `f.(args...)` is equivalent to
 `broadcast(f, args...)`, and nested `f.(g.(args...))` calls are fused into a
@@ -1057,7 +1063,7 @@ end
 
 
 @noinline throwdm(axdest, axsrc) =
-    throw(DimensionMismatch("destination axes $axdest are not compatible with source axes $axsrc"))
+    throw(DimensionMismatch(LazyString("destination axes ", axdest, " are not compatible with source axes ", axsrc)))
 
 function restart_copyto_nonleaf!(newdest, dest, bc, val, I, iter, state, count)
     # Function barrier that makes the copying to newdest type stable
diff --git a/base/c.jl b/base/c.jl
index eb552d3507662..c1b34579e0a0b 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -344,7 +344,7 @@ function ccall_macro_lower(convention, func, rettype, types, args, nreq)
         check = quote
             if !isa(func, Ptr{Cvoid})
                 name = $name
-                throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))"))
+                throw(ArgumentError(LazyString("interpolated function `", name, "` was not a Ptr{Cvoid}, but ", typeof(func))))
             end
         end
         push!(statements, check)
diff --git a/base/char.jl b/base/char.jl
index 08d661c41de56..2e8410f6903e2 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -62,7 +62,14 @@ to an output stream, or `ncodeunits(string(c))` but computed efficiently.
     This method requires at least Julia 1.1. In Julia 1.0 consider
     using `ncodeunits(string(c))`.
 """
-ncodeunits(c::Char) = write(devnull, c) # this is surprisingly efficient
+function ncodeunits(c::Char)
+    u = reinterpret(UInt32, c)
+    # We care about how many trailing bytes are all zero
+    # subtract that from the total number of bytes
+    n_nonzero_bytes = sizeof(UInt32) - div(trailing_zeros(u), 0x8)
+    # Take care of '\0', which has an all-zero bitpattern
+    n_nonzero_bytes + iszero(u)
+end
 
 """
     codepoint(c::AbstractChar) -> Integer
@@ -216,6 +223,7 @@ hash(x::Char, h::UInt) =
     hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
 
 first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8
+first_utf8_byte(c::AbstractChar) = first_utf8_byte(Char(c)::Char)
 
 # fallbacks:
 isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
diff --git a/base/client.jl b/base/client.jl
index 087efe3d3b99c..2ca88c40aeb7e 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -95,7 +95,7 @@ function scrub_repl_backtrace(bt)
     if bt !== nothing && !(bt isa Vector{Any}) # ignore our sentinel value types
         bt = bt isa Vector{StackFrame} ? copy(bt) : stacktrace(bt)
         # remove REPL-related frames from interactive printing
-        eval_ind = findlast(frame -> !frame.from_c && frame.func === :eval, bt)
+        eval_ind = findlast(frame -> !frame.from_c && startswith(String(frame.func), "__repl_entry"), bt)
         eval_ind === nothing || deleteat!(bt, eval_ind:length(bt))
     end
     return bt
@@ -226,10 +226,9 @@ function incomplete_tag(ex::Expr)
 end
 incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail)
 
-cmd_suppresses_program(cmd) = cmd in ('e', 'E')
 function exec_options(opts)
     startup               = (opts.startupfile != 2)
-    global have_color     = (opts.color != 0) ? (opts.color == 1) : nothing # --color=on
+    global have_color     = colored_text(opts)
     global is_interactive = (opts.isinteractive != 0)
 
     # pre-process command line argument list
@@ -260,7 +259,7 @@ function exec_options(opts)
     # Load Distributed module only if any of the Distributed options have been specified.
     distributed_mode = (opts.worker == 1) || (opts.nprocs > 0) || (opts.machine_file != C_NULL)
     if distributed_mode
-        let Distributed = require_stdlib(PkgId(UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
+        let Distributed = require(PkgId(UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
             Core.eval(MainInclude, :(const Distributed = $Distributed))
             Core.eval(Main, :(using Base.MainInclude.Distributed))
         end
@@ -293,12 +292,12 @@ function exec_options(opts)
             invokelatest(show, Core.eval(Main, parse_input_line(arg)))
             println()
         elseif cmd == 'm'
-            @eval Main import $(Symbol(arg)).main
+            entrypoint = push!(split(arg, "."), "main")
+            Base.eval(Main, Expr(:import, Expr(:., Symbol.(entrypoint)...)))
             if !should_use_main_entrypoint()
                 error("`main` in `$arg` not declared as entry point (use `@main` to do so)")
             end
             return false
-
         elseif cmd == 'L'
             # load file immediately on all processors
             if !distributed_mode
@@ -340,11 +339,13 @@ function _global_julia_startup_file()
     # If it is not found, then continue on to the relative path based on Sys.BINDIR
     BINDIR = Sys.BINDIR
     SYSCONFDIR = Base.SYSCONFDIR
+    p1 = nothing
     if !isempty(SYSCONFDIR)
         p1 = abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl")
         isfile(p1) && return p1
     end
     p2 = abspath(BINDIR, "..", "etc", "julia", "startup.jl")
+    p1 == p2 && return nothing # don't check the same path twice
     isfile(p2) && return p2
     return nothing
 end
@@ -416,79 +417,98 @@ function load_REPL()
     return nothing
 end
 
-global active_repl
+global active_repl::Any
+global active_repl_backend = nothing
+
+function run_fallback_repl(interactive::Bool)
+    let input = stdin
+        if isa(input, File) || isa(input, IOStream)
+            # for files, we can slurp in the whole thing at once
+            ex = parse_input_line(read(input, String))
+            if Meta.isexpr(ex, :toplevel)
+                # if we get back a list of statements, eval them sequentially
+                # as if we had parsed them sequentially
+                for stmt in ex.args
+                    eval_user_input(stderr, stmt, true)
+                end
+                body = ex.args
+            else
+                eval_user_input(stderr, ex, true)
+            end
+        else
+            while !eof(input)
+                if interactive
+                    print("julia> ")
+                    flush(stdout)
+                end
+                try
+                    line = ""
+                    ex = nothing
+                    while !eof(input)
+                        line *= readline(input, keep=true)
+                        ex = parse_input_line(line)
+                        if !(isa(ex, Expr) && ex.head === :incomplete)
+                            break
+                        end
+                    end
+                    eval_user_input(stderr, ex, true)
+                catch err
+                    isa(err, InterruptException) ? print("\n\n") : rethrow()
+                end
+            end
+        end
+    end
+    nothing
+end
+
+function run_std_repl(REPL::Module, quiet::Bool, banner::Symbol, history_file::Bool)
+    term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
+    term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
+    banner == :no || REPL.banner(term, short=banner==:short)
+    if term.term_type == "dumb"
+        repl = REPL.BasicREPL(term)
+        quiet || @warn "Terminal not fully functional"
+    else
+        repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
+        repl.history_file = history_file
+    end
+    # Make sure any displays pushed in .julia/config/startup.jl ends up above the
+    # REPLDisplay
+    d = REPL.REPLDisplay(repl)
+    last_active_repl = @isdefined(active_repl) ? active_repl : nothing
+    last_active_repl_backend = active_repl_backend
+    global active_repl = repl
+    pushdisplay(d)
+    try
+        global active_repl = repl
+        _atreplinit(repl)
+        REPL.run_repl(repl, backend->(global active_repl_backend = backend))
+    finally
+        popdisplay(d)
+        active_repl = last_active_repl
+        active_repl_backend = last_active_repl_backend
+    end
+    nothing
+end
 
 # run the requested sort of evaluation loop on stdio
-function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool, color_set::Bool)
+function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool)
     fallback_repl = parse(Bool, get(ENV, "JULIA_FALLBACK_REPL", "false"))
     if !fallback_repl && interactive
         load_InteractiveUtils()
-        if !isassigned(REPL_MODULE_REF)
+        REPL = REPL_MODULE_REF[]
+        if REPL === Base
             load_REPL()
         end
     end
-    # TODO cleanup REPL_MODULE_REF
-    if !fallback_repl && interactive && isassigned(REPL_MODULE_REF)
-        invokelatest(REPL_MODULE_REF[]) do REPL
-            term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
-            term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
-            banner == :no || REPL.banner(term, short=banner==:short)
-            if term.term_type == "dumb"
-                repl = REPL.BasicREPL(term)
-                quiet || @warn "Terminal not fully functional"
-            else
-                repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
-                repl.history_file = history_file
-            end
-            global active_repl = repl
-            # Make sure any displays pushed in .julia/config/startup.jl ends up above the
-            # REPLDisplay
-            pushdisplay(REPL.REPLDisplay(repl))
-            _atreplinit(repl)
-            REPL.run_repl(repl, backend->(global active_repl_backend = backend))
-        end
+    REPL = REPL_MODULE_REF[]
+    if !fallback_repl && interactive && REPL !== Base
+        invokelatest(run_std_repl, REPL, quiet, banner, history_file)
     else
-        # otherwise provide a simple fallback
         if !fallback_repl && interactive && !quiet
             @warn "REPL provider not available: using basic fallback" LOAD_PATH=join(Base.LOAD_PATH, Sys.iswindows() ? ';' : ':')
         end
-        let input = stdin
-            if isa(input, File) || isa(input, IOStream)
-                # for files, we can slurp in the whole thing at once
-                ex = parse_input_line(read(input, String))
-                if Meta.isexpr(ex, :toplevel)
-                    # if we get back a list of statements, eval them sequentially
-                    # as if we had parsed them sequentially
-                    for stmt in ex.args
-                        eval_user_input(stderr, stmt, true)
-                    end
-                    body = ex.args
-                else
-                    eval_user_input(stderr, ex, true)
-                end
-            else
-                while !eof(input)
-                    if interactive
-                        print("julia> ")
-                        flush(stdout)
-                    end
-                    try
-                        line = ""
-                        ex = nothing
-                        while !eof(input)
-                            line *= readline(input, keep=true)
-                            ex = parse_input_line(line)
-                            if !(isa(ex, Expr) && ex.head === :incomplete)
-                                break
-                            end
-                        end
-                        eval_user_input(stderr, ex, true)
-                    catch err
-                        isa(err, InterruptException) ? print("\n\n") : rethrow()
-                    end
-                end
-            end
-        end
+        run_fallback_repl(interactive)
     end
     nothing
 end
@@ -565,8 +585,7 @@ function repl_main(_)
 
     quiet                 = (opts.quiet != 0)
     history_file          = (opts.historyfile != 0)
-    color_set             = (opts.color != 0) # --color!=auto
-    return run_main_repl(interactiveinput, quiet, banner, history_file, color_set)
+    return run_main_repl(interactiveinput, quiet, banner, history_file)
 end
 
 """
@@ -582,13 +601,13 @@ The `@main` macro may be used standalone or as part of the function definition,
 case, parentheses are required. In particular, the following are equivalent:
 
 ```
-function (@main)(ARGS)
+function (@main)(args)
     println("Hello World")
 end
 ```
 
 ```
-function main(ARGS)
+function main(args)
 end
 @main
 ```
@@ -601,7 +620,7 @@ imported into `Main`, it will be treated as an entrypoint in `Main`:
 ```
 module MyApp
     export main
-    (@main)(ARGS) = println("Hello World")
+    (@main)(args) = println("Hello World")
 end
 using .MyApp
 # `julia` Will execute MyApp.main at the conclusion of script execution
@@ -611,22 +630,22 @@ Note that in particular, the semantics do not attach to the method
 or the name:
 ```
 module MyApp
-    (@main)(ARGS) = println("Hello World")
+    (@main)(args) = println("Hello World")
 end
 const main = MyApp.main
 # `julia` Will *NOT* execute MyApp.main unless there is a separate `@main` annotation in `Main`
+```
 
 !!! compat "Julia 1.11"
     This macro is new in Julia 1.11. At present, the precise semantics of `@main` are still subject to change.
-```
 """
 macro main(args...)
     if !isempty(args)
-        error("USAGE: `@main` is expected to be used as `(@main)` without macro arguments.")
+        error("`@main` is expected to be used as `(@main)` without macro arguments.")
     end
     if isdefined(__module__, :main)
         if Base.binding_module(__module__, :main) !== __module__
-            error("USAGE: Symbol `main` is already a resolved import in module $(__module__). `@main` must be used in the defining module.")
+            error("Symbol `main` is already a resolved import in module $(__module__). `@main` must be used in the defining module.")
         end
     end
     Core.eval(__module__, quote
diff --git a/base/cmd.jl b/base/cmd.jl
index 202527abdf644..84ec52f865e98 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -482,7 +482,7 @@ function cmd_gen(parsed)
     end
 end
 
-@assume_effects :effect_free :terminates_globally :noub function cmd_gen(
+@assume_effects :foldable !:consistent function cmd_gen(
     parsed::Tuple{Vararg{Tuple{Vararg{Union{String, SubString{String}}}}}}
 )
     return @invoke cmd_gen(parsed::Any)
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index a169cfb9ecd77..3672a19e19998 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -2,23 +2,30 @@
 
 # Factorials
 
-const _fact_table64 = Vector{Int64}(undef, 20)
-_fact_table64[1] = 1
-for n in 2:20
-    _fact_table64[n] = _fact_table64[n-1] * n
+const _fact_table64 = let _fact_table64 = Vector{Int64}(undef, 20)
+    _fact_table64[1] = 1
+    for n in 2:20
+        _fact_table64[n] = _fact_table64[n-1] * n
+    end
+    Tuple(_fact_table64)
 end
 
-const _fact_table128 = Vector{UInt128}(undef, 34)
-_fact_table128[1] = 1
-for n in 2:34
-    _fact_table128[n] = _fact_table128[n-1] * n
+const _fact_table128 = let _fact_table128 = Vector{UInt128}(undef, 34)
+    _fact_table128[1] = 1
+    for n in 2:34
+        _fact_table128[n] = _fact_table128[n-1] * n
+    end
+    Tuple(_fact_table128)
 end
 
-function factorial_lookup(n::Integer, table, lim)
-    n < 0 && throw(DomainError(n, "`n` must not be negative."))
-    n > lim && throw(OverflowError(string(n, " is too large to look up in the table; consider using `factorial(big(", n, "))` instead")))
-    n == 0 && return one(n)
-    @inbounds f = table[n]
+function factorial_lookup(
+    n::Union{Checked.SignedInt,Checked.UnsignedInt},
+    table::Union{NTuple{20,Int64},NTuple{34,UInt128}}, lim::Int)
+    idx = Int(n)
+    idx < 0 && throw(DomainError(n, "`n` must not be negative."))
+    idx > lim && throw(OverflowError(lazy"$n is too large to look up in the table; consider using `factorial(big($n))` instead"))
+    idx == 0 && return one(n)
+    f = getfield(table, idx)
     return oftype(n, f)
 end
 
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 12eb9a03e323b..bb5f2dd1ad180 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1,5 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+struct SlotRefinement
+    slot::SlotNumber
+    typ::Any
+    SlotRefinement(slot::SlotNumber, @nospecialize(typ)) = new(slot, typ)
+end
+
 # See if the inference result of the current statement's result value might affect
 # the final answer for the method (aside from optimization potential and exceptions).
 # To do that, we need to check both for slot assignment and SSA usage.
@@ -7,23 +13,38 @@ call_result_unused(sv::InferenceState, currpc::Int) =
     isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc])
 call_result_unused(si::StmtInfo) = !si.used
 
+is_const_bool_or_bottom(@nospecialize(b)) = (isa(b, Const) && isa(b.val, Bool)) || b == Bottom
+function can_propagate_conditional(@nospecialize(rt), argtypes::Vector{Any})
+    isa(rt, InterConditional) || return false
+    if rt.slot > length(argtypes)
+        # In the vararg tail - can't be conditional
+        @assert isvarargtype(argtypes[end])
+        return false
+    end
+    return isa(argtypes[rt.slot], Conditional) &&
+        is_const_bool_or_bottom(rt.thentype) && is_const_bool_or_bottom(rt.thentype)
+end
+
+function propagate_conditional(rt::InterConditional, cond::Conditional)
+    new_thentype = rt.thentype === Const(false) ? cond.elsetype : cond.thentype
+    new_elsetype = rt.elsetype === Const(true) ? cond.thentype : cond.elsetype
+    if rt.thentype == Bottom
+        @assert rt.elsetype != Bottom
+        return Conditional(cond.slot, Bottom, new_elsetype)
+    elseif rt.elsetype == Bottom
+        @assert rt.thentype != Bottom
+        return Conditional(cond.slot, new_thentype, Bottom)
+    end
+    return Conditional(cond.slot, new_thentype, new_elsetype)
+end
+
 function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                                   arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
                                   sv::AbsIntState, max_methods::Int)
     𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp)
-    ⊑ₚ = ⊑(𝕃ₚ)
-    if !should_infer_this_call(interp, sv)
-        add_remark!(interp, sv, "Skipped call in throw block")
-        # At this point we are guaranteed to end up throwing on this path,
-        # which is all that's required for :consistent-cy. Of course, we don't
-        # know anything else about this statement.
-        effects = Effects(; consistent=ALWAYS_TRUE)
-        return CallMeta(Any, Any, effects, NoCallInfo())
-    end
-
+    ⊑ₚ, ⋤ₚ, ⊔ₚ, ⊔ᵢ  = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ), join(𝕃ᵢ)
     argtypes = arginfo.argtypes
-    matches = find_matching_methods(𝕃ᵢ, argtypes, atype, method_table(interp),
-        InferenceParams(interp).max_union_splitting, max_methods)
+    matches = find_method_matches(interp, argtypes, atype; max_methods)
     if isa(matches, FailedMethodMatch)
         add_remark!(interp, sv, matches.reason)
         return CallMeta(Any, Any, Effects(), NoCallInfo())
@@ -32,7 +53,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
     napplicable = length(applicable)
-    rettype = excttype = Bottom
+    rettype = exctype = Bottom
     edges = MethodInstance[]
     conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
     seen = 0               # number of signatures actually inferred
@@ -40,6 +61,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     multiple_matches = napplicable > 1
     fargs = arginfo.fargs
     all_effects = EFFECTS_TOTAL
+    slotrefinements = nothing # keeps refinement information on slot types obtained from call signature
 
     for i in 1:napplicable
         match = applicable[i]::MethodMatch
@@ -75,9 +97,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                     else
                         add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
                     end
-                    if !(exct ⊑ₚ const_call_result.exct)
-                        exct = const_call_result.exct
-                        (; const_result, edge) = const_call_result
+                    if const_call_result.exct ⋤ exct
+                        (; exct, const_result, edge) = const_call_result
                     else
                         add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
                     end
@@ -90,8 +111,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                     const_results[i] = const_result
                 end
                 edge === nothing || push!(edges, edge)
-                this_rt = tmerge(this_rt, rt)
-                this_exct = tmerge(this_exct, exct)
+                this_rt = this_rt ⊔ₚ rt
+                this_exct = this_exct ⊔ₚ exct
                 if bail_out_call(interp, this_rt, sv)
                     break
                 end
@@ -132,7 +153,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 end
                 # Treat the exception type separately. Currently, constprop often cannot determine the exception type
                 # because consistent-cy does not apply to exceptions.
-                if !(this_exct ⊑ₚ const_call_result.exct)
+                if const_call_result.exct ⋤ this_exct
                     this_exct = const_call_result.exct
                     (; const_result, edge) = const_call_result
                 else
@@ -150,8 +171,17 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         end
         @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
         seen += 1
-        rettype = tmerge(𝕃ₚ, rettype, this_rt)
-        excttype = tmerge(𝕃ₚ, excttype, this_exct)
+
+        if can_propagate_conditional(this_conditional, argtypes)
+            # The only case where we need to keep this in rt is where
+            # we can directly propagate the conditional to a slot argument
+            # that is not one of our arguments, otherwise we keep all the
+            # relevant information in `conditionals` below.
+            this_rt = this_conditional
+        end
+
+        rettype = rettype ⊔ₚ this_rt
+        exctype = exctype ⊔ₚ this_exct
         if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
             if conditionals === nothing
                 conditionals = Any[Bottom for _ in 1:length(argtypes)],
@@ -159,8 +189,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             end
             for i = 1:length(argtypes)
                 cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
-                conditionals[1][i] = tmerge(𝕃ᵢ, conditionals[1][i], cnd.thentype)
-                conditionals[2][i] = tmerge(𝕃ᵢ, conditionals[2][i], cnd.elsetype)
+                conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
+                conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
             end
         end
         if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
@@ -176,13 +206,18 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
 
     if seen ≠ napplicable
         # there is unanalyzed candidate, widen type and effects to the top
-        rettype = excttype = Any
+        rettype = exctype = Any
         all_effects = Effects()
-    elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
-            (!all(matches.fullmatches) || any_ambig(matches))
-        # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-        all_effects = Effects(all_effects; nothrow=false)
-        excttype = tmerge(𝕃ₚ, excttype, MethodError)
+    else
+        if (matches isa MethodMatches ? (!matches.fullmatch || any_ambig(matches)) :
+            (!all(matches.fullmatches) || any_ambig(matches)))
+            # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+            all_effects = Effects(all_effects; nothrow=false)
+            exctype = exctype ⊔ₚ MethodError
+        end
+        if sv isa InferenceState && fargs !== nothing
+            slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
+        end
     end
 
     rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
@@ -214,7 +249,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # and avoid keeping track of a more complex result type.
         rettype = Any
     end
-    add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv)
+    any_slot_refined = slotrefinements !== nothing
+    add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype, sv)
     if isa(sv, InferenceState)
         # TODO (#48913) implement a proper recursion handling for irinterp:
         # This works just because currently the `:terminate` condition guarantees that
@@ -228,7 +264,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         end
     end
 
-    return CallMeta(rettype, excttype, all_effects, info)
+    return CallMeta(rettype, exctype, all_effects, info, slotrefinements)
 end
 
 struct FailedMethodMatch
@@ -255,73 +291,79 @@ struct UnionSplitMethodMatches
 end
 any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches)
 
-function find_matching_methods(𝕃::AbstractLattice,
-                               argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
-                               max_union_splitting::Int, max_methods::Int)
-    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    if 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting
-        split_argtypes = switchtupleunion(𝕃, argtypes)
-        infos = MethodMatchInfo[]
-        applicable = Any[]
-        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
-        valid_worlds = WorldRange()
-        mts = MethodTable[]
-        fullmatches = Bool[]
-        for i in 1:length(split_argtypes)
-            arg_n = split_argtypes[i]::Vector{Any}
-            sig_n = argtypes_to_type(arg_n)
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
-            mt = mt::MethodTable
-            matches = findall(sig_n, method_table; limit = max_methods)
-            if matches === nothing
-                return FailedMethodMatch("For one of the union split cases, too many methods matched")
-            end
-            push!(infos, MethodMatchInfo(matches))
-            for m in matches
-                push!(applicable, m)
-                push!(applicable_argtypes, arg_n)
-            end
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = any(match::MethodMatch->match.fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatches[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatches, thisfullmatch)
-            end
-        end
-        return UnionSplitMethodMatches(applicable,
-                                       applicable_argtypes,
-                                       UnionSplitInfo(infos),
-                                       valid_worlds,
-                                       mts,
-                                       fullmatches)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            return FailedMethodMatch("Could not identify method table for call")
-        end
+function find_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype);
+                             max_union_splitting::Int = InferenceParams(interp).max_union_splitting,
+                             max_methods::Int = InferenceParams(interp).max_methods)
+    if is_union_split_eligible(typeinf_lattice(interp), argtypes, max_union_splitting)
+        return find_union_split_method_matches(interp, argtypes, atype, max_methods)
+    end
+    return find_simple_method_matches(interp, atype, max_methods)
+end
+
+# NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
+is_union_split_eligible(𝕃::AbstractLattice, argtypes::Vector{Any}, max_union_splitting::Int) =
+    1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting
+
+function find_union_split_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any},
+                                         @nospecialize(atype), max_methods::Int)
+    split_argtypes = switchtupleunion(typeinf_lattice(interp), argtypes)
+    infos = MethodMatchInfo[]
+    applicable = Any[]
+    applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+    valid_worlds = WorldRange()
+    mts = MethodTable[]
+    fullmatches = Bool[]
+    for i in 1:length(split_argtypes)
+        arg_n = split_argtypes[i]::Vector{Any}
+        sig_n = argtypes_to_type(arg_n)
+        mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
+        mt === nothing && return FailedMethodMatch("Could not identify method table for call")
         mt = mt::MethodTable
-        matches = findall(atype, method_table; limit = max_methods)
+        matches = findall(sig_n, method_table(interp); limit = max_methods)
         if matches === nothing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            return FailedMethodMatch("Too many methods matched")
+            return FailedMethodMatch("For one of the union split cases, too many methods matched")
+        end
+        push!(infos, MethodMatchInfo(matches))
+        for m in matches
+            push!(applicable, m)
+            push!(applicable_argtypes, arg_n)
+        end
+        valid_worlds = intersect(valid_worlds, matches.valid_worlds)
+        thisfullmatch = any(match::MethodMatch->match.fully_covers, matches)
+        found = false
+        for (i, mt′) in enumerate(mts)
+            if mt′ === mt
+                fullmatches[i] &= thisfullmatch
+                found = true
+                break
+            end
         end
-        fullmatch = any(match::MethodMatch->match.fully_covers, matches)
-        return MethodMatches(matches.matches,
-                             MethodMatchInfo(matches),
-                             matches.valid_worlds,
-                             mt,
-                             fullmatch)
+        if !found
+            push!(mts, mt)
+            push!(fullmatches, thisfullmatch)
+        end
+    end
+    info = UnionSplitInfo(infos)
+    return UnionSplitMethodMatches(
+        applicable, applicable_argtypes, info, valid_worlds, mts, fullmatches)
+end
+
+function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(atype), max_methods::Int)
+    mt = ccall(:jl_method_table_for, Any, (Any,), atype)
+    if mt === nothing
+        return FailedMethodMatch("Could not identify method table for call")
+    end
+    mt = mt::MethodTable
+    matches = findall(atype, method_table(interp); limit = max_methods)
+    if matches === nothing
+        # this means too many methods matched
+        # (assume this will always be true, so we don't compute / update valid age in this case)
+        return FailedMethodMatch("Too many methods matched")
     end
+    info = MethodMatchInfo(matches)
+    fullmatch = any(match::MethodMatch->match.fully_covers, matches)
+    return MethodMatches(
+        matches.matches, info, matches.valid_worlds, mt, fullmatch)
 end
 
 """
@@ -349,7 +391,7 @@ function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), s
                                arginfo::ArgInfo, @nospecialize(maybecondinfo))
     rt = collect_limitations!(rt, sv)
     if isa(rt, InterMustAlias)
-        rt = from_intermustalias(rt, arginfo, sv)
+        rt = from_intermustalias(typeinf_lattice(interp), rt, arginfo, sv)
     elseif is_lattice_bool(ipo_lattice(interp), rt)
         if maybecondinfo === nothing
             rt = widenconditional(rt)
@@ -369,12 +411,13 @@ function collect_limitations!(@nospecialize(typ), sv::InferenceState)
     return typ
 end
 
-function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState)
+function from_intermustalias(𝕃ᵢ::AbstractLattice, rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState)
     fargs = arginfo.fargs
     if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs)
         arg = ssa_def_slot(fargs[rt.slot], sv)
         if isa(arg, SlotNumber)
             argtyp = widenslotwrapper(arginfo.argtypes[rt.slot])
+            ⊑ = partialorder(𝕃ᵢ)
             if rt.vartyp ⊑ argtyp
                 return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp)
             else
@@ -390,10 +433,14 @@ function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::
     has_conditional(𝕃ᵢ, sv) || return widenconditional(rt)
     (; fargs, argtypes) = arginfo
     fargs === nothing && return widenconditional(rt)
+    if can_propagate_conditional(rt, argtypes)
+        return propagate_conditional(rt, argtypes[rt.slot]::Conditional)
+    end
     slot = 0
     alias = nothing
     thentype = elsetype = Any
     condval = maybe_extract_const_bool(rt)
+    ⊑, ⋤, ⊓ = partialorder(𝕃ᵢ), strictneqpartialorder(𝕃ᵢ), meet(𝕃ᵢ)
     for i in 1:length(fargs)
         # find the first argument which supports refinement,
         # and intersect all equivalent arguments with it
@@ -429,24 +476,24 @@ function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::
             end
             if condval === false
                 thentype = Bottom
-            elseif ⊑(𝕃ᵢ, new_thentype, thentype)
+            elseif new_thentype ⊑ thentype
                 thentype = new_thentype
             else
-                thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype))
+                thentype = thentype ⊓ widenconst(new_thentype)
             end
             if condval === true
                 elsetype = Bottom
-            elseif ⊑(𝕃ᵢ, new_elsetype, elsetype)
+            elseif new_elsetype ⊑ elsetype
                 elsetype = new_elsetype
             else
-                elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype))
+                elsetype = elsetype ⊓ widenconst(new_elsetype)
             end
-            if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old)
+            if (slot > 0 || condval !== false) && thentype ⋤ old
                 slot = id
                 if !(arg isa SlotNumber) && argtyp isa MustAlias
                     alias = argtyp
                 end
-            elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old)
+            elseif (slot > 0 || condval !== true) && elsetype ⋤ old
                 slot = id
                 if !(arg isa SlotNumber) && argtyp isa MustAlias
                     alias = argtyp
@@ -474,7 +521,12 @@ function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospe
     if isa(rt, InterConditional) && rt.slot == i
         return rt
     else
-        thentype = elsetype = tmeet(𝕃ᵢ, widenslotwrapper(argtypes[i]), fieldtype(sig, i))
+        argt = widenslotwrapper(argtypes[i])
+        if isvarargtype(argt)
+            @assert fieldcount(sig) == i
+            argt = unwrapva(argt)
+        end
+        thentype = elsetype = tmeet(𝕃ᵢ, argt, fieldtype(sig, i))
         condval = maybe_extract_const_bool(rt)
         condval === true && (elsetype = Bottom)
         condval === false && (thentype = Bottom)
@@ -482,8 +534,37 @@ function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospe
     end
 end
 
-function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects,
-    edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+function collect_slot_refinements(𝕃ᵢ::AbstractLattice, applicable::Vector{Any},
+    argtypes::Vector{Any}, fargs::Vector{Any}, sv::InferenceState)
+    ⊏, ⊔ = strictpartialorder(𝕃ᵢ), join(𝕃ᵢ)
+    slotrefinements = nothing
+    for i = 1:length(fargs)
+        fargᵢ = fargs[i]
+        if fargᵢ isa SlotNumber
+            fidx = slot_id(fargᵢ)
+            argt = widenslotwrapper(argtypes[i])
+            if isvarargtype(argt)
+                argt = unwrapva(argt)
+            end
+            sigt = Bottom
+            for j = 1:length(applicable)
+                match = applicable[j]::MethodMatch
+                sigt = sigt ⊔ fieldtype(match.spec_types, i)
+            end
+            if sigt ⊏ argt # i.e. signature type is strictly more specific than the type of the argument slot
+                if slotrefinements === nothing
+                    slotrefinements = fill!(Vector{Any}(undef, length(sv.slottypes)), nothing)
+                end
+                slotrefinements[fidx] = sigt
+            end
+        end
+    end
+    return slotrefinements
+end
+
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype),
+    all_effects::Effects, any_slot_refined::Bool, edges::Vector{MethodInstance},
+    matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
     sv::AbsIntState)
     # don't bother to add backedges when both type and effects information are already
     # maximized to the top since a new method couldn't refine or widen them anyway
@@ -491,9 +572,11 @@ function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype)
         # ignore the `:nonoverlayed` property if `interp` doesn't use overlayed method table
         # since it will never be tainted anyway
         if !isoverlayed(method_table(interp))
-            all_effects = Effects(all_effects; nonoverlayed=false)
+            all_effects = Effects(all_effects; nonoverlayed=ALWAYS_FALSE)
+        end
+        if all_effects === Effects() && !any_slot_refined
+            return nothing
         end
-        all_effects === Effects() && return nothing
     end
     for edge in edges
         add_backedge!(sv, edge)
@@ -605,13 +688,23 @@ function abstract_call_method(interp::AbstractInterpreter,
             end
             add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
             # TODO (#48913) implement a proper recursion handling for irinterp:
-            # This works just because currently the `:terminate` condition guarantees that
-            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+            # This works just because currently the `:terminate` condition usually means this is unreachable here
+            # for irinterp because there are not unresolved cycles, but it's not a good solution.
             # We should revisit this once we have a better story for handling cycles in irinterp.
-            if isa(topmost, InferenceState)
+            if isa(sv, InferenceState)
+                # since the hardlimit is against the edge to the parent frame,
+                # we should try to poison the whole edge, not just the topmost frame
                 parentframe = frame_parent(topmost)
-                if isa(sv, InferenceState) && isa(parentframe, InferenceState)
-                    poison_callstack!(sv, parentframe === nothing ? topmost : parentframe)
+                while !isa(parentframe, InferenceState)
+                    # attempt to find a parent frame that can handle this LimitedAccuracy result correctly
+                    # so we don't try to cache this incomplete intermediate result
+                    parentframe === nothing && break
+                    parentframe = frame_parent(parentframe)
+                end
+                if isa(parentframe, InferenceState)
+                    poison_callstack!(sv, parentframe)
+                elseif isa(topmost, InferenceState)
+                    poison_callstack!(sv, topmost)
                 end
             end
             # n.b. this heuristic depends on the non-local state, so we must record the limit later
@@ -682,11 +775,13 @@ function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
     # The other `CodeInfo`s we inspect will already have this field inflated, so we just
     # access it directly instead (to avoid regeneration).
     world = get_inference_world(interp)
-    callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) # Union{Method, Nothing}
-
-    inf_method2 = method_for_inference_limit_heuristics(frame) # limit only if user token match
-    inf_method2 isa Method || (inf_method2 = nothing)
-    if callee_method2 !== inf_method2
+    callee_method2 = method_for_inference_heuristics(method, sig, sparams, world)
+    inf_method2 = method_for_inference_limit_heuristics(frame)
+    if callee_method2 !== inf_method2 # limit only if user token match
+        return false
+    end
+    if isa(frame, InferenceState) && cache_owner(frame.interp) !== cache_owner(interp)
+        # Don't assume that frames in different interpreters are the same
         return false
     end
     if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit
@@ -697,10 +792,10 @@ function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
         # otherwise, we don't
 
         # check in the cycle list first
-        # all items in here are mutual parents of all others
+        # all items in here are considered mutual parents of all others
         if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame))
             let parent = frame_parent(frame)
-                parent !== nothing || return false
+                parent === nothing && return false
                 (is_cached(parent) || frame_parent(parent) !== nothing) || return false
                 matches_sv(parent, sv) || return false
             end
@@ -719,15 +814,14 @@ end
 
 # This function is used for computing alternate limit heuristics
 function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt)
-    if isdefined(method, :generator) && !(method.generator isa Core.GeneratedFunctionStub) && may_invoke_generator(method, sig, sparams)
-        method_instance = specialize_method(method, sig, sparams)
-        if isa(method_instance, MethodInstance)
-            cinfo = get_staged(method_instance, world)
-            if isa(cinfo, CodeInfo)
-                method2 = cinfo.method_for_inference_limit_heuristics
-                if method2 isa Method
-                    return method2
-                end
+    if (hasgenerator(method) && !(method.generator isa Core.GeneratedFunctionStub) &&
+        may_invoke_generator(method, sig, sparams))
+        mi = specialize_method(method, sig, sparams)
+        cinfo = get_staged(mi, world)
+        if isa(cinfo, CodeInfo)
+            method2 = cinfo.method_for_inference_limit_heuristics
+            if method2 isa Method
+                return method2
             end
         end
     end
@@ -735,11 +829,9 @@ function method_for_inference_heuristics(method::Method, @nospecialize(sig), spa
 end
 
 function matches_sv(parent::AbsIntState, sv::AbsIntState)
-    sv_method2 = method_for_inference_limit_heuristics(sv) # limit only if user token match
-    sv_method2 isa Method || (sv_method2 = nothing)
-    parent_method2 = method_for_inference_limit_heuristics(parent) # limit only if user token match
-    parent_method2 isa Method || (parent_method2 = nothing)
-    return frame_instance(parent).def === frame_instance(sv).def && sv_method2 === parent_method2
+    # limit only if user token match
+    return (frame_instance(parent).def === frame_instance(sv).def &&
+            method_for_inference_limit_heuristics(sv) === method_for_inference_limit_heuristics(parent))
 end
 
 function is_edge_recursed(edge::MethodInstance, caller::AbsIntState)
@@ -887,9 +979,17 @@ function concrete_eval_eligible(interp::AbstractInterpreter,
         end
     end
     mi = result.edge
-    if mi !== nothing && is_foldable(effects)
+    if mi !== nothing && is_foldable(effects, #=check_rtcall=#true)
         if f !== nothing && is_all_const_arg(arginfo, #=start=#2)
-            if is_nonoverlayed(interp) || is_nonoverlayed(effects)
+            if (is_nonoverlayed(interp) || is_nonoverlayed(effects) ||
+                # Even if overlay methods are involved, when `:consistent_overlay` is
+                # explicitly applied, we can still perform concrete evaluation using the
+                # original methods for executing them.
+                # While there's a chance that the non-overlayed counterparts may raise
+                # non-egal exceptions, it will not impact the compilation validity, since:
+                # - the results of the concrete evaluation will not be inlined
+                # - the exception types from the concrete evaluation will not be propagated
+                is_consistent_overlay(effects))
                 return :concrete_eval
             end
             # disable concrete-evaluation if this function call is tainted by some overlayed
@@ -897,7 +997,12 @@ function concrete_eval_eligible(interp::AbstractInterpreter,
             add_remark!(interp, sv, "[constprop] Concrete eval disabled for overlayed methods")
         end
         if !any_conditional(arginfo)
-            return :semi_concrete_eval
+            if may_optimize(interp)
+                return :semi_concrete_eval
+            else
+                # disable irinterp if optimization is disabled, since it requires optimized IR
+                add_remark!(interp, sv, "[constprop] Semi-concrete interpretation disabled for non-optimizing interpreter")
+            end
         end
     end
     return :none
@@ -921,7 +1026,7 @@ collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.ar
 function collect_const_args(argtypes::Vector{Any}, start::Int)
     return Any[ let a = widenslotwrapper(argtypes[i])
                     isa(a, Const) ? a.val :
-                    isconstType(a) ? (a::DataType).parameters[1] :
+                    isconstType(a) ? a.parameters[1] :
                     (a::DataType).instance
                 end for i = start:length(argtypes) ]
 end
@@ -968,19 +1073,16 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     match::MethodMatch, sv::AbsIntState)
     method = match.method
     force = force_const_prop(interp, f, method)
-    if !const_prop_entry_heuristic(interp, result, si, sv, force)
-        # N.B. remarks are emitted within `const_prop_entry_heuristic`
+    if !const_prop_rettype_heuristic(interp, result, si, sv, force)
+        # N.B. remarks are emitted within `const_prop_rettype_heuristic`
         return nothing
     end
-    nargs::Int = method.nargs
-    method.isva && (nargs -= 1)
-    length(arginfo.argtypes) < nargs && return nothing
     if !const_prop_argument_heuristic(interp, arginfo, sv)
-        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
+        add_remark!(interp, sv, "[constprop] Disabled by argument heuristics")
         return nothing
     end
     all_overridden = is_all_overridden(interp, arginfo, sv)
-    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv)
+    if !force && !const_prop_function_heuristic(interp, f, arginfo, all_overridden, sv)
         add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
         return nothing
     end
@@ -998,28 +1100,28 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     return mi
 end
 
-function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult,
-                                    si::StmtInfo, sv::AbsIntState, force::Bool)
-    if result.rt isa LimitedAccuracy
+function const_prop_rettype_heuristic(interp::AbstractInterpreter, result::MethodCallResult,
+                                      si::StmtInfo, sv::AbsIntState, force::Bool)
+    rt = result.rt
+    if rt isa LimitedAccuracy
         # optimizations like inlining are disabled for limited frames,
         # thus there won't be much benefit in constant-prop' here
         # N.B. don't allow forced constprop' for safety (xref #52763)
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)")
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (limited accuracy)")
         return false
     elseif force
         return true
     elseif call_result_unused(si) && result.edgecycle
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (edgecycle with unused result)")
         return false
     end
     # check if this return type is improvable (i.e. whether it's possible that with more
     # information, we might get a more precise type)
-    rt = result.rt
     if isa(rt, Type)
         # could always be improved to `Const`, `PartialStruct` or just a more precise type,
         # unless we're already at `Bottom`
         if rt === Bottom
-            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)")
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (erroneous result)")
             return false
         end
         return true
@@ -1028,14 +1130,15 @@ function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodC
         return true
     elseif isa(rt, Const)
         if is_nothrow(result.effects)
-            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (nothrow const)")
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (nothrow const)")
             return false
         end
         # Could still be improved to Bottom (or at least could see the effects improved)
         return true
+    else
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (unimprovable result)")
+        return false
     end
-    add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable result)")
-    return false
 end
 
 # determines heuristically whether if constant propagation can be worthwhile
@@ -1094,16 +1197,16 @@ end
 function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
     return is_aggressive_constprop(method) ||
            InferenceParams(interp).aggressive_constant_propagation ||
-           istopfunction(f, :getproperty) ||
-           istopfunction(f, :setproperty!)
+           typename(typeof(f)).constprop_heuristic === Core.FORCE_CONST_PROP
 end
 
 function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f),
-    arginfo::ArgInfo, nargs::Int, all_overridden::Bool, sv::AbsIntState)
+    arginfo::ArgInfo, all_overridden::Bool, sv::AbsIntState)
     argtypes = arginfo.argtypes
-    if nargs > 1
+    heuristic = typename(typeof(f)).constprop_heuristic
+    if length(argtypes) > 1
         𝕃ᵢ = typeinf_lattice(interp)
-        if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
+        if heuristic === Core.ARRAY_INDEX_HEURISTIC
             arrty = argtypes[2]
             # don't propagate constant index into indexing of non-constant array
             if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
@@ -1116,17 +1219,14 @@ function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecializ
             elseif ⊑(𝕃ᵢ, arrty, Array) || ⊑(𝕃ᵢ, arrty, GenericMemory)
                 return false
             end
-        elseif istopfunction(f, :iterate)
+        elseif heuristic === Core.ITERATE_HEURISTIC
             itrty = argtypes[2]
             if ⊑(𝕃ᵢ, itrty, Array) || ⊑(𝕃ᵢ, itrty, GenericMemory)
                 return false
             end
         end
     end
-    if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
-                           istopfunction(f, :(==)) || istopfunction(f, :!=) ||
-                           istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
-                           istopfunction(f, :<<) || istopfunction(f, :>>))
+    if !all_overridden && heuristic === Core.SAMETYPE_HEURISTIC
         # it is almost useless to inline the op when all the same type,
         # but highly worthwhile to inline promote of a constant
         length(argtypes) > 2 || return false
@@ -1197,7 +1297,7 @@ function semi_concrete_eval_call(interp::AbstractInterpreter,
     if code !== nothing
         irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world)
         if irsv !== nothing
-            irsv.parent = sv
+            assign_parentchild!(irsv, sv)
             rt, (nothrow, noub) = ir_abstract_constant_propagation(interp, irsv)
             @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp"
             if !(isa(rt, Type) && hasintersect(rt, Bool))
@@ -1225,16 +1325,30 @@ const_prop_result(inf_result::InferenceResult) =
     ConstCallResults(inf_result.result, inf_result.exc_result, ConstPropResult(inf_result),
                      inf_result.ipo_effects, inf_result.linfo)
 
-# return cached constant analysis result
+# return cached result of constant analysis
 return_cached_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
     const_prop_result(inf_result)
 
+function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    return has_conditional(𝕃ᵢ, sv) ? ConditionalSimpleArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
+end
+
 function const_prop_call(interp::AbstractInterpreter,
     mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState,
     concrete_eval_result::Union{Nothing, ConstCallResults}=nothing)
     inf_cache = get_inference_cache(interp)
     𝕃ᵢ = typeinf_lattice(interp)
-    inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache)
+    forwarded_argtypes = compute_forwarded_argtypes(interp, arginfo, sv)
+    # use `cache_argtypes` that has been constructed for fresh regular inference if available
+    volatile_inf_result = result.volatile_inf_result
+    if volatile_inf_result !== nothing
+        cache_argtypes = volatile_inf_result.inf_result.argtypes
+    else
+        cache_argtypes = matching_cache_argtypes(𝕃ᵢ, mi)
+    end
+    argtypes = matching_cache_argtypes(𝕃ᵢ, mi, forwarded_argtypes, cache_argtypes)
+    inf_result = cache_lookup(𝕃ᵢ, mi, argtypes, inf_cache)
     if inf_result !== nothing
         # found the cache for this constant prop'
         if inf_result.result === nothing
@@ -1244,25 +1358,39 @@ function const_prop_call(interp::AbstractInterpreter,
         @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match"
         return return_cached_result(interp, inf_result, sv)
     end
-    # perform fresh constant prop'
-    argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
-    inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp))
-    if !any(inf_result.overridden_by_const)
+    overridden_by_const = falses(length(argtypes))
+    for i = 1:length(argtypes)
+        if argtypes[i] !== argtype_by_index(cache_argtypes, i)
+            overridden_by_const[i] = true
+        end
+    end
+    if !any(overridden_by_const)
         add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
         return nothing
     end
+    # perform fresh constant prop'
+    inf_result = InferenceResult(mi, argtypes, overridden_by_const)
     frame = InferenceState(inf_result, #=cache_mode=#:local, interp)
     if frame === nothing
         add_remark!(interp, sv, "[constprop] Could not retrieve the source")
         return nothing # this is probably a bad generated function (unsound), but just ignore it
     end
-    frame.parent = sv
+    assign_parentchild!(frame, sv)
     if !typeinf(interp, frame)
         add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
+        @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+        callstack = frame.callstack::Vector{AbsIntState}
+        @assert callstack[end] === frame && length(callstack) == frame.frameid
+        pop!(callstack)
         return nothing
     end
+    @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+    @assert frame.parentid == sv.frameid
     @assert inf_result.result !== nothing
-    if concrete_eval_result !== nothing
+    # ConditionalSimpleArgtypes is allowed, because the only case in which it modifies
+    # the argtypes is when one of the argtypes is a `Conditional`, which case
+    # concrete_eval_result will not be available.
+    if concrete_eval_result !== nothing && isa(forwarded_argtypes, Union{SimpleArgtypes, ConditionalSimpleArgtypes})
         # override return type and effects with concrete evaluation result if available
         inf_result.result = concrete_eval_result.rt
         inf_result.ipo_effects = concrete_eval_result.effects
@@ -1272,27 +1400,19 @@ end
 
 # TODO implement MustAlias forwarding
 
-struct ConditionalArgtypes <: ForwardableArgtypes
+struct ConditionalSimpleArgtypes
     arginfo::ArgInfo
     sv::InferenceState
 end
 
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
-                            conditional_argtypes::ConditionalArgtypes)
-
-The implementation is able to forward `Conditional` of `conditional_argtypes`,
-as well as the other general extended lattice information.
-"""
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
-                                 conditional_argtypes::ConditionalArgtypes)
+function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance,
+                                 conditional_argtypes::ConditionalSimpleArgtypes,
+                                 cache_argtypes::Vector{Any})
     (; arginfo, sv) = conditional_argtypes
     (; fargs, argtypes) = arginfo
     given_argtypes = Vector{Any}(undef, length(argtypes))
-    def = linfo.def::Method
+    def = mi.def::Method
     nargs = Int(def.nargs)
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo)
-    local condargs = nothing
     for i in 1:length(argtypes)
         argtype = argtypes[i]
         # forward `Conditional` if it conveys a constraint on any other argument
@@ -1309,10 +1429,6 @@ function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
                     # TODO bail out here immediately rather than just propagating Bottom ?
                     given_argtypes[i] = Bottom
                 else
-                    if condargs === nothing
-                        condargs = Tuple{Int,Int}[]
-                    end
-                    push!(condargs, (slotid, i))
                     given_argtypes[i] = Conditional(slotid, thentype, elsetype)
                 end
                 continue
@@ -1320,21 +1436,7 @@ function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
         end
         given_argtypes[i] = widenslotwrapper(argtype)
     end
-    if condargs !== nothing
-        given_argtypes = let condargs=condargs
-            va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int
-                # invalidate `Conditional` imposed on varargs
-                for (slotid, i) in condargs
-                    if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise
-                        isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i])
-                    end
-                end
-            end
-        end
-    else
-        given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo)
-    end
-    return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes)
+    return pick_const_args!(𝕃, given_argtypes, cache_argtypes)
 end
 
 # This is only for use with `Conditional`.
@@ -1774,7 +1876,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
     @nospecialize f
     la = length(argtypes)
     𝕃ᵢ = typeinf_lattice(interp)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
+    ⊑, ⊏, ⊔, ⊓ = partialorder(𝕃ᵢ), strictpartialorder(𝕃ᵢ), join(𝕃ᵢ), meet(𝕃ᵢ)
     if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
         cnd = argtypes[2]
         if isa(cnd, Conditional)
@@ -1789,12 +1891,12 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
                 a = ssa_def_slot(fargs[3], sv)
                 b = ssa_def_slot(fargs[4], sv)
                 if isa(a, SlotNumber) && cnd.slot == slot_id(a)
-                    tx = (cnd.thentype ⊑ᵢ tx ? cnd.thentype : tmeet(𝕃ᵢ, tx, widenconst(cnd.thentype)))
+                    tx = (cnd.thentype ⊑ tx ? cnd.thentype : tx ⊓ widenconst(cnd.thentype))
                 end
                 if isa(b, SlotNumber) && cnd.slot == slot_id(b)
-                    ty = (cnd.elsetype ⊑ᵢ ty ? cnd.elsetype : tmeet(𝕃ᵢ, ty, widenconst(cnd.elsetype)))
+                    ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : ty ⊓ widenconst(cnd.elsetype))
                 end
-                return tmerge(𝕃ᵢ, tx, ty)
+                return tx ⊔ ty
             end
         end
     end
@@ -1909,26 +2011,39 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
                 return Conditional(aty.slot, thentype, elsetype)
             end
         elseif f === isdefined
-            uty = argtypes[2]
             a = ssa_def_slot(fargs[2], sv)
-            if isa(uty, Union) && isa(a, SlotNumber)
-                fld = argtypes[3]
-                thentype = Bottom
-                elsetype = Bottom
-                for ty in uniontypes(uty)
-                    cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
-                    if isa(cnd, Const)
-                        if cnd.val::Bool
-                            thentype = tmerge(thentype, ty)
+            if isa(a, SlotNumber)
+                argtype2 = argtypes[2]
+                if isa(argtype2, Union)
+                    fld = argtypes[3]
+                    thentype = Bottom
+                    elsetype = Bottom
+                    for ty in uniontypes(argtype2)
+                        cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
+                        if isa(cnd, Const)
+                            if cnd.val::Bool
+                                thentype = thentype ⊔ ty
+                            else
+                                elsetype = elsetype ⊔ ty
+                            end
                         else
-                            elsetype = tmerge(elsetype, ty)
+                            thentype = thentype ⊔ ty
+                            elsetype = elsetype ⊔ ty
                         end
-                    else
-                        thentype = tmerge(thentype, ty)
-                        elsetype = tmerge(elsetype, ty)
+                    end
+                    return Conditional(a, thentype, elsetype)
+                else
+                    thentype = form_partially_defined_struct(argtype2, argtypes[3])
+                    if thentype !== nothing
+                        elsetype = argtype2
+                        if rt === Const(false)
+                            thentype = Bottom
+                        elseif rt === Const(true)
+                            elsetype = Bottom
+                        end
+                        return Conditional(a, thentype, elsetype)
                     end
                 end
-                return Conditional(a, thentype, elsetype)
             end
         end
     end
@@ -1936,6 +2051,34 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
     return rt
 end
 
+function form_partially_defined_struct(@nospecialize(obj), @nospecialize(name))
+    obj isa Const && return nothing # nothing to refine
+    name isa Const || return nothing
+    objt0 = widenconst(obj)
+    objt = unwrap_unionall(objt0)
+    objt isa DataType || return nothing
+    isabstracttype(objt) && return nothing
+    fldidx = try_compute_fieldidx(objt, name.val)
+    fldidx === nothing && return nothing
+    nminfld = datatype_min_ninitialized(objt)
+    if ismutabletype(objt)
+        # A mutable struct can have non-contiguous undefined fields, but `PartialStruct` cannot
+        # model such a state. So here `PartialStruct` can be used to represent only the
+        # objects where the field following the minimum initialized fields is also defined.
+        if fldidx ≠ nminfld+1
+            # if it is already represented as a `PartialStruct`, we can add one more
+            # `isdefined`-field information on top of those implied by its `fields`
+            if !(obj isa PartialStruct && fldidx == length(obj.fields)+1)
+                return nothing
+            end
+        end
+    else
+        fldidx > nminfld || return nothing
+    end
+    return PartialStruct(objt0, Any[obj isa PartialStruct && i≤length(obj.fields) ?
+        obj.fields[i] : fieldtype(objt0,i) for i = 1:fldidx])
+end
+
 function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}, call::CallMeta)
     na = length(argtypes)
     if isvarargtype(argtypes[end])
@@ -1950,8 +2093,8 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An
     elseif na == 3
         a2 = argtypes[2]
         a3 = argtypes[3]
-        ⊑ᵢ = ⊑(typeinf_lattice(interp))
-        nothrow = a2 ⊑ᵢ TypeVar && (a3 ⊑ᵢ Type || a3 ⊑ᵢ TypeVar)
+        ⊑ = partialorder(typeinf_lattice(interp))
+        nothrow = a2 ⊑ TypeVar && (a3 ⊑ Type || a3 ⊑ TypeVar)
     else
         return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
     end
@@ -1983,19 +2126,21 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An
     return CallMeta(ret, Any, Effects(EFFECTS_TOTAL; nothrow), call.info)
 end
 
-function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState)
+function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState)
+    argtypes = arginfo.argtypes
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
     ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false)
     isexact || return CallMeta(Any, Any, Effects(), NoCallInfo())
     unwrapped = unwrap_unionall(types)
-    if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
-        return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    types === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    if !(unwrapped isa DataType && unwrapped.name === Tuple.name)
+        return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
     end
     argtype = argtypes_to_type(argtype_tail(argtypes, 4))
     nargtype = typeintersect(types, argtype)
-    nargtype === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    nargtype === Bottom && return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
     nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below
     isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
     ft = ft::DataType
@@ -2009,11 +2154,12 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn
     tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
     ti = tienv[1]; env = tienv[2]::SimpleVector
     result = abstract_call_method(interp, method, ti, env, false, si, sv)
-    (; rt, edge, effects, volatile_inf_result) = result
+    (; rt, exct, edge, effects, volatile_inf_result) = result
     match = MethodMatch(ti, env, method, argtype <: method.sig)
     res = nothing
     sig = match.spec_types
     argtypes′ = invoke_rewrite(argtypes)
+    fargs = arginfo.fargs
     fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
     arginfo = ArgInfo(fargs′, argtypes′)
     # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
@@ -2022,20 +2168,28 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn
     #     argtypes′[i] = t ⊑ a ? t : a
     # end
     𝕃ₚ = ipo_lattice(interp)
+    ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
     f = singleton_type(ft′)
     invokecall = InvokeCall(types, lookupsig)
     const_call_result = abstract_call_method_with_const_args(interp,
         result, f, arginfo, si, match, sv, invokecall)
     const_result = volatile_inf_result
     if const_call_result !== nothing
-        if ⊑(𝕃ₚ, const_call_result.rt, rt)
+        if const_call_result.rt ⊑ rt
             (; rt, effects, const_result, edge) = const_call_result
         end
+        if const_call_result.exct ⋤ exct
+            (; exct, const_result, edge) = const_call_result
+        end
     end
     rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
     info = InvokeCallInfo(match, const_result)
     edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
-    return CallMeta(rt, Any, effects, info)
+    if !match.fully_covers
+        effects = Effects(effects; nothrow=false)
+        exct = exct ⊔ TypeError
+    end
+    return CallMeta(rt, exct, effects, info)
 end
 
 function invoke_rewrite(xs::Vector{Any})
@@ -2056,22 +2210,34 @@ end
 
 function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
     na = length(argtypes)
-    𝕃ᵢ = typeinf_lattice(interp)
+    ⊔ = join(typeinf_lattice(interp))
     if na == 2
         argtype2 = argtypes[2]
         if isvarargtype(argtype2)
-            exct = tmerge(𝕃ᵢ, unwrapva(argtype2), ArgumentError)
+            exct = unwrapva(argtype2) ⊔ ArgumentError
         else
             exct = argtype2
         end
     elseif na == 3 && isvarargtype(argtypes[3])
-        exct = tmerge(𝕃ᵢ, argtypes[2], ArgumentError)
+        exct = argtypes[2] ⊔ ArgumentError
     else
         exct = ArgumentError
     end
     return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
 end
 
+function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
+    exct = if length(argtypes) == 1
+        ArgumentError
+    elseif !isvarargtype(argtypes[2])
+        MethodError
+    else
+        ⊔ = join(typeinf_lattice(interp))
+        MethodError ⊔ ArgumentError
+    end
+    return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+end
+
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
@@ -2092,6 +2258,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_applicable(interp, argtypes, sv, max_methods)
         elseif f === throw
             return abstract_throw(interp, argtypes, sv)
+        elseif f === Core.throw_methoderror
+            return abstract_throw_methoderror(interp, argtypes, sv)
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv)
         ft = popfirst!(argtypes)
@@ -2102,7 +2270,17 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             exct = builtin_exct(𝕃ᵢ, f, argtypes, rt)
         end
         pushfirst!(argtypes, ft)
-        return CallMeta(rt, exct, effects, NoCallInfo())
+        refinements = nothing
+        if sv isa InferenceState && f === typeassert
+            # perform very limited back-propagation of invariants after this type assertion
+            if rt !== Bottom && isa(fargs, Vector{Any})
+                farg2 = fargs[2]
+                if farg2 isa SlotNumber
+                    refinements = SlotRefinement(farg2, rt)
+                end
+            end
+        end
+        return CallMeta(rt, exct, effects, NoCallInfo(), refinements)
     elseif isa(f, Core.OpaqueClosure)
         # calling an OpaqueClosure about which we have no information returns no information
         return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())
@@ -2146,14 +2324,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         end
     elseif is_return_type(f)
         return return_type_tfunc(interp, argtypes, si, sv)
-    elseif la == 2 && istopfunction(f, :!)
-        # handle Conditional propagation through !Bool
-        aty = argtypes[2]
-        if isa(aty, Conditional)
-            call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), si, Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)`
-            return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), Any, call.effects, call.info)
-        end
-    elseif la == 3 && istopfunction(f, :!==)
+    elseif la == 3 && f === Core.:(!==)
         # mark !== as exactly a negated call to ===
         call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)
         rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
@@ -2163,7 +2334,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
         end
         return call
-    elseif la == 3 && istopfunction(f, :(>:))
+    elseif la == 3 && f === Core.:(>:)
         # mark issupertype as a exact alias for issubtype
         # swap T1 and T2 arguments and call <:
         if fargs !== nothing && length(fargs) == 3
@@ -2173,7 +2344,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
         return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
-    elseif la == 2 && istopfunction(f, :typename)
+    elseif la == 2 && f === Core.typename
         return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())
     elseif f === Core._hasmethod
         return _hasmethod_tfunc(interp, argtypes, sv)
@@ -2185,35 +2356,43 @@ end
 function abstract_call_opaque_closure(interp::AbstractInterpreter,
     closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, check::Bool=true)
     sig = argtypes_to_type(arginfo.argtypes)
-    result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv)
-    (; rt, edge, effects, volatile_inf_result) = result
     tt = closure.typ
-    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
+    ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt)
+    ocargsig′ = unwrap_unionall(ocargsig)
+    ocargsig′ isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo())
+    ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig)
+    hasintersect(sig, ocsig) || return CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo())
+    ocmethod = closure.source::Method
+    result = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
+    (; rt, exct, edge, effects, volatile_inf_result) = result
+    match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig)
     𝕃ₚ = ipo_lattice(interp)
-    ⊑ₚ = ⊑(𝕃ₚ)
+    ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
     const_result = volatile_inf_result
     if !result.edgecycle
         const_call_result = abstract_call_method_with_const_args(interp, result,
             nothing, arginfo, si, match, sv)
         if const_call_result !== nothing
-            if const_call_result.rt ⊑ₚ rt
+            if const_call_result.rt ⊑ rt
                 (; rt, effects, const_result, edge) = const_call_result
             end
+            if const_call_result.exct ⋤ exct
+                (; exct, const_result, edge) = const_call_result
+            end
         end
     end
     if check # analyze implicit type asserts on argument and return type
-        ftt = closure.typ
-        (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters
-        rty = rewrap_unionall(rty isa TypeVar ? rty.lb : rty, ftt)
-        if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt))
+        rty = (unwrap_unionall(tt)::DataType).parameters[2]
+        rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, tt)
+        if !(rt ⊑ rty && sig ⊑ ocsig)
             effects = Effects(effects; nothrow=false)
+            exct = exct ⊔ TypeError
         end
     end
     rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
     info = OpaqueClosureCallInfo(match, const_result)
     edge !== nothing && add_backedge!(sv, edge)
-    return CallMeta(rt, Any, effects, info)
+    return CallMeta(rt, exct, effects, info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
@@ -2264,7 +2443,7 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtIn
     return abstract_call_known(interp, f, arginfo, si, sv, max_methods)
 end
 
-function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
+function sp_type_rewrap(@nospecialize(T), mi::MethodInstance, isreturn::Bool)
     isref = false
     if unwrapva(T) === Bottom
         return Bottom
@@ -2279,12 +2458,12 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
     else
         return Any
     end
-    if isa(linfo.def, Method)
-        spsig = linfo.def.sig
+    if isa(mi.def, Method)
+        spsig = mi.def.sig
         if isa(spsig, UnionAll)
-            if !isempty(linfo.sparam_vals)
+            if !isempty(mi.sparam_vals)
                 sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) :
-                                  v for v in  linfo.sparam_vals]
+                                  v for v in  mi.sparam_vals]
                 T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals)
                 isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any
                 for v in sparam_vals
@@ -2320,7 +2499,9 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::U
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
     abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)
-    nothing
+    rt = e.args[1]
+    isa(rt, Type) || (rt = Any)
+    return RTEffects(rt, Any, EFFECTS_UNKNOWN)
 end
 
 function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
@@ -2354,10 +2535,9 @@ function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(
 end
 
 function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsIntState)
-    if e.head === :call
+    if e.head === :call && length(e.args) ≥ 1
         # TODO: We still have non-linearized cglobal
-        @assert e.args[1] === Core.tuple ||
-                e.args[1] === GlobalRef(Core, :tuple)
+        @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple)
     else
         # Some of our tests expect us to handle invalid IR here and error later
         # - permit that for now.
@@ -2391,19 +2571,25 @@ function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::
 end
 
 struct RTEffects
-    rt
-    exct
+    rt::Any
+    exct::Any
     effects::Effects
-    RTEffects(@nospecialize(rt), @nospecialize(exct), effects::Effects) = new(rt, exct, effects)
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function RTEffects(rt, exct, effects::Effects, refinements=nothing)
+        @nospecialize rt exct refinements
+        return new(rt, exct, effects, refinements)
+    end
 end
 
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState)
-    si = StmtInfo(!call_result_unused(sv, sv.currpc))
-    (; rt, exct, effects, info) = abstract_call(interp, arginfo, si, sv)
-    sv.stmt_info[sv.currpc] = info
-    # mark this call statement as DCE-eligible
-    # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret?
-    return RTEffects(rt, exct, effects)
+    unused = call_result_unused(sv, sv.currpc)
+    if unused
+        add_curr_ssaflag!(sv, IR_FLAG_UNUSED)
+    end
+    si = StmtInfo(!unused)
+    call = abstract_call(interp, arginfo, si, sv)
+    sv.stmt_info[sv.currpc] = call.info
+    return call
 end
 
 function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2414,268 +2600,284 @@ function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{
         return RTEffects(Bottom, Any, Effects())
     end
     arginfo = ArgInfo(ea, argtypes)
-    return abstract_call(interp, arginfo, sv)
+    (; rt, exct, effects, refinements) = abstract_call(interp, arginfo, sv)
+    return RTEffects(rt, exct, effects, refinements)
 end
 
-function abstract_eval_the_exception(interp::AbstractInterpreter, sv::InferenceState)
-    return sv.handlers[sv.handler_at[sv.currpc][2]].exct
-end
-abstract_eval_the_exception(::AbstractInterpreter, ::IRInterpretationState) = Any
-
-function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
-                                      sv::AbsIntState)
-    effects = Effects()
-    ehead = e.head
+function abstract_eval_new(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                           sv::AbsIntState)
     𝕃ᵢ = typeinf_lattice(interp)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
-    exct = Any
-    if ehead === :call
-        (; rt, exct, effects) = abstract_eval_call(interp, e, vtypes, sv)
-        t = rt
-    elseif ehead === :new
-        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true)
-        ut = unwrap_unionall(t)
-        exct = Union{ErrorException, TypeError}
-        if isa(ut, DataType) && !isabstracttype(ut)
-            ismutable = ismutabletype(ut)
-            fcount = datatype_fieldcount(ut)
-            nargs = length(e.args) - 1
-            has_any_uninitialized = (fcount === nothing || (fcount > nargs && (let t = t
-                    any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount)
-                end)))
-            if has_any_uninitialized
-                # allocation with undefined field is inconsistent always
-                consistent = ALWAYS_FALSE
-            elseif ismutable
-                # mutable allocation isn't `:consistent`, but we still have a chance that
-                # return type information later refines the `:consistent`-cy of the method
-                consistent = CONSISTENT_IF_NOTRETURNED
-            else
-                consistent = ALWAYS_TRUE # immutable allocation is consistent
-            end
-            if isconcretedispatch(t)
-                nothrow = true
-                @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end
-                ats = Vector{Any}(undef, nargs)
-                local anyrefine = false
-                local allconst = true
-                for i = 1:nargs
-                    at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], vtypes, sv))
-                    ft = fieldtype(t, i)
-                    nothrow && (nothrow = at ⊑ᵢ ft)
-                    at = tmeet(𝕃ᵢ, at, ft)
-                    at === Bottom && @goto always_throw
-                    if ismutable && !isconst(t, i)
-                        ats[i] = ft # can't constrain this field (as it may be modified later)
-                        continue
-                    end
-                    allconst &= isa(at, Const)
-                    if !anyrefine
-                        anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information
-                                    ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type
-                    end
-                    ats[i] = at
+    rt, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true)
+    ut = unwrap_unionall(rt)
+    exct = Union{ErrorException,TypeError}
+    if isa(ut, DataType) && !isabstracttype(ut)
+        ismutable = ismutabletype(ut)
+        fcount = datatype_fieldcount(ut)
+        nargs = length(e.args) - 1
+        has_any_uninitialized = (fcount === nothing || (fcount > nargs && (let t = rt
+                any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount)
+            end)))
+        if has_any_uninitialized
+            # allocation with undefined field is inconsistent always
+            consistent = ALWAYS_FALSE
+        elseif ismutable
+            # mutable allocation isn't `:consistent`, but we still have a chance that
+            # return type information later refines the `:consistent`-cy of the method
+            consistent = CONSISTENT_IF_NOTRETURNED
+        else
+            consistent = ALWAYS_TRUE # immutable allocation is consistent
+        end
+        if isconcretedispatch(rt)
+            nothrow = true
+            @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end
+            ats = Vector{Any}(undef, nargs)
+            local anyrefine = false
+            local allconst = true
+            for i = 1:nargs
+                at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], vtypes, sv))
+                ft = fieldtype(rt, i)
+                nothrow && (nothrow = ⊑(𝕃ᵢ, at, ft))
+                at = tmeet(𝕃ᵢ, at, ft)
+                at === Bottom && return RTEffects(Bottom, TypeError, EFFECTS_THROWS)
+                if ismutable && !isconst(rt, i)
+                    ats[i] = ft # can't constrain this field (as it may be modified later)
+                    continue
                 end
-                # For now, don't allow:
-                # - Const/PartialStruct of mutables (but still allow PartialStruct of mutables
-                #   with `const` fields if anything refined)
-                # - partially initialized Const/PartialStruct
-                if fcount == nargs
-                    if consistent === ALWAYS_TRUE && allconst
-                        argvals = Vector{Any}(undef, nargs)
-                        for j in 1:nargs
-                            argvals[j] = (ats[j]::Const).val
-                        end
-                        t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs))
-                    elseif anyrefine
-                        t = PartialStruct(t, ats)
-                    end
+                allconst &= isa(at, Const)
+                if !anyrefine
+                    anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information
+                                ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type
                 end
-            else
-                t = refine_partial_type(t)
-                nothrow = false
+                ats[i] = at
+            end
+            if fcount == nargs && consistent === ALWAYS_TRUE && allconst
+                argvals = Vector{Any}(undef, nargs)
+                for j in 1:nargs
+                    argvals[j] = (ats[j]::Const).val
+                end
+                rt = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), rt, argvals, nargs))
+            elseif anyrefine || nargs > datatype_min_ninitialized(rt)
+                # propagate partially initialized struct as `PartialStruct` when:
+                # - any refinement information is available (`anyrefine`), or when
+                # - `nargs` is greater than `n_initialized` derived from the struct type
+                #   information alone
+                rt = PartialStruct(rt, ats)
             end
         else
-            consistent = ALWAYS_FALSE
+            rt = refine_partial_type(rt)
             nothrow = false
         end
-        effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
-        nothrow && (exct = Union{})
-    elseif ehead === :splatnew
-        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true)
-        nothrow = false # TODO: More precision
-        if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t)
-            at = abstract_eval_value(interp, e.args[2], vtypes, sv)
-            n = fieldcount(t)
-            if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
-                (let t = t, at = at
-                    all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n)
+    else
+        consistent = ALWAYS_FALSE
+        nothrow = false
+    end
+    nothrow && (exct = Union{})
+    effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_splatnew(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                                sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    rt, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true)
+    nothrow = false
+    if length(e.args) == 2 && isconcretedispatch(rt) && !ismutabletype(rt)
+        at = abstract_eval_value(interp, e.args[2], vtypes, sv)
+        n = fieldcount(rt)
+        if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+            (let t = rt, at = at
+                all(i::Int -> getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n)
+            end))
+            nothrow = isexact
+            rt = Const(ccall(:jl_new_structt, Any, (Any, Any), rt, at.val))
+        elseif (isa(at, PartialStruct) && ⊑(𝕃ᵢ, at, Tuple) && n > 0 &&
+                n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) &&
+                (let t = rt, at = at
+                    all(i::Int -> ⊑(𝕃ᵢ, (at.fields::Vector{Any})[i], fieldtype(t, i)), 1:n)
                 end))
-                nothrow = isexact
-                t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif (isa(at, PartialStruct) && at ⊑ᵢ Tuple && n > 0 && n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) &&
-                    (let t = t, at = at, ⊑ᵢ = ⊑ᵢ
-                        all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n)
-                    end))
-                nothrow = isexact
-                t = PartialStruct(t, at.fields::Vector{Any})
+            nothrow = isexact
+            rt = PartialStruct(rt, at.fields::Vector{Any})
+        end
+    else
+        rt = refine_partial_type(rt)
+    end
+    consistent = !ismutabletype(rt) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED
+    effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
+    return RTEffects(rt, Any, effects)
+end
+
+function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                                          sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    rt = Union{}
+    effects = Effects() # TODO
+    if length(e.args) >= 5
+        ea = e.args
+        argtypes = collect_argtypes(interp, ea, vtypes, sv)
+        if argtypes === nothing
+            rt = Bottom
+            effects = EFFECTS_THROWS
+        else
+            mi = frame_instance(sv)
+            rt = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
+                argtypes[5], argtypes[6:end], mi)
+            if ea[4] !== true && isa(rt, PartialOpaque)
+                rt = widenconst(rt)
+                # Propagation of PartialOpaque disabled
             end
+            if isa(rt, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc)
+                # Infer this now so that the specialization is available to
+                # optimization.
+                argtypes = most_general_argtypes(rt)
+                pushfirst!(argtypes, rt.env)
+                callinfo = abstract_call_opaque_closure(interp, rt,
+                    ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
+                sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+            end
+        end
+    end
+    return RTEffects(rt, Any, effects)
+end
+
+function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                               sv::AbsIntState)
+    effects = EFFECTS_UNKNOWN
+    rt = abstract_eval_value(interp, e.args[1], vtypes, sv)
+    if rt isa Const && rt.val isa Expr
+        # `copyast` makes copies of Exprs
+        rt = Expr
+    end
+    return RTEffects(rt, Any, effects)
+end
+
+function abstract_eval_isdefined(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                                 sv::AbsIntState)
+    sym = e.args[1]
+    rt = Bool
+    effects = EFFECTS_TOTAL
+    exct = Union{}
+    isa(sym, Symbol) && (sym = GlobalRef(frame_module(sv), sym))
+    if isa(sym, SlotNumber) && vtypes !== nothing
+        vtyp = vtypes[slot_id(sym)]
+        if vtyp.typ === Bottom
+            rt = Const(false) # never assigned previously
+        elseif !vtyp.undef
+            rt = Const(true) # definitely assigned previously
+        else # form `Conditional` to refine `vtyp.undef` in the then branch
+            rt = Conditional(sym, vtyp.typ, vtyp.typ; isdefined=true)
+        end
+    elseif isa(sym, GlobalRef)
+        if InferenceParams(interp).assume_bindings_static
+            rt = Const(isdefined_globalref(sym))
+        elseif isdefinedconst_globalref(sym)
+            rt = Const(true)
         else
-            t = refine_partial_type(t)
+            effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)
         end
-        consistent = !ismutabletype(t) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED
-        effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
-    elseif ehead === :new_opaque_closure
-        t = Union{}
-        effects = Effects() # TODO
-        merge_effects!(interp, sv, effects)
-        if length(e.args) >= 4
-            ea = e.args
-            argtypes = collect_argtypes(interp, ea, vtypes, sv)
-            if argtypes === nothing
-                t = Bottom
-            else
-                mi = frame_instance(sv)
-                t = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
-                    argtypes[4], argtypes[5:end], mi)
-                if isa(t, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc)
-                    # Infer this now so that the specialization is available to
-                    # optimization.
-                    argtypes = most_general_argtypes(t)
-                    pushfirst!(argtypes, t.env)
-                    callinfo = abstract_call_opaque_closure(interp, t,
-                        ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
-                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
-                end
+    elseif isexpr(sym, :static_parameter)
+        n = sym.args[1]::Int
+        if 1 <= n <= length(sv.sptypes)
+            sp = sv.sptypes[n]
+            if !sp.undef
+                rt = Const(true)
+            elseif sp.typ === Bottom
+                rt = Const(false)
             end
         end
+    else
+        effects = EFFECTS_UNKNOWN
+    end
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_throw_undef_if_not(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
+    condt = abstract_eval_value(interp, e.args[2], vtypes, sv)
+    condval = maybe_extract_const_bool(condt)
+    rt = Nothing
+    exct = UndefVarError
+    effects = EFFECTS_THROWS
+    if condval isa Bool
+        if condval
+            effects = EFFECTS_TOTAL
+            exct = Union{}
+        else
+            rt = Union{}
+        end
+    elseif !hasintersect(widenconst(condt), Bool)
+        rt = Union{}
+    end
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_the_exception(::AbstractInterpreter, sv::InferenceState)
+    (;handlers, handler_at) = sv.handler_info::HandlerInfo
+    return the_exception_info(handlers[handler_at[sv.currpc][2]].exct)
+end
+abstract_eval_the_exception(::AbstractInterpreter, ::IRInterpretationState) = the_exception_info(Any)
+the_exception_info(@nospecialize t) = RTEffects(t, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE))
+
+function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsIntState)
+    n = e.args[1]::Int
+    nothrow = false
+    if 1 <= n <= length(sv.sptypes)
+        sp = sv.sptypes[n]
+        rt = sp.typ
+        nothrow = !sp.undef
+    else
+        rt = Any
+    end
+    exct = nothrow ? Union{} : UndefVarError
+    effects = Effects(EFFECTS_TOTAL; nothrow)
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                                      sv::AbsIntState)
+    ehead = e.head
+    if ehead === :call
+        return abstract_eval_call(interp, e, vtypes, sv)
+    elseif ehead === :new
+        return abstract_eval_new(interp, e, vtypes, sv)
+    elseif ehead === :splatnew
+        return abstract_eval_splatnew(interp, e, vtypes, sv)
+    elseif ehead === :new_opaque_closure
+        return abstract_eval_new_opaque_closure(interp, e, vtypes, sv)
     elseif ehead === :foreigncall
-        (; rt, exct, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv)
-        t = rt
+        return abstract_eval_foreigncall(interp, e, vtypes, sv)
     elseif ehead === :cfunction
-        effects = EFFECTS_UNKNOWN
-        t = e.args[1]
-        isa(t, Type) || (t = Any)
-        abstract_eval_cfunction(interp, e, vtypes, sv)
+        return abstract_eval_cfunction(interp, e, vtypes, sv)
     elseif ehead === :method
-        t = (length(e.args) == 1) ? Any : Nothing
-        effects = EFFECTS_UNKNOWN
+        rt = (length(e.args) == 1) ? Any : Nothing
+        return RTEffects(rt, Any, EFFECTS_UNKNOWN)
     elseif ehead === :copyast
-        effects = EFFECTS_UNKNOWN
-        t = abstract_eval_value(interp, e.args[1], vtypes, sv)
-        if t isa Const && t.val isa Expr
-            # `copyast` makes copies of Exprs
-            t = Expr
-        end
+        return abstract_eval_copyast(interp, e, vtypes, sv)
     elseif ehead === :invoke || ehead === :invoke_modify
         error("type inference data-flow error: tried to double infer a function")
     elseif ehead === :isdefined
-        sym = e.args[1]
-        t = Bool
-        effects = EFFECTS_TOTAL
-        exct = Union{}
-        if isa(sym, SlotNumber) && vtypes !== nothing
-            vtyp = vtypes[slot_id(sym)]
-            if vtyp.typ === Bottom
-                t = Const(false) # never assigned previously
-            elseif !vtyp.undef
-                t = Const(true) # definitely assigned previously
-            end
-        elseif isa(sym, Symbol)
-            if isdefined(frame_module(sv), sym)
-                t = Const(true)
-            elseif InferenceParams(interp).assume_bindings_static
-                t = Const(false)
-            else
-                effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)
-            end
-        elseif isa(sym, GlobalRef)
-            if isdefined(sym.mod, sym.name)
-                t = Const(true)
-            elseif InferenceParams(interp).assume_bindings_static
-                t = Const(false)
-            else
-                effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)
-            end
-        elseif isexpr(sym, :static_parameter)
-            n = sym.args[1]::Int
-            if 1 <= n <= length(sv.sptypes)
-                sp = sv.sptypes[n]
-                if !sp.undef
-                    t = Const(true)
-                elseif sp.typ === Bottom
-                    t = Const(false)
-                end
-            end
-        else
-            effects = EFFECTS_UNKNOWN
-        end
+        return abstract_eval_isdefined(interp, e, vtypes, sv)
     elseif ehead === :throw_undef_if_not
-        condt = argextype(stmt.args[2], ir)
-        condval = maybe_extract_const_bool(condt)
-        t = Nothing
-        exct = UndefVarError
-        effects = EFFECTS_THROWS
-        if condval isa Bool
-            if condval
-                effects = EFFECTS_TOTAL
-                exct = Union{}
-            else
-                t = Union{}
-            end
-        elseif !hasintersect(windenconst(condt), Bool)
-            t = Union{}
-        end
+        return abstract_eval_throw_undef_if_not(interp, e, vtypes, sv)
     elseif ehead === :boundscheck
-        t = Bool
-        exct = Union{}
-        effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)
+        return RTEffects(Bool, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE))
     elseif ehead === :the_exception
-        t = abstract_eval_the_exception(interp, sv)
-        exct = Union{}
-        effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)
+        return abstract_eval_the_exception(interp, sv)
     elseif ehead === :static_parameter
-        n = e.args[1]::Int
-        nothrow = false
-        exct = UndefVarError
-        if 1 <= n <= length(sv.sptypes)
-            sp = sv.sptypes[n]
-            t = sp.typ
-            nothrow = !sp.undef
-        else
-            t = Any
-        end
-        if nothrow
-            exct = Union{}
-        end
-        effects = Effects(EFFECTS_TOTAL; nothrow)
+        return abstract_eval_static_parameter(interp, e, sv)
     elseif ehead === :gc_preserve_begin || ehead === :aliasscope
-        t = Any
-        exct = Union{}
-        effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE, effect_free=EFFECT_FREE_GLOBALLY)
-    elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception || ehead === :global || ehead === :popaliasscope
-        t = Nothing
-        exct = Union{}
-        effects = Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY)
-    elseif ehead === :method
-        t = Method
-        exct = Union{}
-        effects = Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY)
+        return RTEffects(Any, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE, effect_free=EFFECT_FREE_GLOBALLY))
+    elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception ||
+           ehead === :global || ehead === :popaliasscope
+        return RTEffects(Nothing, Union{}, Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY))
+    elseif ehead === :globaldecl
+        return RTEffects(Nothing, Any, EFFECTS_UNKNOWN)
     elseif ehead === :thunk
-        t = Any
-        effects = EFFECTS_UNKNOWN
-    elseif false
-        @label always_throw
-        t = Bottom
-        effects = EFFECTS_THROWS
-    else
-        t = abstract_eval_value_expr(interp, e, sv)
-        # N.B.: abstract_eval_value_expr can modify the global effects, but
-        # we move out any arguments with effects during SSA construction later
-        # and recompute the effects.
-        effects = EFFECTS_TOTAL
+        return RTEffects(Any, Any, Effects())
     end
-    return RTEffects(t, exct, effects)
+    # N.B.: abstract_eval_value_expr can modify the global effects, but
+    # we move out any arguments with effects during SSA construction later
+    # and recompute the effects.
+    rt = abstract_eval_value_expr(interp, e, sv)
+    return RTEffects(rt, Any, EFFECTS_TOTAL)
 end
 
 # refine the result of instantiation of partially-known type `t` if some invariant can be assumed
@@ -2686,7 +2888,7 @@ function refine_partial_type(@nospecialize t)
         # if the first/second parameter of `NamedTuple` is known to be empty,
         # the second/first argument should also be empty tuple type,
         # so refine it here
-        return Const(NamedTuple())
+        return Const((;))
     end
     return t
 end
@@ -2717,7 +2919,8 @@ function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Un
         val = phi.values[i]
         # N.B.: Phi arguments are restricted to not have effects, so we can drop
         # them here safely.
-        rt = tmerge(typeinf_lattice(interp), rt, abstract_eval_special_value(interp, val, vtypes, sv).rt)
+        thisval = abstract_eval_special_value(interp, val, vtypes, sv).rt
+        rt = tmerge(typeinf_lattice(interp), rt, thisval)
     end
     return rt
 end
@@ -2731,11 +2934,18 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
     if !isa(e, Expr)
         if isa(e, PhiNode)
             add_curr_ssaflag!(sv, IR_FLAGS_REMOVABLE)
-            return RTEffects(abstract_eval_phi(interp, e, vtypes, sv), Union{}, EFFECTS_TOTAL)
-        end
-        (; rt, exct, effects) = abstract_eval_special_value(interp, e, vtypes, sv)
+            # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
+            # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
+            # over the first and last iterations. By tmerging in the current old_rt, we ensure that
+            # we will not lose an intermediate value.
+            rt = abstract_eval_phi(interp, e, vtypes, sv)
+            old_rt = sv.ssavaluetypes[sv.currpc]
+            rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
+            return RTEffects(rt, Union{}, EFFECTS_TOTAL)
+        end
+        (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, e, vtypes, sv)
     else
-        (; rt, exct, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv)
+        (; rt, exct, effects, refinements) = abstract_eval_statement_expr(interp, e, vtypes, sv)
         if effects.noub === NOUB_IF_NOINBOUNDS
             if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
                 effects = Effects(effects; noub=ALWAYS_FALSE)
@@ -2765,7 +2975,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
     set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS)
     merge_effects!(interp, sv, effects)
 
-    return RTEffects(rt, exct, effects)
+    return RTEffects(rt, exct, effects, refinements)
 end
 
 function override_effects(effects::Effects, override::EffectsOverride)
@@ -2777,14 +2987,16 @@ function override_effects(effects::Effects, override::EffectsOverride)
         notaskstate = override.notaskstate ? true : effects.notaskstate,
         inaccessiblememonly = override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly,
         noub = override.noub ? ALWAYS_TRUE :
-               override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE ? NOUB_IF_NOINBOUNDS :
-               effects.noub)
+            (override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE) ? NOUB_IF_NOINBOUNDS :
+            effects.noub,
+        nortcall = override.nortcall ? true : effects.nortcall)
 end
 
 isdefined_globalref(g::GlobalRef) = !iszero(ccall(:jl_globalref_boundp, Cint, (Any,), g))
+isdefinedconst_globalref(g::GlobalRef) = isconst(g) && isdefined_globalref(g)
 
 function abstract_eval_globalref_type(g::GlobalRef)
-    if isdefined_globalref(g) && isconst(g)
+    if isdefinedconst_globalref(g)
         return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
     end
     ty = ccall(:jl_get_binding_type, Any, (Any, Any), g.mod, g.name)
@@ -2803,18 +3015,22 @@ function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv::
         if is_mutation_free_argtype(rt)
             inaccessiblememonly = ALWAYS_TRUE
         end
-    elseif isdefined_globalref(g)
-        nothrow = true
     elseif InferenceParams(interp).assume_bindings_static
         consistent = inaccessiblememonly = ALWAYS_TRUE
-        rt = Union{}
+        if isdefined_globalref(g)
+            nothrow = true
+        else
+            rt = Union{}
+        end
+    elseif isdefinedconst_globalref(g)
+        nothrow = true
     end
     return RTEffects(rt, nothrow ? Union{} : UndefVarError, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly))
 end
 
 function handle_global_assignment!(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(newty))
     effect_free = ALWAYS_FALSE
-    nothrow = global_assignment_nothrow(lhs.mod, lhs.name, newty)
+    nothrow = global_assignment_nothrow(lhs.mod, lhs.name, ignorelimited(newty))
     inaccessiblememonly = ALWAYS_FALSE
     if !nothrow
         sub_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
@@ -2917,7 +3133,7 @@ end
         # pick up the first "interesting" slot, convert `rt` to its `Conditional`
         # TODO: ideally we want `Conditional` and `InterConditional` to convey
         # constraints on multiple slots
-        for slot_id = 1:info.nargs
+        for slot_id = 1:Int(info.nargs)
             rt = bool_rt_to_conditional(rt, slot_id, info)
             rt isa InterConditional && break
         end
@@ -2928,6 +3144,9 @@ end
     ⊑ᵢ = ⊑(typeinf_lattice(info.interp))
     old = info.slottypes[slot_id]
     new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional
+    if isvarargtype(old) || isvarargtype(new)
+        return rt
+    end
     if new ⊑ᵢ old && !(old ⊑ᵢ new)
         if isa(rt, Const)
             val = rt.val
@@ -2952,15 +3171,16 @@ end
 @nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
     if isa(rt, PartialStruct)
         fields = copy(rt.fields)
-        local anyrefine = false
+        anyrefine = !isvarargtype(rt.fields[end]) &&
+            length(rt.fields) > datatype_min_ninitialized(unwrap_unionall(rt.typ))
         𝕃 = typeinf_lattice(info.interp)
+        ⊏ = strictpartialorder(𝕃)
         for i in 1:length(fields)
             a = fields[i]
             a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info)
             if !anyrefine
                 # TODO: consider adding && const_prop_profitable(a) here?
-                anyrefine = has_extended_info(a) ||
-                            ⊏(𝕃, a, fieldtype(rt.typ, i))
+                anyrefine = has_extended_info(a) || a ⊏ fieldtype(rt.typ, i)
             end
             fields[i] = a
         end
@@ -3006,39 +3226,43 @@ struct BasicStmtChange
     rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value
     exct::Any
     # TODO effects::Effects
-    BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize(rt), @nospecialize(exct)) = new(changes, rt, exct)
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function BasicStmtChange(changes::Union{Nothing,StateUpdate}, rt::Any, exct::Any,
+                             refinements=nothing)
+        @nospecialize rt exct refinements
+        return new(changes, rt, exct, refinements)
+    end
 end
 
 @inline function abstract_eval_basic_statement(interp::AbstractInterpreter,
     @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState)
     if isa(stmt, NewvarNode)
-        changes = StateUpdate(stmt.slot, VarState(Bottom, true), pc_vartable, false)
+        changes = StateUpdate(stmt.slot, VarState(Bottom, true))
         return BasicStmtChange(changes, nothing, Union{})
     elseif !isa(stmt, Expr)
         (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
         return BasicStmtChange(nothing, rt, exct)
     end
     changes = nothing
-    stmt = stmt::Expr
     hd = stmt.head
     if hd === :(=)
-        (; rt, exct) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
+        (; rt, exct, refinements) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
         if rt === Bottom
-            return BasicStmtChange(nothing, Bottom, exct)
+            return BasicStmtChange(nothing, Bottom, exct, refinements)
         end
         lhs = stmt.args[1]
         if isa(lhs, SlotNumber)
-            changes = StateUpdate(lhs, VarState(rt, false), pc_vartable, false)
+            changes = StateUpdate(lhs, VarState(rt, false))
         elseif isa(lhs, GlobalRef)
             handle_global_assignment!(interp, frame, lhs, rt)
         elseif !isa(lhs, SSAValue)
             merge_effects!(interp, frame, EFFECTS_UNKNOWN)
         end
-        return BasicStmtChange(changes, rt, exct)
+        return BasicStmtChange(changes, rt, exct, refinements)
     elseif hd === :method
         fname = stmt.args[1]
         if isa(fname, SlotNumber)
-            changes = StateUpdate(fname, VarState(Any, false), pc_vartable, false)
+            changes = StateUpdate(fname, VarState(Any, false))
         end
         return BasicStmtChange(changes, nothing, Union{})
     elseif (hd === :code_coverage_effect || (
@@ -3046,8 +3270,8 @@ end
             is_meta_expr(stmt)))
         return BasicStmtChange(nothing, Nothing, Bottom)
     else
-        (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, rt, exct)
+        (; rt, exct, refinements) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
+        return BasicStmtChange(nothing, rt, exct, refinements)
     end
 end
 
@@ -3080,7 +3304,7 @@ function update_bestguess!(interp::AbstractInterpreter, frame::InferenceState,
     # narrow representation of bestguess slightly to prepare for tmerge with rt
     if rt isa InterConditional && bestguess isa Const
         slot_id = rt.slot
-        old_id_type = slottypes[slot_id]
+        old_id_type = widenconditional(slottypes[slot_id])
         if bestguess.val === true && rt.elsetype !== Bottom
             bestguess = InterConditional(slot_id, old_id_type, Bottom)
         elseif bestguess.val === false && rt.thentype !== Bottom
@@ -3107,22 +3331,21 @@ end
 
 function update_exc_bestguess!(interp::AbstractInterpreter, @nospecialize(exct), frame::InferenceState)
     𝕃ₚ = ipo_lattice(interp)
-    cur_hand = frame.handler_at[frame.currpc][1]
-    if cur_hand == 0
+    handler = gethandler(frame)
+    if handler === nothing
         if !⊑(𝕃ₚ, exct, frame.exc_bestguess)
             frame.exc_bestguess = tmerge(𝕃ₚ, frame.exc_bestguess, exct)
             update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int
-                caller_handler = caller.handler_at[caller_pc][1]
-                caller_exct = caller_handler == 0 ?
-                    caller.exc_bestguess : caller.handlers[caller_handler].exct
+                caller_handler = gethandler(caller, caller_pc)
+                caller_exct = caller_handler === nothing ?
+                    caller.exc_bestguess : caller_handler.exct
                 return caller_exct !== Any
             end
         end
     else
-        handler_frame = frame.handlers[cur_hand]
-        if !⊑(𝕃ₚ, exct, handler_frame.exct)
-            handler_frame.exct = tmerge(𝕃ₚ, handler_frame.exct, exct)
-            enter = frame.src.code[handler_frame.enter_idx]::EnterNode
+        if !⊑(𝕃ₚ, exct, handler.exct)
+            handler.exct = tmerge(𝕃ₚ, handler.exct, exct)
+            enter = frame.src.code[handler.enter_idx]::EnterNode
             exceptbb = block_for_inst(frame.cfg, enter.catch_dest)
             push!(frame.ip, exceptbb)
         end
@@ -3132,9 +3355,9 @@ end
 function propagate_to_error_handler!(currstate::VarTable, frame::InferenceState, 𝕃ᵢ::AbstractLattice)
     # If this statement potentially threw, propagate the currstate to the
     # exception handler, BEFORE applying any state changes.
-    cur_hand = frame.handler_at[frame.currpc][1]
-    if cur_hand != 0
-        enter = frame.src.code[frame.handlers[cur_hand].enter_idx]::EnterNode
+    curr_hand = gethandler(frame)
+    if curr_hand !== nothing
+        enter = frame.src.code[curr_hand.enter_idx]::EnterNode
         exceptbb = block_for_inst(frame.cfg, enter.catch_dest)
         if update_bbstate!(𝕃ᵢ, frame, exceptbb, currstate)
             push!(frame.ip, exceptbb)
@@ -3153,7 +3376,6 @@ end
 # make as much progress on `frame` as possible (without handling cycles)
 function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     @assert !is_inferred(frame)
-    frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
     ssavaluetypes = frame.ssavaluetypes
     bbs = frame.cfg.blocks
@@ -3189,7 +3411,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     @goto branch
                 elseif isa(stmt, GotoIfNot)
                     condx = stmt.cond
-                    condxslot = ssa_def_slot(condx, frame)
+                    condslot = ssa_def_slot(condx, frame)
                     condt = abstract_eval_value(interp, condx, currstate, frame)
                     if condt === Bottom
                         ssavaluetypes[currpc] = Bottom
@@ -3197,10 +3419,10 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         @goto find_next_bb
                     end
                     orig_condt = condt
-                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condxslot, SlotNumber)
+                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condslot, SlotNumber)
                         # if this non-`Conditional` object is a slot, we form and propagate
                         # the conditional constraint on it
-                        condt = Conditional(condxslot, Const(true), Const(false))
+                        condt = Conditional(condslot, Const(true), Const(false))
                     end
                     condval = maybe_extract_const_bool(condt)
                     nothrow = (condval !== nothing) || ⊑(𝕃ᵢ, orig_condt, Bool)
@@ -3246,21 +3468,31 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         # We continue with the true branch, but process the false
                         # branch here.
                         if isa(condt, Conditional)
-                            else_change = conditional_change(𝕃ᵢ, currstate, condt.elsetype, condt.slot)
+                            else_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#false)
                             if else_change !== nothing
-                                false_vartable = stoverwrite1!(copy(currstate), else_change)
+                                elsestate = copy(currstate)
+                                stoverwrite1!(elsestate, else_change)
+                            elseif condslot isa SlotNumber
+                                elsestate = copy(currstate)
                             else
-                                false_vartable = currstate
+                                elsestate = currstate
+                            end
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this else branch
+                                stoverwrite1!(elsestate, condition_object_change(currstate, condt, condslot, #=then_or_else=#false))
                             end
-                            changed = update_bbstate!(𝕃ᵢ, frame, falsebb, false_vartable)
-                            then_change = conditional_change(𝕃ᵢ, currstate, condt.thentype, condt.slot)
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, elsestate)
+                            then_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#true)
+                            thenstate = currstate
                             if then_change !== nothing
-                                stoverwrite1!(currstate, then_change)
+                                stoverwrite1!(thenstate, then_change)
+                            end
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this then branch
+                                stoverwrite1!(thenstate, condition_object_change(currstate, condt, condslot, #=then_or_else=#true))
                             end
                         else
-                            changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate)
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate)
                         end
-                        if changed
+                        if else_changed
                             handle_control_backedge!(interp, frame, currpc, stmt.dest)
                             push!(W, falsebb)
                         end
@@ -3281,7 +3513,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
                     if isdefined(stmt, :scope)
                         scopet = abstract_eval_value(interp, stmt.scope, currstate, frame)
-                        handler = frame.handlers[frame.handler_at[frame.currpc+1][1]]
+                        handler = gethandler(frame, frame.currpc+1)::TryCatchFrame
                         @assert handler.scopet !== nothing
                         if !⊑(𝕃ᵢ, scopet, handler.scopet)
                             handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet)
@@ -3300,7 +3532,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 # Fall through terminator - treat as regular stmt
             end
             # Process non control-flow statements
-            (; changes, rt, exct) = abstract_eval_basic_statement(interp,
+            (; changes, rt, exct, refinements) = abstract_eval_basic_statement(interp,
                 stmt, currstate, frame)
             if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW)
                 if exct !== Union{}
@@ -3321,6 +3553,15 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
             if changes !== nothing
                 stoverwrite1!(currstate, changes)
             end
+            if refinements isa SlotRefinement
+                apply_refinement!(𝕃ᵢ, refinements.slot, refinements.typ, currstate, changes)
+            elseif refinements isa Vector{Any}
+                for i = 1:length(refinements)
+                    newtyp = refinements[i]
+                    newtyp === nothing && continue
+                    apply_refinement!(𝕃ᵢ, SlotNumber(i), newtyp, currstate, changes)
+                end
+            end
             if rt === nothing
                 ssavaluetypes[currpc] = Any
                 continue
@@ -3355,17 +3596,31 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
         end
     end # while currbb <= nbbs
 
-    frame.dont_work_on_me = false
     nothing
 end
 
-function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecialize(typ), slot::Int)
-    vtype = state[slot]
+function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp),
+                           currstate::VarTable, currchanges::Union{Nothing,StateUpdate})
+    if currchanges !== nothing && currchanges.var == slot
+        return # type propagation from statement (like assignment) should have the precedence
+    end
+    vtype = currstate[slot_id(slot)]
     oldtyp = vtype.typ
-    if iskindtype(typ)
+    ⊏ = strictpartialorder(𝕃ᵢ)
+    if newtyp ⊏ oldtyp
+        stmtupdate = StateUpdate(slot, VarState(newtyp, vtype.undef))
+        stoverwrite1!(currstate, stmtupdate)
+    end
+end
+
+function conditional_change(𝕃ᵢ::AbstractLattice, currstate::VarTable, condt::Conditional, then_or_else::Bool)
+    vtype = currstate[condt.slot]
+    oldtyp = vtype.typ
+    newtyp = then_or_else ? condt.thentype : condt.elsetype
+    if iskindtype(newtyp)
         # this code path corresponds to the special handling for `isa(x, iskindtype)` check
         # implemented within `abstract_call_builtin`
-    elseif ⊑(𝕃ᵢ, ignorelimited(typ), ignorelimited(oldtyp))
+    elseif ⊑(𝕃ᵢ, ignorelimited(newtyp), ignorelimited(oldtyp))
         # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
         # since we probably formed these types with `typesubstract`,
         # the comparison is likely simple
@@ -3375,24 +3630,42 @@ function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecia
     if oldtyp isa LimitedAccuracy
         # typ is better unlimited, but we may still need to compute the tmeet with the limit
         # "causes" since we ignored those in the comparison
-        typ = tmerge(𝕃ᵢ, typ, LimitedAccuracy(Bottom, oldtyp.causes))
+        newtyp = tmerge(𝕃ᵢ, newtyp, LimitedAccuracy(Bottom, oldtyp.causes))
     end
-    return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true)
+    # if this `Conditional` is from from `@isdefined condt.slot`, refine its `undef` information
+    newundef = condt.isdefined ? !then_or_else : vtype.undef
+    return StateUpdate(SlotNumber(condt.slot), VarState(newtyp, newundef), #=conditional=#true)
+end
+
+function condition_object_change(currstate::VarTable, condt::Conditional,
+                                 condslot::SlotNumber, then_or_else::Bool)
+    vtype = currstate[slot_id(condslot)]
+    newcondt = Conditional(condt.slot,
+        then_or_else ? condt.thentype : Union{},
+        then_or_else ? Union{} : condt.elsetype)
+    return StateUpdate(condslot, VarState(newcondt, vtype.undef))
 end
 
 # make as much progress on `frame` as possible (by handling cycles)
 function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
     typeinf_local(interp, frame)
+    @assert isempty(frame.ip)
+    callstack = frame.callstack::Vector{AbsIntState}
+    frame.cycleid == length(callstack) && return true
 
-    # If the current frame is part of a cycle, solve the cycle before finishing
     no_active_ips_in_callers = false
-    while !no_active_ips_in_callers
+    while true
+        # If the current frame is not the top part of a cycle, continue to the top of the cycle before resuming work
+        frame.cycleid == frame.frameid || return false
+        # If done, return and finalize this cycle
+        no_active_ips_in_callers && return true
+        # Otherwise, do at least one iteration over the entire current cycle
         no_active_ips_in_callers = true
-        for caller in frame.callers_in_cycle
-            caller.dont_work_on_me && return false # cycle is above us on the stack
+        for i = reverse(frame.cycleid:length(callstack))
+            caller = callstack[i]::InferenceState
             if !isempty(caller.ip)
                 # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
-                # `frame.callers_in_cycle`, which is why making incremental progress requires the
+                # `frame.cycleid`, which is why making incremental progress requires the
                 # outer while loop.
                 typeinf_local(interp, caller)
                 no_active_ips_in_callers = false
diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl
index 0102a59667c1e..645c865d085b3 100644
--- a/base/compiler/abstractlattice.jl
+++ b/base/compiler/abstractlattice.jl
@@ -288,9 +288,13 @@ has_extended_unionsplit(::JLTypeLattice) = false
 ⊑(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(𝕃, a, b)
 ⊏(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(𝕃, a, b)
 ⋤(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(𝕃, a, b)
+tmerge(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmerge(𝕃, a, b)
+tmeet(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmeet(𝕃, a, b)
 partialorder(𝕃::AbstractLattice) = ⊑(𝕃)
 strictpartialorder(𝕃::AbstractLattice) = ⊏(𝕃)
 strictneqpartialorder(𝕃::AbstractLattice) = ⋤(𝕃)
+join(𝕃::AbstractLattice) = tmerge(𝕃)
+meet(𝕃::AbstractLattice) = tmeet(𝕃)
 
 # Fallbacks for external packages using these methods
 const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance)
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 8b9c26be2ec81..5cc01391267d7 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -14,6 +14,7 @@ const setproperty! = Core.setfield!
 const swapproperty! = Core.swapfield!
 const modifyproperty! = Core.modifyfield!
 const replaceproperty! = Core.replacefield!
+const _DOCS_ALIASING_WARNING = ""
 
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false)
 
@@ -47,10 +48,12 @@ struct EffectsOverride
     inaccessiblememonly::Bool
     noub::Bool
     noub_if_noinbounds::Bool
+    consistent_overlay::Bool
+    nortcall::Bool
 end
 function EffectsOverride(
     override::EffectsOverride =
-        EffectsOverride(false, false, false, false, false, false, false, false, false);
+        EffectsOverride(false, false, false, false, false, false, false, false, false, false, false);
     consistent::Bool = override.consistent,
     effect_free::Bool = override.effect_free,
     nothrow::Bool = override.nothrow,
@@ -59,7 +62,9 @@ function EffectsOverride(
     notaskstate::Bool = override.notaskstate,
     inaccessiblememonly::Bool = override.inaccessiblememonly,
     noub::Bool = override.noub,
-    noub_if_noinbounds::Bool = override.noub_if_noinbounds)
+    noub_if_noinbounds::Bool = override.noub_if_noinbounds,
+    consistent_overlay::Bool = override.consistent_overlay,
+    nortcall::Bool = override.nortcall)
     return EffectsOverride(
         consistent,
         effect_free,
@@ -69,9 +74,11 @@ function EffectsOverride(
         notaskstate,
         inaccessiblememonly,
         noub,
-        noub_if_noinbounds)
+        noub_if_noinbounds,
+        consistent_overlay,
+        nortcall)
 end
-const NUM_EFFECTS_OVERRIDES = 9 # sync with julia.h
+const NUM_EFFECTS_OVERRIDES = 11 # sync with julia.h
 
 # essential files and libraries
 include("essentials.jl")
@@ -176,6 +183,17 @@ something(x::Any, y...) = x
 # compiler #
 ############
 
+baremodule BuildSettings
+using Core: ARGS, include
+using Core.Compiler: >, getindex, length
+
+global MAX_METHODS::Int = 3
+
+if length(ARGS) > 2 && ARGS[2] === "--buildsettings"
+    include(BuildSettings, ARGS[3])
+end
+end
+
 if false
     import Base: Base, @show
 else
@@ -203,6 +221,7 @@ include("compiler/ssair/ir.jl")
 include("compiler/ssair/tarjan.jl")
 
 include("compiler/abstractlattice.jl")
+include("compiler/stmtinfo.jl")
 include("compiler/inferenceresult.jl")
 include("compiler/inferencestate.jl")
 
@@ -210,7 +229,6 @@ include("compiler/typeutils.jl")
 include("compiler/typelimits.jl")
 include("compiler/typelattice.jl")
 include("compiler/tfuncs.jl")
-include("compiler/stmtinfo.jl")
 
 include("compiler/abstractinterpretation.jl")
 include("compiler/typeinfer.jl")
diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl
index a3d30baef9efa..7778c96e019e5 100644
--- a/base/compiler/effects.jl
+++ b/base/compiler/effects.jl
@@ -43,16 +43,24 @@ following meanings:
     except that it may access or modify mutable memory pointed to by its call arguments.
     This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable.
     This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute.
-- `noub::UInt8`: indicates that the method will not execute any undefined behavior (for any input).
-  Note that undefined behavior may technically cause the method to violate any other effect
-  assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this,
-  and they assume the absence of undefined behavior.
-  * `ALWAYS_TRUE`: this method is guaranteed to not execute any undefined behavior.
+- `noub::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to not execute any undefined behavior (for any input).
   * `ALWAYS_FALSE`: this method may execute undefined behavior.
   * `NOUB_IF_NOINBOUNDS`: this method is guaranteed to not execute any undefined behavior
     if the caller does not set nor propagate the `@inbounds` context.
-- `nonoverlayed::Bool`: indicates that any methods that may be called within this method
-  are not defined in an [overlayed method table](@ref OverlayMethodTable).
+  Note that undefined behavior may technically cause the method to violate any other effect
+  assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this,
+  and they assume the absence of undefined behavior.
+- `nonoverlayed::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to not invoke any methods that defined in an
+    [overlayed method table](@ref OverlayMethodTable).
+  * `CONSISTENT_OVERLAY`: this method may invoke overlayed methods, but all such overlayed
+    methods are `:consistent` with their non-overlayed original counterparts
+    (see [`Base.@assume_effects`](@ref) for the exact definition of `:consistenct`-cy).
+  * `ALWAYS_FALSE`: this method may invoke overlayed methods.
+- `nortcall::Bool`: this method does not call `Core.Compiler.return_type`,
+  and it is guaranteed that any other methods this method might call also do not call
+  `Core.Compiler.return_type`.
 
 Note that the representations above are just internal implementation details and thus likely
 to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation
@@ -94,8 +102,13 @@ The output represents the state of different effect properties in the following
     - `+u` (green): `true`
     - `-u` (red): `false`
     - `?u` (yellow): `NOUB_IF_NOINBOUNDS`
-
-Additionally, if the `nonoverlayed` property is false, a red prime symbol (′) is displayed after the tuple.
+8. `:nonoverlayed` (`o`):
+    - `+o` (green): `ALWAYS_TRUE`
+    - `-o` (red): `ALWAYS_FALSE`
+    - `?o` (yellow): `CONSISTENT_OVERLAY`
+9. `:nortcall` (`r`):
+    - `+r` (green): `true`
+    - `-r` (red): `false`
 """
 struct Effects
     consistent::UInt8
@@ -105,7 +118,8 @@ struct Effects
     notaskstate::Bool
     inaccessiblememonly::UInt8
     noub::UInt8
-    nonoverlayed::Bool
+    nonoverlayed::UInt8
+    nortcall::Bool
     function Effects(
         consistent::UInt8,
         effect_free::UInt8,
@@ -114,7 +128,8 @@ struct Effects
         notaskstate::Bool,
         inaccessiblememonly::UInt8,
         noub::UInt8,
-        nonoverlayed::Bool)
+        nonoverlayed::UInt8,
+        nortcall::Bool)
         return new(
             consistent,
             effect_free,
@@ -123,7 +138,8 @@ struct Effects
             notaskstate,
             inaccessiblememonly,
             noub,
-            nonoverlayed)
+            nonoverlayed,
+            nortcall)
     end
 end
 
@@ -150,12 +166,15 @@ const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1
 # :noub bits
 const NOUB_IF_NOINBOUNDS = 0x01 << 1
 
-const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  true)
-const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  true)
-const EFFECTS_UNKNOWN  = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, true) # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
-const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, false) # unknown really
+# :nonoverlayed bits
+const CONSISTENT_OVERLAY = 0x01 << 1
+
+const EFFECTS_TOTAL   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_THROWS  = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_TRUE, false) # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
 
-function Effects(effects::Effects = _EFFECTS_UNKNOWN;
+function Effects(effects::Effects=Effects(
+    ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, false);
     consistent::UInt8 = effects.consistent,
     effect_free::UInt8 = effects.effect_free,
     nothrow::Bool = effects.nothrow,
@@ -163,7 +182,8 @@ function Effects(effects::Effects = _EFFECTS_UNKNOWN;
     notaskstate::Bool = effects.notaskstate,
     inaccessiblememonly::UInt8 = effects.inaccessiblememonly,
     noub::UInt8 = effects.noub,
-    nonoverlayed::Bool = effects.nonoverlayed)
+    nonoverlayed::UInt8 = effects.nonoverlayed,
+    nortcall::Bool = effects.nortcall)
     return Effects(
         consistent,
         effect_free,
@@ -172,7 +192,8 @@ function Effects(effects::Effects = _EFFECTS_UNKNOWN;
         notaskstate,
         inaccessiblememonly,
         noub,
-        nonoverlayed)
+        nonoverlayed,
+        nortcall)
 end
 
 function is_better_effects(new::Effects, old::Effects)
@@ -229,11 +250,19 @@ function is_better_effects(new::Effects, old::Effects)
     elseif new.noub != old.noub
         return false
     end
-    if new.nonoverlayed
-        any_improved |= !old.nonoverlayed
+    if new.nonoverlayed == ALWAYS_TRUE
+        any_improved |= old.nonoverlayed != ALWAYS_TRUE
+    elseif new.nonoverlayed == CONSISTENT_OVERLAY
+        old.nonoverlayed == ALWAYS_TRUE && return false
+        any_improved |= old.nonoverlayed != CONSISTENT_OVERLAY
     elseif new.nonoverlayed != old.nonoverlayed
         return false
     end
+    if new.nortcall
+        any_improved |= !old.nortcall
+    elseif new.nortcall != old.nortcall
+        return false
+    end
     return any_improved
 end
 
@@ -246,7 +275,8 @@ function merge_effects(old::Effects, new::Effects)
         merge_effectbits(old.notaskstate, new.notaskstate),
         merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly),
         merge_effectbits(old.noub, new.noub),
-        merge_effectbits(old.nonoverlayed, new.nonoverlayed))
+        merge_effectbits(old.nonoverlayed, new.nonoverlayed),
+        merge_effectbits(old.nortcall, new.nortcall))
 end
 
 function merge_effectbits(old::UInt8, new::UInt8)
@@ -265,17 +295,19 @@ is_notaskstate(effects::Effects)         = effects.notaskstate
 is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE
 is_noub(effects::Effects)                = effects.noub === ALWAYS_TRUE
 is_noub_if_noinbounds(effects::Effects)  = effects.noub === NOUB_IF_NOINBOUNDS
-is_nonoverlayed(effects::Effects)        = effects.nonoverlayed
+is_nonoverlayed(effects::Effects)        = effects.nonoverlayed === ALWAYS_TRUE
+is_nortcall(effects::Effects)            = effects.nortcall
 
 # implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here
-is_foldable(effects::Effects) =
+is_foldable(effects::Effects, check_rtcall::Bool=false) =
     is_consistent(effects) &&
     (is_noub(effects) || is_noub_if_noinbounds(effects)) &&
     is_effect_free(effects) &&
-    is_terminates(effects)
+    is_terminates(effects) &&
+    (!check_rtcall || is_nortcall(effects))
 
-is_foldable_nothrow(effects::Effects) =
-    is_foldable(effects) &&
+is_foldable_nothrow(effects::Effects, check_rtcall::Bool=false) =
+    is_foldable(effects, check_rtcall) &&
     is_nothrow(effects)
 
 # TODO add `is_noub` here?
@@ -295,6 +327,8 @@ is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect
 
 is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY
 
+is_consistent_overlay(effects::Effects) = effects.nonoverlayed === CONSISTENT_OVERLAY
+
 function encode_effects(e::Effects)
     return ((e.consistent          % UInt32) << 0)  |
            ((e.effect_free         % UInt32) << 3)  |
@@ -303,7 +337,8 @@ function encode_effects(e::Effects)
            ((e.notaskstate         % UInt32) << 7)  |
            ((e.inaccessiblememonly % UInt32) << 8)  |
            ((e.noub                % UInt32) << 10) |
-           ((e.nonoverlayed        % UInt32) << 12)
+           ((e.nonoverlayed        % UInt32) << 12) |
+           ((e.nortcall            % UInt32) << 14)
 end
 
 function decode_effects(e::UInt32)
@@ -315,7 +350,8 @@ function decode_effects(e::UInt32)
         _Bool((e >> 7) & 0x01),
         UInt8((e >> 8) & 0x03),
         UInt8((e >> 10) & 0x03),
-        _Bool((e >> 12) & 0x01))
+        UInt8((e >> 12) & 0x03),
+        _Bool((e >> 14) & 0x01))
 end
 
 function encode_effects_override(eo::EffectsOverride)
@@ -329,6 +365,8 @@ function encode_effects_override(eo::EffectsOverride)
     eo.inaccessiblememonly && (e |= (0x0001 << 6))
     eo.noub                && (e |= (0x0001 << 7))
     eo.noub_if_noinbounds  && (e |= (0x0001 << 8))
+    eo.consistent_overlay  && (e |= (0x0001 << 9))
+    eo.nortcall            && (e |= (0x0001 << 10))
     return e
 end
 
@@ -342,7 +380,9 @@ function decode_effects_override(e::UInt16)
         !iszero(e & (0x0001 << 5)),
         !iszero(e & (0x0001 << 6)),
         !iszero(e & (0x0001 << 7)),
-        !iszero(e & (0x0001 << 8)))
+        !iszero(e & (0x0001 << 8)),
+        !iszero(e & (0x0001 << 9)),
+        !iszero(e & (0x0001 << 10)))
 end
 
 decode_statement_effects_override(ssaflag::UInt32) =
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 86eed13686ae9..7da96c4cc2e93 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -1,77 +1,81 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) ->
-        (cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-
-Returns argument types `cache_argtypes::Vector{Any}` for `linfo` that are in the native
-Julia type domain. `overridden_by_const::BitVector` is all `false` meaning that
-there is no additional extended lattice information there.
-
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ForwardableArgtypes) ->
-        (cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-
-Returns cache-correct extended lattice argument types `cache_argtypes::Vector{Any}`
-for `linfo` given some `argtypes` accompanied by `overridden_by_const::BitVector`
-that marks which argument contains additional extended lattice information.
-
-In theory, there could be a `cache` containing a matching `InferenceResult`
-for the provided `linfo` and `given_argtypes`. The purpose of this function is
-to return a valid value for `cache_lookup(𝕃, linfo, argtypes, cache).argtypes`,
-so that we can construct cache-correct `InferenceResult`s in the first place.
-"""
-function matching_cache_argtypes end
-
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance)
-    mthd = isa(linfo.def, Method) ? linfo.def::Method : nothing
-    cache_argtypes = most_general_argtypes(mthd, linfo.specTypes)
-    return cache_argtypes, falses(length(cache_argtypes))
+function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance)
+    (; def, specTypes) = mi
+    return most_general_argtypes(isa(def, Method) ? def : nothing, specTypes)
 end
 
-struct SimpleArgtypes <: ForwardableArgtypes
+struct SimpleArgtypes
     argtypes::Vector{Any}
 end
 
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::SimpleArgtypes)
+# Like `SimpleArgtypes`, but allows the argtypes to be wider than the current call.
+# As a result, it is not legal to refine the cache result with information more
+# precise than was it deducible from the `WidenedSimpleArgtypes`.
+struct WidenedArgtypes
+    argtypes::Vector{Any}
+end
 
-The implementation for `argtypes` with general extended lattice information.
-This is supposed to be used for debugging and testing or external `AbstractInterpreter`
-usages and in general `matching_cache_argtypes(::MethodInstance, ::ConditionalArgtypes)`
-is more preferred it can forward `Conditional` information.
-"""
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, simple_argtypes::SimpleArgtypes)
+function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance,
+                                 simple_argtypes::Union{SimpleArgtypes, WidenedArgtypes},
+                                 cache_argtypes::Vector{Any})
     (; argtypes) = simple_argtypes
     given_argtypes = Vector{Any}(undef, length(argtypes))
     for i = 1:length(argtypes)
         given_argtypes[i] = widenslotwrapper(argtypes[i])
     end
-    given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo)
-    return pick_const_args(𝕃, linfo, given_argtypes)
+    return pick_const_args!(𝕃, given_argtypes, cache_argtypes)
 end
 
-function pick_const_args(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any})
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo)
-    return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes)
+function pick_const_arg(𝕃::AbstractLattice, @nospecialize(given_argtype), @nospecialize(cache_argtype))
+    if !is_argtype_match(𝕃, given_argtype, cache_argtype, false)
+        # prefer the argtype we were given over the one computed from `mi`
+        if (isa(given_argtype, PartialStruct) && isa(cache_argtype, Type) &&
+            !⊏(𝕃, given_argtype, cache_argtype))
+            # if the type information of this `PartialStruct` is less strict than
+            # declared method signature, narrow it down using `tmeet`
+            given_argtype = tmeet(𝕃, given_argtype, cache_argtype)
+        end
+        return given_argtype
+    else
+        return cache_argtype
+    end
 end
 
-function pick_const_args!(𝕃::AbstractLattice, cache_argtypes::Vector{Any}, overridden_by_const::BitVector, given_argtypes::Vector{Any})
-    for i = 1:length(given_argtypes)
-        given_argtype = given_argtypes[i]
-        cache_argtype = cache_argtypes[i]
-        if !is_argtype_match(𝕃, given_argtype, cache_argtype, false)
-            # prefer the argtype we were given over the one computed from `linfo`
-            if (isa(given_argtype, PartialStruct) && isa(cache_argtype, Type) &&
-                !⊏(𝕃, given_argtype, cache_argtype))
-                # if the type information of this `PartialStruct` is less strict than
-                # declared method signature, narrow it down using `tmeet`
-                given_argtype = tmeet(𝕃, given_argtype, cache_argtype)
-            end
-            cache_argtypes[i] = given_argtype
-            overridden_by_const[i] = true
+function pick_const_args!(𝕃::AbstractLattice, given_argtypes::Vector{Any}, cache_argtypes::Vector{Any})
+    ngiven = length(given_argtypes)
+    ncache = length(cache_argtypes)
+    if ngiven == 0 || ncache == 0
+        return Any[]
+    end
+    given_va = given_argtypes[end]
+    cache_va = cache_argtypes[end]
+    if isvarargtype(given_va)
+        va = unwrapva(given_va)
+        if isvarargtype(cache_va)
+            # Process the common prefix, then join
+            nprocessargs = max(ngiven-1, ncache-1)
+            resize!(given_argtypes, nprocessargs+1)
+            given_argtypes[end] = Vararg{pick_const_arg(𝕃, va, unwrapva(cache_va))}
+        else
+            nprocessargs = ncache
+            resize!(given_argtypes, nprocessargs)
         end
+        for i = ngiven:nprocessargs
+            given_argtypes[i] = va
+        end
+    elseif isvarargtype(cache_va)
+        nprocessargs = ngiven
+    else
+        @assert ngiven == ncache
+        nprocessargs = ngiven
     end
-    return cache_argtypes, overridden_by_const
+    for i = 1:nprocessargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = argtype_by_index(cache_argtypes, i)
+        given_argtypes[i] = pick_const_arg(𝕃, given_argtype, cache_argtype)
+    end
+    return given_argtypes
 end
 
 function is_argtype_match(𝕃::AbstractLattice,
@@ -80,29 +84,39 @@ function is_argtype_match(𝕃::AbstractLattice,
                           overridden_by_const::Bool)
     if is_forwardable_argtype(𝕃, given_argtype)
         return is_lattice_equal(𝕃, given_argtype, cache_argtype)
+    else
+        return !overridden_by_const
     end
-    return !overridden_by_const
 end
 
-va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) =
-    va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, mi)
-function va_process_argtypes(@specialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance)
-    def = mi.def
-    isva = isa(def, Method) ? def.isva : false
-    nargs = isa(def, Method) ? Int(def.nargs) : length(mi.specTypes.parameters)
-    if isva || isvarargtype(given_argtypes[end])
+function va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, nargs::UInt, isva::Bool)
+    nargs = Int(nargs)
+    if isva || (!isempty(given_argtypes) && isvarargtype(given_argtypes[end]))
         isva_given_argtypes = Vector{Any}(undef, nargs)
         for i = 1:(nargs-isva)
-            isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
+            newarg = argtype_by_index(given_argtypes, i)
+            if isva && has_conditional(𝕃) && isa(newarg, Conditional)
+                if newarg.slot > (nargs-isva)
+                    newarg = widenconditional(newarg)
+                end
+            end
+            isva_given_argtypes[i] = newarg
         end
         if isva
             if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
                 last = length(given_argtypes)
             else
                 last = nargs
+                if has_conditional(𝕃)
+                    for i = last:length(given_argtypes)
+                        newarg = given_argtypes[i]
+                        if isa(newarg, Conditional) && newarg.slot > (nargs-isva)
+                            given_argtypes[i] = widenconditional(newarg)
+                        end
+                    end
+                end
             end
             isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end])
-            va_handler!(isva_given_argtypes, last)
         end
         return isva_given_argtypes
     end
@@ -110,90 +124,45 @@ function va_process_argtypes(@specialize(va_handler!), 𝕃::AbstractLattice, gi
     return given_argtypes
 end
 
-function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes),
-    withfirst::Bool = true)
-    toplevel = method === nothing
-    isva = !toplevel && method.isva
-    linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
-    nargs::Int = toplevel ? 0 : method.nargs
-    # For opaque closure, the closure environment is processed elsewhere
-    withfirst || (nargs -= 1)
-    cache_argtypes = Vector{Any}(undef, nargs)
-    # First, if we're dealing with a varargs method, then we set the last element of `args`
-    # to the appropriate `Tuple` type or `PartialStruct` instance.
-    if !toplevel && isva
-        if specTypes::Type == Tuple
-            linfo_argtypes = Any[Any for i = 1:nargs]
-            if nargs > 1
-                linfo_argtypes[end] = Tuple
-            end
-            vargtype = Tuple
-        else
-            linfo_argtypes_length = length(linfo_argtypes)
-            if nargs > linfo_argtypes_length
-                va = linfo_argtypes[linfo_argtypes_length]
-                if isvarargtype(va)
-                    new_va = rewrap_unionall(unconstrain_vararg_length(va), specTypes)
-                    vargtype = Tuple{new_va}
-                else
-                    vargtype = Tuple{}
-                end
-            else
-                vargtype_elements = Any[]
-                for i in nargs:linfo_argtypes_length
-                    p = linfo_argtypes[i]
-                    p = unwraptv(isvarargtype(p) ? unconstrain_vararg_length(p) : p)
-                    push!(vargtype_elements, elim_free_typevars(rewrap_unionall(p, specTypes)))
-                end
-                for i in 1:length(vargtype_elements)
-                    atyp = vargtype_elements[i]
-                    if issingletontype(atyp)
-                        # replace singleton types with their equivalent Const object
-                        vargtype_elements[i] = Const(atyp.instance)
-                    elseif isconstType(atyp)
-                        vargtype_elements[i] = Const(atyp.parameters[1])
-                    end
-                end
-                vargtype = tuple_tfunc(fallback_lattice, vargtype_elements)
-            end
-        end
-        cache_argtypes[nargs] = vargtype
-        nargs -= 1
+function most_general_argtypes(method::Union{Method,Nothing}, @nospecialize(specTypes))
+    mi_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
+    nargtypes = length(mi_argtypes)
+    nargs = isa(method, Method) ? Int(method.nargs) : 0
+    if length(mi_argtypes) < nargs && isvarargtype(mi_argtypes[end])
+        resize!(mi_argtypes, nargs)
     end
-    # Now, we propagate type info from `linfo_argtypes` into `cache_argtypes`, improving some
+    # Now, we propagate type info from `mi_argtypes` into `cache_argtypes`, improving some
     # type info as we go (where possible). Note that if we're dealing with a varargs method,
     # we already handled the last element of `cache_argtypes` (and decremented `nargs` so that
     # we don't overwrite the result of that work here).
-    linfo_argtypes_length = length(linfo_argtypes)
-    if linfo_argtypes_length > 0
-        n = linfo_argtypes_length > nargs ? nargs : linfo_argtypes_length
-        tail_index = n
-        local lastatype
-        for i = 1:n
-            atyp = linfo_argtypes[i]
-            if i == n && isvarargtype(atyp)
-                atyp = unwrapva(atyp)
-                tail_index -= 1
-            end
-            atyp = unwraptv(atyp)
-            if issingletontype(atyp)
-                # replace singleton types with their equivalent Const object
-                atyp = Const(atyp.instance)
-            elseif isconstType(atyp)
-                atyp = Const(atyp.parameters[1])
-            else
-                atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
-            end
-            i == n && (lastatype = atyp)
-            cache_argtypes[i] = atyp
+    tail_index = min(nargtypes, nargs)
+    local lastatype
+    for i = 1:nargtypes
+        atyp = mi_argtypes[i]
+        wasva = false
+        if i == nargtypes && isvarargtype(atyp)
+            wasva = true
+            atyp = unwrapva(atyp)
         end
-        for i = (tail_index + 1):nargs
-            cache_argtypes[i] = lastatype
+        atyp = unwraptv(atyp)
+        if issingletontype(atyp)
+            # replace singleton types with their equivalent Const object
+            atyp = Const(atyp.instance)
+        elseif isconstType(atyp)
+            atyp = Const(atyp.parameters[1])
+        else
+            atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
         end
-    else
-        @assert nargs == 0 "invalid specialization of method" # wrong number of arguments
+        mi_argtypes[i] = atyp
+        if wasva
+            lastatype = atyp
+            mi_argtypes[end] = Vararg{widenconst(atyp)}
+        end
+    end
+    for i = (tail_index+1):(nargs-1)
+        mi_argtypes[i] = lastatype
     end
-    cache_argtypes
+    return mi_argtypes
 end
 
 # eliminate free `TypeVar`s in order to make the life much easier down the road:
@@ -209,24 +178,17 @@ function elim_free_typevars(@nospecialize t)
     end
 end
 
-function cache_lookup(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult})
-    method = linfo.def::Method
-    nargs = Int(method.nargs)
-    method.isva && (nargs -= 1)
-    length(given_argtypes) ≥ nargs || return nothing
+function cache_lookup(𝕃::AbstractLattice, mi::MethodInstance, given_argtypes::Vector{Any},
+                      cache::Vector{InferenceResult})
+    method = mi.def::Method
+    nargtypes = length(given_argtypes)
     for cached_result in cache
-        cached_result.linfo === linfo || continue
+        cached_result.linfo === mi || @goto next_cache
         cache_argtypes = cached_result.argtypes
-        cache_overridden_by_const = cached_result.overridden_by_const
-        for i in 1:nargs
-            if !is_argtype_match(𝕃, widenmustalias(given_argtypes[i]),
-                                 cache_argtypes[i], cache_overridden_by_const[i])
-                @goto next_cache
-            end
-        end
-        if method.isva
-            if !is_argtype_match(𝕃, tuple_tfunc(𝕃, given_argtypes[(nargs + 1):end]),
-                                 cache_argtypes[end], cache_overridden_by_const[end])
+        @assert length(cache_argtypes) == nargtypes "invalid `cache_argtypes` for `mi`"
+        cache_overridden_by_const = cached_result.overridden_by_const::BitVector
+        for i in 1:nargtypes
+            if !is_argtype_match(𝕃, given_argtypes[i], cache_argtypes[i], cache_overridden_by_const[i])
                 @goto next_cache
             end
         end
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 663fd78c90dba..6953dea5b9bd7 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -177,7 +177,7 @@ mutable struct LazyCFGReachability
 end
 function get!(x::LazyCFGReachability)
     isdefined(x, :reachability) && return x.reachability
-    domtree = construct_domtree(x.ir.cfg.blocks)
+    domtree = construct_domtree(x.ir)
     return x.reachability = CFGReachability(x.ir.cfg, domtree)
 end
 
@@ -189,8 +189,8 @@ end
 function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom}
     isdefined(x, :domtree) && return x.domtree
     return @timeit "domtree 2" x.domtree = IsPostDom ?
-        construct_postdomtree(x.ir.cfg.blocks) :
-        construct_domtree(x.ir.cfg.blocks)
+        construct_postdomtree(x.ir) :
+        construct_domtree(x.ir)
 end
 
 const LazyDomtree = LazyGenericDomtree{false}
@@ -209,10 +209,10 @@ to enable flow-sensitive analysis.
 """
 const VarTable = Vector{VarState}
 
-const CACHE_MODE_NULL     = 0x00      # not cached, without optimization
-const CACHE_MODE_GLOBAL   = 0x01 << 0 # cached globally, optimization allowed
-const CACHE_MODE_LOCAL    = 0x01 << 1 # cached locally, optimization allowed
-const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization allowed
+const CACHE_MODE_NULL     = 0x00      # not cached, optimization optional
+const CACHE_MODE_GLOBAL   = 0x01 << 0 # cached globally, optimization required
+const CACHE_MODE_LOCAL    = 0x01 << 1 # cached locally, optimization required
+const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization required
 
 mutable struct TryCatchFrame
     exct
@@ -222,6 +222,11 @@ mutable struct TryCatchFrame
     TryCatchFrame(@nospecialize(exct), @nospecialize(scopet), enter_idx::Int) = new(exct, scopet, enter_idx)
 end
 
+struct HandlerInfo
+    handlers::Vector{TryCatchFrame}
+    handler_at::Vector{Tuple{Int,Int}} # tuple of current (handler, exception stack) value at the pc
+end
+
 mutable struct InferenceState
     #= information about this method instance =#
     linfo::MethodInstance
@@ -237,22 +242,24 @@ mutable struct InferenceState
     currbb::Int
     currpc::Int
     ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers
-    handlers::Vector{TryCatchFrame}
-    handler_at::Vector{Tuple{Int, Int}} # tuple of current (handler, exception stack) value at the pc
+    handler_info::Union{Nothing,HandlerInfo}
     ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info
     # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time.
     bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet
     ssavaluetypes::Vector{Any}
-    stmt_edges::Vector{Union{Nothing,Vector{Any}}}
+    stmt_edges::Vector{Vector{Any}}
     stmt_info::Vector{CallInfo}
 
     #= intermediate states for interprocedural abstract interpretation =#
     pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
     limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
     cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
-    callers_in_cycle::Vector{InferenceState}
-    dont_work_on_me::Bool
-    parent # ::Union{Nothing,AbsIntState}
+
+    # IPO tracking of in-process work, shared with all frames given AbstractInterpreter
+    callstack #::Vector{AbsIntState}
+    parentid::Int # index into callstack of the parent frame that originally added this frame (call frame_parent to extract the current parent of the SCC)
+    frameid::Int # index into callstack at which this object is found (or zero, if this is not a cached frame and has no parent)
+    cycleid::Int # index into the callstack of the topmost frame in the cycle (all frames in the same cycle share the same cycleid)
 
     #= results =#
     result::InferenceResult # remember where to put the result
@@ -276,25 +283,25 @@ mutable struct InferenceState
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
     function InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::UInt8,
                             interp::AbstractInterpreter)
-        linfo = result.linfo
+        mi = result.linfo
         world = get_inference_world(interp)
         if world == typemax(UInt)
             error("Entering inference from a generated function with an invalid world")
         end
-        def = linfo.def
+        def = mi.def
         mod = isa(def, Method) ? def.module : def
-        sptypes = sptypes_from_meth_instance(linfo)
+        sptypes = sptypes_from_meth_instance(mi)
         code = src.code::Vector{Any}
         cfg = compute_basic_blocks(code)
         method_info = MethodInfo(src)
 
         currbb = currpc = 1
         ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
-        handler_at, handlers = compute_trycatch(code, BitSet())
+        handler_info = compute_trycatch(code)
         nssavalues = src.ssavaluetypes::Int
         ssavalue_uses = find_ssavalue_uses(code, nssavalues)
         nstmts = length(code)
-        stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ]
+        stmt_edges = Vector{Vector{Any}}(undef, nstmts)
         stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ]
 
         nslots = length(src.slotflags)
@@ -302,9 +309,15 @@ mutable struct InferenceState
         bb_vartables = Union{Nothing,VarTable}[ nothing for i = 1:length(cfg.blocks) ]
         bb_vartable1 = bb_vartables[1] = VarTable(undef, nslots)
         argtypes = result.argtypes
+
+        argtypes = va_process_argtypes(typeinf_lattice(interp), argtypes, src.nargs, src.isva)
+
         nargtypes = length(argtypes)
         for i = 1:nslots
             argtyp = (i > nargtypes) ? Bottom : argtypes[i]
+            if argtyp === Bool && has_conditional(typeinf_lattice(interp))
+                argtyp = Conditional(i, Const(true), Const(false))
+            end
             slottypes[i] = argtyp
             bb_vartable1[i] = VarState(argtyp, i > nargtypes)
         end
@@ -314,38 +327,44 @@ mutable struct InferenceState
         pclimitations = IdSet{InferenceState}()
         limitations = IdSet{InferenceState}()
         cycle_backedges = Vector{Tuple{InferenceState,Int}}()
-        callers_in_cycle = Vector{InferenceState}()
-        dont_work_on_me = false
-        parent = nothing
+        callstack = AbsIntState[]
 
         valid_worlds = WorldRange(1, get_world_counter())
         bestguess = Bottom
         exc_bestguess = Bottom
         ipo_effects = EFFECTS_TOTAL
 
-        insert_coverage = should_insert_coverage(mod, src)
+        insert_coverage = should_insert_coverage(mod, src.debuginfo)
         if insert_coverage
             ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE)
         end
 
         if def isa Method
-            ipo_effects = Effects(ipo_effects; nonoverlayed=is_nonoverlayed(def))
+            nonoverlayed = is_nonoverlayed(def) ? ALWAYS_TRUE :
+                is_effect_overridden(def, :consistent_overlay) ? CONSISTENT_OVERLAY :
+                ALWAYS_FALSE
+            ipo_effects = Effects(ipo_effects; nonoverlayed)
         end
 
         restrict_abstract_call_sites = isa(def, Module)
 
-        # some more setups
-        InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
-        !iszero(cache_mode & CACHE_MODE_LOCAL) && push!(get_inference_cache(interp), result)
-
         this = new(
-            linfo, world, mod, sptypes, slottypes, src, cfg, method_info,
-            currbb, currpc, ip, handlers, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
-            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent,
+            mi, world, mod, sptypes, slottypes, src, cfg, method_info,
+            currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
+            pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
             result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects,
             restrict_abstract_call_sites, cache_mode, insert_coverage,
             interp)
 
+        # some more setups
+        if !iszero(cache_mode & CACHE_MODE_LOCAL)
+            push!(get_inference_cache(interp), result)
+        end
+        if !iszero(cache_mode & CACHE_MODE_GLOBAL)
+            push!(callstack, this)
+            this.cycleid = this.frameid = length(callstack)
+        end
+
         # Apply generated function restrictions
         if src.min_world != 1 || src.max_world != typemax(UInt)
             # From generated functions
@@ -356,6 +375,14 @@ mutable struct InferenceState
     end
 end
 
+gethandler(frame::InferenceState, pc::Int=frame.currpc) = gethandler(frame.handler_info, pc)
+gethandler(::Nothing, ::Int) = nothing
+function gethandler(handler_info::HandlerInfo, pc::Int)
+    handler_idx = handler_info.handler_at[pc][1]
+    handler_idx == 0 && return nothing
+    return handler_info.handlers[handler_idx]
+end
+
 is_nonoverlayed(m::Method) = !isdefined(m, :external_mt)
 is_nonoverlayed(interp::AbstractInterpreter) = !isoverlayed(method_table(interp))
 isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface")
@@ -368,7 +395,21 @@ is_inferred(result::InferenceResult) = result.result !== nothing
 
 was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND
 
-function compute_trycatch(code::Vector{Any}, ip::BitSet)
+compute_trycatch(ir::IRCode) = compute_trycatch(ir.stmts.stmt, ir.cfg.blocks)
+
+"""
+    compute_trycatch(code, [, bbs]) -> handler_info::Union{Nothing,HandlerInfo}
+
+Given the code of a function, compute, at every statement, the current
+try/catch handler, and the current exception stack top. This function returns
+a tuple of:
+
+    1. `handler_info.handler_at`: A statement length vector of tuples
+       `(catch_handler, exception_stack)`, which are indices into `handlers`
+
+    2. `handler_info.handlers`: A `TryCatchFrame` vector of handlers
+"""
+function compute_trycatch(code::Vector{Any}, bbs::Union{Vector{BasicBlock},Nothing}=nothing)
     # The goal initially is to record the frame like this for the state at exit:
     # 1: (enter 3) # == 0
     # 3: (expr)    # == 1
@@ -377,17 +418,19 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
     # then we can find all `try`s by walking backwards from :enter statements,
     # and all `catch`es by looking at the statement after the :enter
     n = length(code)
-    empty!(ip)
+    ip = BitSet()
     ip.offset = 0 # for _bits_findnext
     push!(ip, n + 1)
-    handler_at = fill((0, 0), n)
-    handlers = TryCatchFrame[]
+    handler_info = nothing
 
     # start from all :enter statements and record the location of the try
     for pc = 1:n
         stmt = code[pc]
         if isa(stmt, EnterNode)
+            (;handlers, handler_at) = handler_info =
+                (handler_info === nothing ? HandlerInfo(TryCatchFrame[], fill((0, 0), n)) : handler_info)
             l = stmt.catch_dest
+            (bbs !== nothing) && (l = first(bbs[l].stmts))
             push!(handlers, TryCatchFrame(Bottom, isdefined(stmt, :scope) ? Bottom : nothing, pc))
             handler_id = length(handlers)
             handler_at[pc + 1] = (handler_id, 0)
@@ -399,7 +442,12 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
         end
     end
 
+    if handler_info === nothing
+        return nothing
+    end
+
     # now forward those marks to all :leave statements
+    (;handlers, handler_at) = handler_info
     while true
         # make progress on the active ip set
         pc = _bits_findnext(ip.bits, 0)::Int
@@ -412,8 +460,10 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
             stmt = code[pc]
             if isa(stmt, GotoNode)
                 pc´ = stmt.label
+                (bbs !== nothing) && (pc´ = first(bbs[pc´].stmts))
             elseif isa(stmt, GotoIfNot)
                 l = stmt.dest::Int
+                (bbs !== nothing) && (l = first(bbs[l].stmts))
                 if handler_at[l] != cur_stacks
                     @assert handler_at[l][1] == 0 || handler_at[l][1] == cur_stacks[1] "unbalanced try/catch"
                     handler_at[l] = cur_stacks
@@ -424,6 +474,7 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
                 break
             elseif isa(stmt, EnterNode)
                 l = stmt.catch_dest
+                (bbs !== nothing) && (l = first(bbs[l].stmts))
                 # We assigned a handler number above. Here we just merge that
                 # with out current handler information.
                 if l != 0
@@ -470,38 +521,35 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
     end
 
     @assert first(ip) == n + 1
-    return handler_at, handlers
+    return handler_info
 end
 
 # check if coverage mode is enabled
-function should_insert_coverage(mod::Module, src::CodeInfo)
+function should_insert_coverage(mod::Module, debuginfo::DebugInfo)
     coverage_enabled(mod) && return true
     JLOptions().code_coverage == 3 || return false
     # path-specific coverage mode: if any line falls in a tracked file enable coverage for all
-    linetable = src.linetable
-    if isa(linetable, Vector{Any})
-        for line in linetable
-            line = line::LineInfoNode
-            if is_file_tracked(line.file)
-                return true
-            end
-        end
-    elseif isa(linetable, Vector{LineInfoNode})
-        for line in linetable
-            if is_file_tracked(line.file)
-                return true
-            end
-        end
-    end
+    return _should_insert_coverage(debuginfo)
+end
+
+_should_insert_coverage(mod::Symbol) = is_file_tracked(mod)
+_should_insert_coverage(mod::Method) = _should_insert_coverage(mod.file)
+_should_insert_coverage(mod::MethodInstance) = _should_insert_coverage(mod.def)
+_should_insert_coverage(mod::Module) = false
+function _should_insert_coverage(info::DebugInfo)
+    linetable = info.linetable
+    linetable === nothing || (_should_insert_coverage(linetable) && return true)
+    _should_insert_coverage(info.def) && return true
     return false
 end
 
 function InferenceState(result::InferenceResult, cache_mode::UInt8, interp::AbstractInterpreter)
     # prepare an InferenceState object for inferring lambda
     world = get_inference_world(interp)
-    src = retrieve_code_info(result.linfo, world)
+    mi = result.linfo
+    src = retrieve_code_info(mi, world)
     src === nothing && return nothing
-    maybe_validate_code(result.linfo, src, "lowered")
+    maybe_validate_code(mi, src, "lowered")
     return InferenceState(result, src, cache_mode, interp)
 end
 InferenceState(result::InferenceResult, cache_mode::Symbol, interp::AbstractInterpreter) =
@@ -586,13 +634,13 @@ end
 
 const EMPTY_SPTYPES = VarState[]
 
-function sptypes_from_meth_instance(linfo::MethodInstance)
-    def = linfo.def
+function sptypes_from_meth_instance(mi::MethodInstance)
+    def = mi.def
     isa(def, Method) || return EMPTY_SPTYPES # toplevel
     sig = def.sig
-    if isempty(linfo.sparam_vals)
+    if isempty(mi.sparam_vals)
         isa(sig, UnionAll) || return EMPTY_SPTYPES
-        # linfo is unspecialized
+        # mi is unspecialized
         spvals = Any[]
         sig′ = sig
         while isa(sig′, UnionAll)
@@ -600,7 +648,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
             sig′ = sig′.body
         end
     else
-        spvals = linfo.sparam_vals
+        spvals = mi.sparam_vals
     end
     nvals = length(spvals)
     sptypes = Vector{VarState}(undef, nvals)
@@ -618,7 +666,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
                 if isType(sⱼ) && sⱼ.parameters[1] === vᵢ
                     # if this parameter came from `arg::Type{T}`,
                     # then `arg` is more precise than `Type{T} where lb<:T<:ub`
-                    ty = fieldtype(linfo.specTypes, j)
+                    ty = fieldtype(mi.specTypes, j)
                     @goto ty_computed
                 elseif (va = va_from_vatuple(sⱼ)) !== nothing
                     # if this parameter came from `::Tuple{.., Vararg{T,vᵢ}}`,
@@ -649,8 +697,8 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
                 # type variables, we can use it for a more accurate analysis of whether `v`
                 # is constrained or not, otherwise we should use `def.sig` which always
                 # doesn't contain any free type variables
-                if !has_free_typevars(linfo.specTypes)
-                    sig = linfo.specTypes
+                if !has_free_typevars(mi.specTypes)
+                    sig = mi.specTypes
                 end
                 @assert !has_free_typevars(sig)
                 constrains_param(v, sig, #=covariant=#true)
@@ -695,10 +743,7 @@ function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize
         for r in frame.ssavalue_uses[ssa_id]
             if was_reached(frame, r)
                 usebb = block_for_inst(frame.cfg, r)
-                # We're guaranteed to visit the statement if it's in the current
-                # basic block, since SSA values can only ever appear after their
-                # def.
-                if usebb != frame.currbb
+                if usebb != frame.currbb || r < ssa_id
                     push!(W, usebb)
                 end
             end
@@ -707,9 +752,9 @@ function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize
     return nothing
 end
 
-function add_cycle_backedge!(caller::InferenceState, frame::InferenceState, currpc::Int)
+function add_cycle_backedge!(caller::InferenceState, frame::InferenceState)
     update_valid_age!(caller, frame.valid_worlds)
-    backedge = (caller, currpc)
+    backedge = (caller, caller.currpc)
     contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge)
     add_backedge!(caller, frame.linfo)
     return frame
@@ -717,37 +762,24 @@ end
 
 function get_stmt_edges!(caller::InferenceState, currpc::Int=caller.currpc)
     stmt_edges = caller.stmt_edges
-    edges = stmt_edges[currpc]
-    if edges === nothing
-        edges = stmt_edges[currpc] = []
+    if !isassigned(stmt_edges, currpc)
+        return stmt_edges[currpc] = Any[]
+    else
+        return stmt_edges[currpc]
     end
-    return edges
 end
 
 function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc)
-    edges = frame.stmt_edges[currpc]
-    edges === nothing || empty!(edges)
-    return nothing
-end
-
-function print_callstack(sv::InferenceState)
-    while sv !== nothing
-        print(sv.linfo)
-        is_cached(sv) || print("  [uncached]")
-        println()
-        for cycle in sv.callers_in_cycle
-            print(' ', cycle.linfo)
-            println()
-        end
-        sv = sv.parent
+    if isassigned(frame.stmt_edges, currpc)
+        empty!(frame.stmt_edges[currpc])
     end
+    return nothing
 end
 
 function narguments(sv::InferenceState, include_va::Bool=true)
-    def = sv.linfo.def
-    nargs = length(sv.result.argtypes)
+    nargs = Int(sv.src.nargs)
     if !include_va
-        nargs -= isa(def, Method) && def.isva
+        nargs -= sv.src.isva
     end
     return nargs
 end
@@ -769,7 +801,9 @@ mutable struct IRInterpretationState
     const lazyreachability::LazyCFGReachability
     valid_worlds::WorldRange
     const edges::Vector{Any}
-    parent # ::Union{Nothing,AbsIntState}
+    callstack #::Vector{AbsIntState}
+    frameid::Int
+    parentid::Int
 
     function IRInterpretationState(interp::AbstractInterpreter,
         method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
@@ -779,9 +813,12 @@ mutable struct IRInterpretationState
         for i = 1:length(given_argtypes)
             given_argtypes[i] = widenslotwrapper(argtypes[i])
         end
-        given_argtypes = va_process_argtypes(optimizer_lattice(interp), given_argtypes, mi)
-        argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i])
-            for i = 1:length(given_argtypes)]
+        if isa(mi.def, Method)
+            argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i])
+                for i = 1:length(given_argtypes)]
+        else
+            argtypes_refined = Bool[false for i = 1:length(given_argtypes)]
+        end
         empty!(ir.argtypes)
         append!(ir.argtypes, given_argtypes)
         tpdum = TwoPhaseDefUseMap(length(ir.stmts))
@@ -789,32 +826,56 @@ mutable struct IRInterpretationState
         lazyreachability = LazyCFGReachability(ir)
         valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
         edges = Any[]
-        parent = nothing
+        callstack = AbsIntState[]
         return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
-                   ssa_refined, lazyreachability, valid_worlds, edges, parent)
+                ssa_refined, lazyreachability, valid_worlds, edges, callstack, 0, 0)
     end
 end
 
 function IRInterpretationState(interp::AbstractInterpreter,
-    code::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt)
-    @assert code.def === mi
-    src = @atomic :monotonic code.inferred
+    codeinst::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt)
+    @assert codeinst.def === mi "method instance is not synced with code instance"
+    src = @atomic :monotonic codeinst.inferred
     if isa(src, String)
-        src = _uncompressed_ir(code, src)
+        src = _uncompressed_ir(codeinst, src)
     else
         isa(src, CodeInfo) || return nothing
     end
     method_info = MethodInfo(src)
     ir = inflate_ir(src, mi)
+    argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva)
     return IRInterpretationState(interp, method_info, ir, mi, argtypes, world,
-                                 code.min_world, code.max_world)
+                                 codeinst.min_world, codeinst.max_world)
 end
 
+
 # AbsIntState
 # ===========
 
 const AbsIntState = Union{InferenceState,IRInterpretationState}
 
+function print_callstack(frame::AbsIntState)
+    print("=================== Callstack: ==================\n")
+    frames = frame.callstack::Vector{AbsIntState}
+    for idx = (frame.frameid == 0 ? 0 : 1):length(frames)
+        sv = (idx == 0 ? frame : frames[idx])
+        idx == frame.frameid && print("*")
+        print("[")
+        print(idx)
+        if sv isa InferenceState && !isa(sv.interp, NativeInterpreter)
+            print(", ")
+            print(typeof(sv.interp))
+        end
+        print("] ")
+        print(frame_instance(sv))
+        is_cached(sv) || print("  [uncached]")
+        sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]")
+        println()
+        @assert sv.frameid == idx
+    end
+    print("================= End callstack ==================\n")
+end
+
 frame_instance(sv::InferenceState) = sv.linfo
 frame_instance(sv::IRInterpretationState) = sv.mi
 
@@ -825,10 +886,37 @@ function frame_module(sv::AbsIntState)
     return def.module
 end
 
-frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState}
-frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState}
+function frame_parent(sv::InferenceState)
+    sv.parentid == 0 && return nothing
+    callstack = sv.callstack::Vector{AbsIntState}
+    sv = callstack[sv.cycleid]::InferenceState
+    sv.parentid == 0 && return nothing
+    return callstack[sv.parentid]
+end
+frame_parent(sv::IRInterpretationState) = sv.parentid == 0 ? nothing : (sv.callstack::Vector{AbsIntState})[sv.parentid]
+
+# add the orphan child to the parent and the parent to the child
+function assign_parentchild!(child::InferenceState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.cycleid = child.frameid = length(callstack)
+    nothing
+end
+function assign_parentchild!(child::IRInterpretationState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.frameid = length(callstack)
+    nothing
+end
 
-is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const)
+function is_constproped(sv::InferenceState)
+    (;overridden_by_const) = sv.result
+    return overridden_by_const !== nothing
+end
 is_constproped(::IRInterpretationState) = true
 
 is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL)
@@ -843,9 +931,6 @@ method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_
 frame_world(sv::InferenceState) = sv.world
 frame_world(sv::IRInterpretationState) = sv.world
 
-callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle
-callers_in_cycle(sv::IRInterpretationState) = ()
-
 function is_effect_overridden(sv::AbsIntState, effect::Symbol)
     if is_effect_overridden(frame_instance(sv), effect)
         return true
@@ -854,8 +939,8 @@ function is_effect_overridden(sv::AbsIntState, effect::Symbol)
     end
     return false
 end
-function is_effect_overridden(linfo::MethodInstance, effect::Symbol)
-    def = linfo.def
+function is_effect_overridden(mi::MethodInstance, effect::Symbol)
+    def = mi.def
     return isa(def, Method) && is_effect_overridden(def, effect)
 end
 is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
@@ -882,20 +967,39 @@ Note that cycles may be visited in any order.
 struct AbsIntStackUnwind
     sv::AbsIntState
 end
-iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0))
-function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int})
-    # iterate through the cycle before walking to the parent
-    callers = callers_in_cycle(sv)
-    if callers !== () && cyclei < length(callers)
-        cyclei += 1
-        parent = callers[cyclei]
-    else
-        cyclei = 0
-        parent = frame_parent(sv)
+iterate(unw::AbsIntStackUnwind) = (unw.sv, length(unw.sv.callstack::Vector{AbsIntState}))
+function iterate(unw::AbsIntStackUnwind, frame::Int)
+    frame == 0 && return nothing
+    return ((unw.sv.callstack::Vector{AbsIntState})[frame], frame - 1)
+end
+
+struct AbsIntCycle
+    frames::Vector{AbsIntState}
+    cycleid::Int
+    cycletop::Int
+end
+iterate(unw::AbsIntCycle) = unw.cycleid == 0 ? nothing : (unw.frames[unw.cycletop], unw.cycletop)
+function iterate(unw::AbsIntCycle, frame::Int)
+    frame == unw.cycleid && return nothing
+    return (unw.frames[frame - 1], frame - 1)
+end
+
+"""
+    callers_in_cycle(sv::AbsIntState)
+
+Iterate through all callers of the given `AbsIntState` in the abstract
+interpretation stack (including the given `AbsIntState` itself) that are part
+of the same cycle, only if it is part of a cycle with multiple frames.
+"""
+function callers_in_cycle(sv::InferenceState)
+    callstack = sv.callstack::Vector{AbsIntState}
+    cycletop = cycleid = sv.cycleid
+    while cycletop < length(callstack) && (callstack[cycletop + 1]::InferenceState).cycleid == cycleid
+        cycletop += 1
     end
-    parent === nothing && return nothing
-    return (parent, (parent, cyclei))
+    return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop)
 end
+callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0)
 
 # temporarily accumulate our edges to later add as backedges in the callee
 function add_backedge!(caller::InferenceState, mi::MethodInstance)
@@ -979,30 +1083,6 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat
 bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
     state.rt === Any
 
-function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
-    if InferenceParams(interp).unoptimize_throw_blocks
-        # Disable inference of calls in throw blocks, since we're unlikely to
-        # need their types. There is one exception however: If up until now, the
-        # function has not seen any side effects, we would like to make sure there
-        # aren't any in the throw block either to enable other optimizations.
-        if is_stmt_throw_block(get_curr_ssaflag(sv))
-            should_infer_for_effects(sv) || return false
-        end
-    end
-    return true
-end
-function should_infer_for_effects(sv::InferenceState)
-    def = sv.linfo.def
-    def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated
-    effects = sv.ipo_effects
-    override = decode_effects_override(def.purity)
-    effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false
-    effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false
-    !effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false
-    return true
-end
-should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
-
 add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
 add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
 
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 1615781727669..fb712b1c71b12 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -23,39 +23,48 @@ const IR_FLAG_INBOUNDS    = one(UInt32) << 0
 const IR_FLAG_INLINE      = one(UInt32) << 1
 # This statement is marked as @noinline by user
 const IR_FLAG_NOINLINE    = one(UInt32) << 2
-# This statement is on a code path that eventually `throw`s.
-const IR_FLAG_THROW_BLOCK = one(UInt32) << 3
 # An optimization pass has updated this statement in a way that may
 # have exposed information that inference did not see. Re-running
 # inference on this statement may be profitable.
-const IR_FLAG_REFINED     = one(UInt32) << 4
+const IR_FLAG_REFINED     = one(UInt32) << 3
 # This statement is proven :consistent
-const IR_FLAG_CONSISTENT  = one(UInt32) << 5
+const IR_FLAG_CONSISTENT  = one(UInt32) << 4
 # This statement is proven :effect_free
-const IR_FLAG_EFFECT_FREE = one(UInt32) << 6
+const IR_FLAG_EFFECT_FREE = one(UInt32) << 5
 # This statement is proven :nothrow
-const IR_FLAG_NOTHROW     = one(UInt32) << 7
+const IR_FLAG_NOTHROW     = one(UInt32) << 6
 # This statement is proven :terminates
-const IR_FLAG_TERMINATES  = one(UInt32) << 8
+const IR_FLAG_TERMINATES  = one(UInt32) << 7
 # This statement is proven :noub
-const IR_FLAG_NOUB        = one(UInt32) << 9
+const IR_FLAG_NOUB        = one(UInt32) << 8
 # TODO: Both of these should eventually go away once
 # This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
-const IR_FLAG_EFIIMO      = one(UInt32) << 10
+const IR_FLAG_EFIIMO      = one(UInt32) << 9
 # This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
-const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 11
+const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 10
+# This statement is :nortcall
+const IR_FLAG_NORTCALL    = one(UInt32) << 11
+# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
+const IR_FLAG_UNUSED      = one(UInt32) << 12
 
-const NUM_IR_FLAGS = 12 # sync with julia.h
+const NUM_IR_FLAGS = 13 # sync with julia.h
 
 const IR_FLAGS_EFFECTS =
-    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_NOUB
+    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
+    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
 
-const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
+const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
 
 const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
 
 has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
 
+function iscallstmt(@nospecialize stmt)
+    stmt isa Expr || return false
+    head = stmt.head
+    return head === :call || head === :invoke || head === :foreigncall
+end
+
 function flags_for_effects(effects::Effects)
     flags = zero(UInt32)
     if is_consistent(effects)
@@ -69,12 +78,18 @@ function flags_for_effects(effects::Effects)
     if is_nothrow(effects)
         flags |= IR_FLAG_NOTHROW
     end
+    if is_terminates(effects)
+        flags |= IR_FLAG_TERMINATES
+    end
     if is_inaccessiblemem_or_argmemonly(effects)
         flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
     end
     if is_noub(effects)
         flags |= IR_FLAG_NOUB
     end
+    if is_nortcall(effects)
+        flags |= IR_FLAG_NORTCALL
+    end
     return flags
 end
 
@@ -115,22 +130,18 @@ function src_inlining_policy(interp::AbstractInterpreter,
         return src_inlineable
     elseif isa(src, IRCode)
         return true
-    elseif isa(src, SemiConcreteResult)
-        return true
     end
     @assert !isa(src, CodeInstance) # handled by caller
     return false
 end
 
-function inlining_policy end # deprecated legacy name used by Cthulhu
-
 struct InliningState{Interp<:AbstractInterpreter}
     edges::Vector{Any}
     world::UInt
     interp::Interp
 end
 function InliningState(sv::InferenceState, interp::AbstractInterpreter)
-    edges = sv.stmt_edges[1]::Vector{Any}
+    edges = sv.stmt_edges[1]
     return InliningState(edges, sv.world, interp)
 end
 function InliningState(interp::AbstractInterpreter)
@@ -160,7 +171,7 @@ function OptimizationState(sv::InferenceState, interp::AbstractInterpreter)
                              sv.sptypes, sv.slottypes, inlining, sv.cfg,
                              sv.unreachable, sv.bb_vartables, sv.insert_coverage)
 end
-function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::AbstractInterpreter)
+function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter)
     # prepare src for running optimization passes if it isn't already
     nssavalues = src.ssavaluetypes
     if nssavalues isa Int
@@ -168,7 +179,7 @@ function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::Abstrac
     else
         nssavalues = length(src.ssavaluetypes::Vector{Any})
     end
-    sptypes = sptypes_from_meth_instance(linfo)
+    sptypes = sptypes_from_meth_instance(mi)
     nslots = length(src.slotflags)
     slottypes = src.slottypes
     if slottypes === nothing
@@ -176,7 +187,7 @@ function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::Abstrac
     end
     stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ]
     # cache some useful state computations
-    def = linfo.def
+    def = mi.def
     mod = isa(def, Method) ? def.module : def
     # Allow using the global MI cache, but don't track edges.
     # This method is mostly used for unit testing the optimizer
@@ -190,13 +201,13 @@ function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::Abstrac
             for slot = 1:nslots
         ])
     end
-    return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
+    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
 end
-function OptimizationState(linfo::MethodInstance, interp::AbstractInterpreter)
+function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
     world = get_inference_world(interp)
-    src = retrieve_code_info(linfo, world)
+    src = retrieve_code_info(mi, world)
     src === nothing && return nothing
-    return OptimizationState(linfo, src, interp)
+    return OptimizationState(mi, src, interp)
 end
 
 function argextype end # imported by EscapeAnalysis
@@ -248,9 +259,8 @@ end
 
 _topmod(sv::OptimizationState) = _topmod(sv.mod)
 
-is_stmt_inline(stmt_flag::UInt32)      = has_flag(stmt_flag, IR_FLAG_INLINE)
-is_stmt_noinline(stmt_flag::UInt32)    = has_flag(stmt_flag, IR_FLAG_NOINLINE)
-is_stmt_throw_block(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_THROW_BLOCK)
+is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
+is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
 
 function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
     Targ = args[1]
@@ -287,12 +297,7 @@ function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src:
     return (false, true, true)
 end
 
-"""
-    stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) ->
-        (consistent::Bool, removable::Bool, nothrow::Bool)
-
-Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
-"""
+# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
 function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
     # TODO: We're duplicating analysis from inference here.
     isa(stmt, PiNode) && return (true, true, true)
@@ -302,8 +307,7 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
     isa(stmt, GotoNode) && return (true, false, true)
     isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
     if isa(stmt, GlobalRef)
-        nothrow = isdefined(stmt.mod, stmt.name)
-        consistent = nothrow && isconst(stmt.mod, stmt.name)
+        nothrow = consistent = isdefinedconst_globalref(stmt)
         return (consistent, nothrow, nothrow)
     elseif isa(stmt, Expr)
         (; head, args) = stmt
@@ -317,12 +321,6 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
             f = argextype(args[1], src)
             f = singleton_type(f)
             f === nothing && return (false, false, false)
-            if f === UnionAll
-                # TODO: This is a weird special case - should be determined in inference
-                argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
-                nothrow = _builtin_nothrow(𝕃ₒ, f, argtypes, rt)
-                return (true, nothrow, nothrow)
-            end
             if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
                 # TODO: these are not yet linearized
                 return (false, false, false)
@@ -335,7 +333,8 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
             consistent = is_consistent(effects)
             effect_free = is_effect_free(effects)
             nothrow = is_nothrow(effects)
-            removable = effect_free & nothrow
+            terminates = is_terminates(effects)
+            removable = effect_free & nothrow & terminates
             return (consistent, removable, nothrow)
         elseif head === :new
             return new_expr_effect_flags(𝕃ₒ, args, src)
@@ -346,7 +345,8 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
             consistent = is_consistent(effects)
             effect_free = is_effect_free(effects)
             nothrow = is_nothrow(effects)
-            removable = effect_free & nothrow
+            terminates = is_terminates(effects)
+            removable = effect_free & nothrow & terminates
             return (consistent, removable, nothrow)
         elseif head === :new_opaque_closure
             length(args) < 4 && return (false, false, false)
@@ -356,7 +356,7 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
             ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
             rt_lb = argextype(args[2], src)
             rt_ub = argextype(args[3], src)
-            source = argextype(args[4], src)
+            source = argextype(args[5], src)
             if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
                 return (false, false, false)
             end
@@ -386,7 +386,7 @@ function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt),
     elseif nothrow
         flag |= IR_FLAG_NOTHROW
     end
-    if !(isexpr(stmt, :call) || isexpr(stmt, :invoke))
+    if !iscallstmt(stmt)
         # There is a bit of a subtle point here, which is that some non-call
         # statements (e.g. PiNode) can be UB:, however, we consider it
         # illegal to introduce such statements that actually cause UB (for any
@@ -531,6 +531,8 @@ function any_stmt_may_throw(ir::IRCode, bb::Int)
     return false
 end
 
+visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
+    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
 function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
     visited = BitSet((bb,))
     worklist = Int[bb]
@@ -575,7 +577,7 @@ function get!(lazyagdomtree::LazyAugmentedDomtree)
             cfg_insert_edge!(cfg, bb, length(cfg.blocks))
         end
     end
-    domtree = construct_domtree(cfg.blocks)
+    domtree = construct_domtree(cfg)
     return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
 end
 
@@ -593,26 +595,28 @@ mutable struct PostOptAnalysisState
     all_nothrow::Bool
     all_noub::Bool
     any_conditional_ub::Bool
+    nortcall::Bool
     function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
         inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
         tpdum = TwoPhaseDefUseMap(length(ir.stmts))
         lazypostdomtree = LazyPostDomtree(ir)
         lazyagdomtree = LazyAugmentedDomtree(ir)
         return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
-                   true, true, nothing, true, true, false)
+                   true, true, nothing, true, true, false, true)
     end
 end
 
 give_up_refinements!(sv::PostOptAnalysisState) =
     sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
-    sv.all_nothrow = sv.all_noub = false
+    sv.all_nothrow = sv.all_noub = sv.nortcall = false
 
 function any_refinable(sv::PostOptAnalysisState)
     effects = sv.result.ipo_effects
     return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
             (!is_effect_free(effects) & sv.all_effect_free) |
             (!is_nothrow(effects) & sv.all_nothrow) |
-            (!is_noub(effects) & sv.all_noub))
+            (!is_noub(effects) & sv.all_noub) |
+            (!is_nortcall(effects) & sv.nortcall))
 end
 
 struct GetNativeEscapeCache{CodeCache}
@@ -657,7 +661,8 @@ function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
         effect_free = sv.all_effect_free ? ALWAYS_TRUE :
                       sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
         nothrow = sv.all_nothrow ? true : effects.nothrow,
-        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub)
+        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
+        nortcall = sv.nortcall ? true : effects.nortcall)
     return true
 end
 
@@ -673,7 +678,7 @@ function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
     f === nothing && return false
     if f === getfield
         nargs = 4
-    elseif f === memoryref || f === memoryrefget || f === memoryref_isassigned
+    elseif f === memoryrefnew || f === memoryrefget || f === memoryref_isassigned
         nargs = 4
     elseif f === memoryrefset!
         nargs = 5
@@ -782,6 +787,13 @@ function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
             sv.all_noub = false
         end
     end
+    if !has_flag(flag, IR_FLAG_NORTCALL)
+        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
+        # there's no need to taint with `:nortcall`, allowing concrete evaluation
+        if iscallstmt(stmt)
+            sv.nortcall = false
+        end
+    end
 end
 
 function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
@@ -828,31 +840,38 @@ function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
 
     stmt_inconsistent = scan_inconsistency!(inst, sv)
 
-    if stmt_inconsistent && inst.idx == lstmt
-        if isa(stmt, ReturnNode) && isdefined(stmt, :val)
+    if stmt_inconsistent
+        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
+            # Taint :consistent if this statement may raise since :consistent requires
+            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
             sv.all_retpaths_consistent = false
-        elseif isa(stmt, GotoIfNot)
-            # Conditional Branch with inconsistent condition.
-            # If we do not know this function terminates, taint consistency, now,
-            # :consistent requires consistent termination. TODO: Just look at the
-            # inconsistent region.
-            if !sv.result.ipo_effects.terminates
-                sv.all_retpaths_consistent = false
-            elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
-                       return any_stmt_may_throw(sv.ir, succ)
-                   end
-                # check if this `GotoIfNot` leads to conditional throws, which taints consistency
+        end
+        if inst.idx == lstmt
+            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
                 sv.all_retpaths_consistent = false
-            else
-                (; cfg, domtree) = get!(sv.lazyagdomtree)
-                for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
-                    if succ == length(cfg.blocks)
-                        # Phi node in the virtual exit -> We have a conditional
-                        # return. TODO: Check if all the retvals are egal.
-                        sv.all_retpaths_consistent = false
-                    else
-                        visit_bb_phis!(sv.ir, succ) do phiidx::Int
-                            push!(sv.inconsistent, phiidx)
+            elseif isa(stmt, GotoIfNot)
+                # Conditional Branch with inconsistent condition.
+                # If we do not know this function terminates, taint consistency, now,
+                # :consistent requires consistent termination. TODO: Just look at the
+                # inconsistent region.
+                if !sv.result.ipo_effects.terminates
+                    sv.all_retpaths_consistent = false
+                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
+                        return any_stmt_may_throw(sv.ir, succ)
+                    end
+                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
+                    sv.all_retpaths_consistent = false
+                else
+                    (; cfg, domtree) = get!(sv.lazyagdomtree)
+                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
+                        if succ == length(cfg.blocks)
+                            # Phi node in the virtual exit -> We have a conditional
+                            # return. TODO: Check if all the retvals are egal.
+                            sv.all_retpaths_consistent = false
+                        else
+                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
+                                push!(sv.inconsistent, phiidx)
+                            end
                         end
                     end
                 end
@@ -994,22 +1013,81 @@ function run_passes_ipo_safe(
     if is_asserts()
         @timeit "verify 3" begin
             verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp))
-            verify_linetable(ir.linetable)
+            verify_linetable(ir.debuginfo, length(ir.stmts))
         end
     end
     @label __done__  # used by @pass
     return ir
 end
 
-function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
-    linetable = ci.linetable
-    if !isa(linetable, Vector{LineInfoNode})
-        linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
+function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
+    # Remove `nothing`s at the end, we don't handle them well
+    # (we expect the last instruction to be a terminator)
+    codelocs = debuginfo.codelocs
+    for i = length(code):-1:1
+        if code[i] !== nothing
+            resize!(code, i)
+            resize!(ssavaluetypes, i)
+            resize!(codelocs, 3i)
+            resize!(info, i)
+            resize!(ssaflags, i)
+            break
+        end
+    end
+    # If the last instruction is not a terminator, add one. This can
+    # happen for implicit return on dead branches.
+    term = code[end]
+    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
+        push!(code, ReturnNode())
+        push!(ssavaluetypes, Union{})
+        push!(codelocs, 0, 0, 0)
+        push!(info, NoCallInfo())
+        push!(ssaflags, IR_FLAG_NOTHROW)
+
+        # Update CFG to include appended terminator
+        old_range = cfg.blocks[end].stmts
+        new_range = StmtRange(first(old_range), last(old_range) + 1)
+        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
+        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
     end
+    nothing
+end
+
+function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
+    while true
+        next = getdebugidx(di, codeloc)
+        line = next[1]
+        line < 0 && return false # invalid info
+        line == 0 && next[2] == 0 && return false # no new info
+        prevloc <= 0 && return true # no old info
+        prev = getdebugidx(di, prevloc)
+        next === prev && return false # exactly identical
+        prevline = prev[1]
+        prevline < 0 && return true # previous invalid info, now valid
+        edge = next[2]
+        edge === prev[2] || return true # change to this edge
+        linetable = di.linetable
+        # check for change to line number here
+        if linetable === nothing || line == 0
+            line == prevline || return true
+        else
+            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
+        end
+        # check for change to edge here
+        edge == 0 && return false # no edge here
+        di = di.edges[Int(edge)]::DebugInfo
+        codeloc = Int(next[3])
+        prevloc = Int(prev[3])
+    end
+end
 
+function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     # Update control-flow to reflect any unreachable branches.
     ssavaluetypes = ci.ssavaluetypes::Vector{Any}
-    code = copy_exprargs(ci.code)
+    ci.code = code = copy_exprargs(ci.code)
+    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
+    codelocs = di.codelocs
+    ssaflags = ci.ssaflags
     for i = 1:length(code)
         expr = code[i]
         if !(i in sv.unreachable)
@@ -1025,11 +1103,11 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
                     ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
                     expr = Expr(:call, Core.typeassert, expr.cond, Bool)
                 elseif i + 1 in sv.unreachable
-                    @assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
+                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
                     cfg_delete_edge!(sv.cfg, block, block + 1)
                     expr = GotoNode(expr.dest)
                 elseif expr.dest in sv.unreachable
-                    @assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
+                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
                     cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
                     expr = nothing
                 end
@@ -1049,6 +1127,20 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
                         code[i] = nothing
                     end
                 end
+            elseif isa(expr, PhiNode)
+                new_edges = Int32[]
+                new_vals = Any[]
+                for j = 1:length(expr.edges)
+                    edge = expr.edges[j]
+                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
+                    push!(new_edges, edge)
+                    if isassigned(expr.values, j)
+                        push!(new_vals, expr.values[j])
+                    else
+                        resize!(new_vals, length(new_edges))
+                    end
+                end
+                code[i] = PhiNode(new_edges, new_vals)
             end
         end
     end
@@ -1056,20 +1148,17 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     # Go through and add an unreachable node after every
     # Union{} call. Then reindex labels.
     stmtinfo = sv.stmt_info
-    codelocs = ci.codelocs
-    ssaflags = ci.ssaflags
     meta = Expr[]
     idx = 1
     oldidx = 1
     nstmts = length(code)
     ssachangemap = labelchangemap = blockchangemap = nothing
-    prevloc = zero(eltype(ci.codelocs))
+    prevloc = 0
     while idx <= length(code)
-        codeloc = codelocs[idx]
-        if sv.insert_coverage && codeloc != prevloc && codeloc != 0
+        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
             # insert a side-effect instruction before the current instruction in the same basic block
             insert!(code, idx, Expr(:code_coverage_effect))
-            insert!(codelocs, idx, codeloc)
+            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
             insert!(ssavaluetypes, idx, Nothing)
             insert!(stmtinfo, idx, NoCallInfo())
             insert!(ssaflags, idx, IR_FLAG_NULL)
@@ -1088,7 +1177,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
             end
             blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
             idx += 1
-            prevloc = codeloc
+            prevloc = oldidx
         end
         if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
             # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
@@ -1108,24 +1197,23 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
                 # Any statements from here to the end of the block have been wrapped in Core.Const(...)
                 # by type inference (effectively deleting them). Only task left is to replace the block
                 # terminator with an explicit `unreachable` marker.
-                if block_end > idx
-                    code[block_end] = ReturnNode()
-                    codelocs[block_end] = codelocs[idx]
-                    ssavaluetypes[block_end] = Union{}
-                    stmtinfo[block_end] = NoCallInfo()
-                    ssaflags[block_end] = IR_FLAG_NOTHROW
 
-                    # Verify that type-inference did its job
+                if block_end > idx
                     if is_asserts()
+                        # Verify that type-inference did its job
                         for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
                             @assert i in sv.unreachable
                         end
                     end
-
+                    code[block_end] = ReturnNode()
+                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
+                    ssavaluetypes[block_end] = Union{}
+                    stmtinfo[block_end] = NoCallInfo()
+                    ssaflags[block_end] = IR_FLAG_NOTHROW
                     idx += block_end - idx
                 else
                     insert!(code, idx + 1, ReturnNode())
-                    insert!(codelocs, idx + 1, codelocs[idx])
+                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
                     insert!(ssavaluetypes, idx + 1, Union{})
                     insert!(stmtinfo, idx + 1, NoCallInfo())
                     insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
@@ -1151,6 +1239,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
         idx += 1
         oldidx += 1
     end
+    empty!(sv.unreachable)
 
     if ssachangemap !== nothing && labelchangemap !== nothing
         renumber_ir_elements!(code, ssachangemap, labelchangemap)
@@ -1162,14 +1251,14 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     for i = 1:length(code)
         code[i] = process_meta!(meta, code[i])
     end
-    strip_trailing_junk!(ci, sv.cfg, code, stmtinfo)
+    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
     types = Any[]
     stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
     # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
     # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
     # and eliminates slots (see below)
     argtypes = sv.slottypes
-    return IRCode(stmts, sv.cfg, linetable, argtypes, meta, sv.sptypes)
+    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes)
 end
 
 function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
@@ -1183,14 +1272,13 @@ end
 function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
     # need `ci` for the slot metadata, IR for the code
     svdef = sv.linfo.def
-    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
-    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.stmt)
+    @timeit "domtree 1" domtree = construct_domtree(ir)
+    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
     𝕃ₒ = optimizer_lattice(sv.inlining.interp)
     @timeit "construct_ssa" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
     # NOTE now we have converted `ir` to the SSA form and eliminated slots
     # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
-    resize!(ir.argtypes, nargs)
+    resize!(ir.argtypes, ci.nargs)
     return ir
 end
 
@@ -1203,7 +1291,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
 function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
-                        params::OptimizationParams, error_path::Bool = false)
+                        params::OptimizationParams)
     #=const=# UNKNOWN_CALL_COST = 20
     head = ex.head
     if is_meta_expr_head(head)
@@ -1264,10 +1352,10 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 return 0
             elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
                 atyp = argextype(ex.args[2], src, sptypes)
-                return isknowntype(atyp) ? 1 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
             elseif f === Core.memoryrefset! && length(ex.args) >= 3
                 atyp = argextype(ex.args[2], src, sptypes)
-                return isknowntype(atyp) ? 5 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
             elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
                 return 1
             end
@@ -1283,7 +1371,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         if extyp === Union{}
             return 0
         end
-        return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+        return params.inline_nonleaf_penalty
     elseif head === :foreigncall
         foreigncall = ex.args[1]
         if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
@@ -1306,7 +1394,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         end
         a = ex.args[2]
         if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
+            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
         end
         return cost
     elseif head === :copyast
@@ -1320,8 +1408,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
     thiscost = 0
     dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
     if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, params,
-                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
+        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
     elseif stmt isa GotoNode
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
index f74cb90e6ab51..6967efe495be1 100644
--- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
+++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
@@ -1213,6 +1213,7 @@ escape_builtin!(::typeof(Core.donotdelete), _...) = false
 # not really safe, but `ThrownEscape` will be imposed later
 escape_builtin!(::typeof(isdefined), _...) = false
 escape_builtin!(::typeof(throw), _...) = false
+escape_builtin!(::typeof(Core.throw_methoderror), _...) = false
 
 function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any})
     length(args) == 4 || return false
diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index dfe0550d7a06d..f6a30cdee4f17 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -202,7 +202,7 @@ DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0)
 """
 Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
 there are three separate length `n` arrays, `label`, `semi` and `ancestor`.
-Instead, for efficiency, we use one array in a array-of-structs style setup.
+Instead, for efficiency, we use one array in an array-of-structs style setup.
 """
 struct SNCAData
     semi::PreNumber
@@ -345,10 +345,7 @@ function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, m
     ancestors = copy(D.to_parent_pre)
     relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre)
     for w::PreNumber in reverse(relevant_blocks)
-        # LLVM initializes this to the parent, the paper initializes this to
-        # `w`, but it doesn't really matter (the parent is a predecessor, so at
-        # worst we'll discover it below). Save a memory reference here.
-        semi_w = typemax(PreNumber)
+        semi_w = ancestors[w]
         last_linked = PreNumber(w + 1)
         for v ∈ dom_edges(domtree, blocks, D.from_pre[w])
             # For the purpose of the domtree, ignore virtual predecessors into
@@ -660,6 +657,8 @@ end
 Compute the nearest common (post-)dominator of `a` and `b`.
 """
 function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
+    a == 0 && return a
+    b == 0 && return b
     alevel = domtree.nodes[a].level
     blevel = domtree.nodes[b].level
     # W.l.g. assume blevel <= alevel
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index a89583494ee13..70318b9e1a979 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -12,14 +12,16 @@ struct InliningTodo
     mi::MethodInstance
     # The IR of the inlinee
     ir::IRCode
+    # The DebugInfo table for the inlinee
+    di::DebugInfo
     # If the function being inlined is a single basic block we can use a
     # simpler inlining algorithm. This flag determines whether that's allowed
     linear_inline_eligible::Bool
     # Effects of the call statement
     effects::Effects
 end
-function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects)
-    return InliningTodo(mi, ir, linear_inline_eligible(ir), effects)
+function InliningTodo(mi::MethodInstance, (ir, di)::Tuple{IRCode, DebugInfo}, effects::Effects)
+    return InliningTodo(mi, ir, di, linear_inline_eligible(ir), effects)
 end
 
 struct ConstantCase
@@ -78,7 +80,7 @@ function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds
     @timeit "analysis" todo = assemble_inline_todo!(ir, state)
     isempty(todo) && return ir
     # Do the actual inlining for every call we identified
-    @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, OptimizationParams(state.interp))
+    @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, state.interp)
     return ir
 end
 
@@ -298,69 +300,29 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
-# duplicated from IRShow
-function normalize_method_name(m)
-    if m isa Method
-        return m.name
-    elseif m isa MethodInstance
-        return (m.def::Method).name
-    elseif m isa Symbol
-        return m
-    else
-        return Symbol("")
-    end
-end
-@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
-
-inline_node_is_duplicate(topline::LineInfoNode, line::LineInfoNode) =
-    topline.module === line.module &&
-    method_name(topline) === method_name(line) &&
-    topline.file === line.file &&
-    topline.line === line.line
-
-function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode,
-                              inlinee::MethodInstance, inlined_at::Int32)
-    inlinee_def = inlinee.def::Method
-    coverage = coverage_enabled(inlinee_def.module)
-    linetable_offset::Int32 = length(linetable)
-    # Append the linetable of the inlined function to our line table
-    topline::Int32 = linetable_offset + Int32(1)
-    coverage_by_path = JLOptions().code_coverage == 3
-    push!(linetable, LineInfoNode(inlinee_def.module, inlinee_def.name, inlinee_def.file, inlinee_def.line, inlined_at))
-    oldlinetable = inlinee_ir.linetable
-    extra_coverage_line = zero(Int32)
-    for oldline in eachindex(oldlinetable)
-        entry = oldlinetable[oldline]
-        if !coverage && coverage_by_path && is_file_tracked(entry.file)
-            # include topline coverage entry if in path-specific coverage mode, and any file falls under path
-            coverage = true
-        end
-        newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line,
-            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
-        if oldline == 1
-            # check for a duplicate on the first iteration (likely true)
-            if inline_node_is_duplicate(linetable[topline], newentry)
-                continue
-            else
-                linetable_offset += 1
-            end
+# TODO append `inlinee_debuginfo` to inner linetable when `inlined_at[2] ≠ 0`
+function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::DebugInfo, inlined_at::NTuple{3,Int32})
+    # Append the linetable of the inlined function to our edges table
+    linetable_offset = 1
+    while true
+        if linetable_offset > length(debuginfo.edges)
+            push!(debuginfo.edges, inlinee_debuginfo)
+            break
+        elseif debuginfo.edges[linetable_offset] === inlinee_debuginfo
+            break
         end
-        push!(linetable, newentry)
+        linetable_offset += 1
     end
-    if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline
-        extra_coverage_line = topline
-    end
-    return linetable_offset, extra_coverage_line
+    return (inlined_at[1], Int32(linetable_offset), Int32(0))
 end
 
 function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-                              ir::IRCode, mi::MethodInstance, inlined_at::Int32, argexprs::Vector{Any})
+                              ir::IRCode, di::DebugInfo, mi::MethodInstance, inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
     def = mi.def::Method
-    linetable = inline_target isa IRCode ? inline_target.linetable : inline_target.ir.linetable
-    topline::Int32 = length(linetable) + Int32(1)
-    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir, mi, inlined_at)
-    if extra_coverage_line != 0
-        insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
+    debuginfo = inline_target isa IRCode ? inline_target.debuginfo : inline_target.ir.debuginfo
+    topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at)
+    if should_insert_coverage(def.module, di)
+        insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
     end
     spvals_ssa = nothing
     if !validate_sparams(mi.sparam_vals)
@@ -380,7 +342,7 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod
             NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)),
             ir.argtypes[1], topline))
     end
-    return SSASubstitute(mi, argexprs, spvals_ssa, linetable_offset)
+    return SSASubstitute(mi, argexprs, spvals_ssa, new_inlined_at)
 end
 
 function adjust_boundscheck!(inline_compact::IncrementalCompact, idx′::Int, stmt::Expr, boundscheck::Symbol)
@@ -396,13 +358,11 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                          item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
     inlined_at = compact.result[idx][:line]
-
-    ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.mi, inlined_at, argexprs)
-
+    ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.di, item.mi, inlined_at, argexprs)
     boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck
 
     # If the iterator already moved on to the next basic block,
-    # temporarily re-open in again.
+    # temporarily re-open it again.
     local return_value
     # Special case inlining that maintains the current basic block if there's only one BB in the target
     new_new_offset = length(compact.new_new_nodes)
@@ -410,14 +370,16 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
     if item.linear_inline_eligible
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx)
-        for ((_, idx′), stmt′) in inline_compact
+        @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)"
+        for ((lineidx, idx′), stmt′) in inline_compact
             # This dance is done to maintain accurate usage counts in the
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
+            # alter the line number information for InsertBefore to point to the current instruction in the new linetable
+            inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx))
             insert_node! = InsertBefore(inline_compact, SSAValue(idx′))
-            stmt′ = ssa_substitute!(insert_node!, inline_compact[SSAValue(idx′)], stmt′,
-                                    ssa_substitute)
+            stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute)
             if isa(stmt′, ReturnNode)
                 val = stmt′.val
                 return_value = SSAValue(idx′)
@@ -444,11 +406,12 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         pn = PhiNode()
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx)
-        for ((_, idx′), stmt′) in inline_compact
+        @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)"
+        for ((lineidx, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
+            inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx))
             insert_node! = InsertBefore(inline_compact, SSAValue(idx′))
-            stmt′ = ssa_substitute!(insert_node!, inline_compact[SSAValue(idx′)], stmt′,
-                                    ssa_substitute)
+            stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -484,7 +447,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
 end
 
 function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-    argexprs::Vector{Any}, nargs_def::Int, line_idx::Int32)
+    argexprs::Vector{Any}, nargs_def::Int, line_idx::NTuple{3,Int32})
     newargexprs = argexprs[1:(nargs_def-1)]
     tuple_call = Expr(:call, TOP_TUPLE)
     tuple_typs = Any[]
@@ -559,7 +522,7 @@ assuming their order stays the same post-discovery in `ml_matches`.
 """
 function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
                                union_split::UnionSplit, boundscheck::Symbol,
-                               todo_bbs::Vector{Tuple{Int,Int}}, params::OptimizationParams)
+                               todo_bbs::Vector{Tuple{Int,Int}}, interp::AbstractInterpreter)
     (; fully_covered, atype, cases, bbs) = union_split
     stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line]
     join_bb = bbs[end]
@@ -577,20 +540,22 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
         @assert nparams == fieldcount(mtype)
         if !(i == ncases && fully_covered)
             for i = 1:nparams
-                a, m = fieldtype(atype, i), fieldtype(mtype, i)
+                aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
                 # If this is always true, we don't need to check for it
-                a <: m && continue
+                aft <: mft && continue
                 # Generate isa check
-                isa_expr = Expr(:call, isa, argexprs[i], m)
-                ssa = insert_node_here!(compact, NewInstruction(isa_expr, Bool, line))
+                isa_expr = Expr(:call, isa, argexprs[i], mft)
+                isa_type = isa_tfunc(optimizer_lattice(interp), argextype(argexprs[i], compact), Const(mft))
+                ssa = insert_node_here!(compact, NewInstruction(isa_expr, isa_type, line))
                 if cond === true
                     cond = ssa
                 else
                     and_expr = Expr(:call, and_int, cond, ssa)
-                    cond = insert_node_here!(compact, NewInstruction(and_expr, Bool, line))
+                    and_type = and_int_tfunc(optimizer_lattice(interp), argextype(cond, compact), isa_type)
+                    cond = insert_node_here!(compact, NewInstruction(and_expr, and_type, line))
                 end
             end
-            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Union{}, line))
+            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Any, line))
         end
         bb = next_cond_bb - 1
         finish_current_bb!(compact, 0)
@@ -600,10 +565,12 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
             for i = 1:nparams
                 argex = argexprs[i]
                 (isa(argex, SSAValue) || isa(argex, Argument)) || continue
-                a, m = fieldtype(atype, i), fieldtype(mtype, i)
-                if !(a <: m)
+                aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
+                if !(aft <: mft)
+                    𝕃ₒ = optimizer_lattice(interp)
+                    narrowed_type = tmeet(𝕃ₒ, argextype(argex, compact), mft)
                     argexprs′[i] = insert_node_here!(compact,
-                        NewInstruction(PiNode(argex, m), m, line))
+                        NewInstruction(PiNode(argex, mft), narrowed_type, line))
                 end
             end
         end
@@ -621,7 +588,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
             push!(pn.edges, bb)
             push!(pn.values, val)
             insert_node_here!(compact,
-                NewInstruction(GotoNode(join_bb), Union{}, line))
+                NewInstruction(GotoNode(join_bb), Any, line))
         else
             insert_node_here!(compact,
                 NewInstruction(ReturnNode(), Union{}, line))
@@ -634,7 +601,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
         ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line))
         push!(pn.edges, bb)
         push!(pn.values, ssa)
-        insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Union{}, line))
+        insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Any, line))
         finish_current_bb!(compact, 0)
     end
 
@@ -642,7 +609,8 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
     return insert_node_here!(compact, NewInstruction(pn, typ, line))
 end
 
-function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, params::OptimizationParams)
+function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, interp::AbstractInterpreter)
+    params = OptimizationParams(interp)
     # Compute the new CFG first (modulo statement ranges, which will be computed below)
     state = CFGInliningState(ir)
     for (idx, item) in todo
@@ -699,7 +667,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
                 if isa(item, InliningTodo)
                     compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, item, boundscheck, state.todo_bbs)
                 elseif isa(item, UnionSplit)
-                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, item, boundscheck, state.todo_bbs, params)
+                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, item, boundscheck, state.todo_bbs, interp)
                 end
                 compact[idx] = nothing
                 refinish && finish_current_bb!(compact, 0)
@@ -979,7 +947,8 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     if !match.fully_covers
         # type-intersection was not able to give us a simple list of types, so
         # ir_inline_unionsplit won't be able to deal with inlining this
-        if !(spec_types isa DataType && length(spec_types.parameters) == length(argtypes) && !isvarargtype(spec_types.parameters[end]))
+        if !(spec_types isa DataType && length(spec_types.parameters) == npassedargs &&
+             !isvarargtype(spec_types.parameters[end]))
             return nothing
         end
     end
@@ -996,19 +965,20 @@ end
 
 function retrieve_ir_for_inlining(cached_result::CodeInstance, src::MaybeCompressed)
     src = _uncompressed_ir(cached_result, src)::CodeInfo
-    return inflate_ir!(src, cached_result.def)
+    return inflate_ir!(src, cached_result.def), src.debuginfo
 end
 function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool)
     if preserve_local_sources
         src = copy(src)
     end
-    return inflate_ir!(src, mi)
+    return inflate_ir!(src, mi), src.debuginfo
 end
 function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool)
     if preserve_local_sources
         ir = copy(ir)
     end
-    return ir
+    ir.debuginfo.def = mi
+    return ir, DebugInfo(ir.debuginfo, length(ir.stmts))
 end
 
 function handle_single_case!(todo::Vector{Pair{Int,Any}},
@@ -1256,7 +1226,8 @@ end
 # Handles all analysis and inlining of intrinsics and builtins. In particular,
 # this method does not access the method table or otherwise process generic
 # functions.
-function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, state::InliningState)
+function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, flag::UInt32,
+                         state::InliningState)
     inst = ir[SSAValue(idx)]
     stmt = inst[:stmt]
     if !(stmt isa Expr)
@@ -1287,7 +1258,7 @@ function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stat
     sig === nothing && return nothing
 
     # Check if we match any of the early inliners
-    earlyres = early_inline_special_case(ir, stmt, rt, sig, state)
+    earlyres = early_inline_special_case(ir, stmt, flag, rt, sig, state)
     if isa(earlyres, SomeCase)
         inst[:stmt] = earlyres.val
         return nothing
@@ -1316,7 +1287,7 @@ function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stat
     end
 
     # Special case inliners for regular functions
-    lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state)
+    lateres = late_inline_special_case!(ir, idx, stmt, flag, rt, sig, state)
     if isa(lateres, SomeCase)
         inst[:stmt] = lateres.val
         add_inst_flag!(inst, ir, state)
@@ -1329,16 +1300,16 @@ end
 function handle_any_const_result!(cases::Vector{InliningCase},
     @nospecialize(result), match::MethodMatch, argtypes::Vector{Any},
     @nospecialize(info::CallInfo), flag::UInt32, state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
+    allow_typevars::Bool)
     if isa(result, ConcreteResult)
-        return handle_concrete_result!(cases, result, info, state)
+        return handle_concrete_result!(cases, result, match, info, state)
     elseif isa(result, SemiConcreteResult)
-        return handle_semi_concrete_result!(cases, result, info, flag, state; allow_abstract)
+        return handle_semi_concrete_result!(cases, result, match, info, flag, state)
     elseif isa(result, ConstPropResult)
-        return handle_const_prop_result!(cases, result, info, flag, state; allow_abstract, allow_typevars)
+        return handle_const_prop_result!(cases, result, match, info, flag, state; allow_typevars)
     else
         @assert result === nothing || result isa VolatileInferenceResult
-        return handle_match!(cases, match, argtypes, info, flag, state; allow_abstract, allow_typevars, volatile_inf_result = result)
+        return handle_match!(cases, match, argtypes, info, flag, state; allow_typevars, volatile_inf_result = result)
     end
 end
 
@@ -1367,12 +1338,10 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig
     nunion === nothing && return nothing
     cases = InliningCase[]
     argtypes = sig.argtypes
-    local handled_all_cases::Bool = true
-    local revisit_idx = local only_method = nothing
-    local meth::MethodLookupResult
+    local handled_all_cases = local fully_covered = true
+    local revisit_idx = nothing
     local all_result_count = 0
-    local joint_effects::Effects = EFFECTS_TOTAL
-    local fully_covered::Bool = true
+    local joint_effects = EFFECTS_TOTAL
     for i = 1:nunion
         meth = getsplit(info, i)
         if meth.ambig
@@ -1383,37 +1352,29 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig
             # No applicable methods; try next union split
             handled_all_cases = false
             continue
-        else
-            if length(meth) == 1 && only_method !== missing
-                if only_method === nothing
-                    only_method = meth[1].method
-                elseif only_method !== meth[1].method
-                    only_method = missing
-                end
-            else
-                only_method = missing
-            end
         end
-        local split_fully_covered::Bool = false
+        local split_fully_covered = false
         for (j, match) in enumerate(meth)
             all_result_count += 1
             result = getresult(info, all_result_count)
             joint_effects = merge_effects(joint_effects, info_effects(result, match, state))
             split_fully_covered |= match.fully_covers
             if !validate_sparams(match.sparams)
-                if !match.fully_covers
-                    handled_all_cases = false
-                    continue
-                end
-                if revisit_idx === nothing
-                    revisit_idx = (i, j, all_result_count)
+                if match.fully_covers
+                    if revisit_idx === nothing
+                        revisit_idx = (i, j, all_result_count)
+                    else
+                        handled_all_cases = false
+                        revisit_idx = nothing
+                    end
                 else
                     handled_all_cases = false
-                    revisit_idx = nothing
                 end
+            elseif !(match.spec_types <: match.method.sig) # the requirement for correct union-split
+                handled_all_cases = false
             else
                 handled_all_cases &= handle_any_const_result!(cases,
-                    result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=false)
+                    result, match, argtypes, info, flag, state; allow_typevars=false)
             end
         end
         fully_covered &= split_fully_covered
@@ -1421,33 +1382,17 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig
 
     (handled_all_cases & fully_covered) || (joint_effects = Effects(joint_effects; nothrow=false))
 
-    if handled_all_cases && revisit_idx !== nothing
-        # we handled everything except one match with unmatched sparams,
-        # so try to handle it by bypassing validate_sparams
-        (i, j, k) = revisit_idx
-        match = getsplit(info, i)[j]
-        result = getresult(info, k)
-        handled_all_cases &= handle_any_const_result!(cases,
-            result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true)
-    elseif length(cases) == 0 && only_method isa Method
-        # if the signature is fully covered and there is only one applicable method,
-        # we can try to inline it even in the presence of unmatched sparams
-        # -- But don't try it if we already tried to handle the match in the revisit_idx
-        # case, because that'll (necessarily) be the same method.
-        if nsplit(info)::Int > 1
-            atype = argtypes_to_type(argtypes)
-            (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype, only_method.sig)::SimpleVector
-            match = MethodMatch(metharg, methsp::SimpleVector, only_method, true)
-            result = nothing
-        else
-            @assert length(meth) == 1
-            match = meth[1]
-            result = getresult(info, 1)
+    if handled_all_cases
+        if revisit_idx !== nothing
+            # we handled everything except one match with unmatched sparams,
+            # so try to handle it by bypassing validate_sparams
+            (i, j, k) = revisit_idx
+            match = getsplit(info, i)[j]
+            result = getresult(info, k)
+            handled_all_cases &= handle_any_const_result!(cases,
+                result, match, argtypes, info, flag, state; allow_typevars=true)
         end
-        handle_any_const_result!(cases,
-            result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true)
-        fully_covered = handled_all_cases = match.fully_covers
-    elseif !handled_all_cases
+    elseif !isempty(cases)
         # if we've not seen all candidates, union split is valid only for dispatch tuples
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
@@ -1461,16 +1406,15 @@ function handle_call!(todo::Vector{Pair{Int,Any}},
     cases = compute_inlining_cases(info, flag, sig, state)
     cases === nothing && return nothing
     cases, all_covered, joint_effects = cases
-    handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
-        all_covered, joint_effects)
+    atype = argtypes_to_type(sig.argtypes)
+    handle_cases!(todo, ir, idx, stmt, atype, cases, all_covered, joint_effects)
 end
 
 function handle_match!(cases::Vector{InliningCase},
     match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt32,
     state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool, volatile_inf_result::Union{Nothing,VolatileInferenceResult})
+    allow_typevars::Bool, volatile_inf_result::Union{Nothing,VolatileInferenceResult})
     spec_types = match.spec_types
-    allow_abstract || isdispatchtuple(spec_types) || return false
     # We may see duplicated dispatch signatures here when a signature gets widened
     # during abstract interpretation: for the purpose of inlining, we can just skip
     # processing this dispatch candidate (unless unmatched type parameters are present)
@@ -1481,12 +1425,11 @@ function handle_match!(cases::Vector{InliningCase},
     return true
 end
 
-function handle_const_prop_result!(cases::Vector{InliningCase},
-    result::ConstPropResult, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
+function handle_const_prop_result!(cases::Vector{InliningCase}, result::ConstPropResult,
+    match::MethodMatch, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState;
+    allow_typevars::Bool)
     mi = result.result.linfo
-    spec_types = mi.specTypes
-    allow_abstract || isdispatchtuple(spec_types) || return false
+    spec_types = match.spec_types
     if !validate_sparams(mi.sparam_vals)
         (allow_typevars && !may_have_fcalls(mi.def::Method)) || return false
     end
@@ -1520,11 +1463,9 @@ function semiconcrete_result_item(result::SemiConcreteResult,
 end
 
 function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult,
-        @nospecialize(info::CallInfo), flag::UInt32, state::InliningState;
-        allow_abstract::Bool)
+    match::MethodMatch, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState)
     mi = result.mi
-    spec_types = mi.specTypes
-    allow_abstract || isdispatchtuple(spec_types) || return false
+    spec_types = match.spec_types
     validate_sparams(mi.sparam_vals) || return false
     item = semiconcrete_result_item(result, info, flag, state)
     item === nothing && return false
@@ -1532,10 +1473,11 @@ function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiC
     return true
 end
 
-function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState)
+function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult,
+    match::MethodMatch, @nospecialize(info::CallInfo), state::InliningState)
     case = concrete_result_item(result, info, state)
     case === nothing && return false
-    push!(cases, InliningCase(result.mi.specTypes, case))
+    push!(cases, InliningCase(match.spec_types, case))
     return true
 end
 
@@ -1554,19 +1496,19 @@ function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallIn
 end
 
 function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr,
-    @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool,
+    @nospecialize(atype), cases::Vector{InliningCase}, all_covered::Bool,
     joint_effects::Effects)
     # If we only have one case and that case is fully covered, we may either
     # be able to do the inlining now (for constant cases), or push it directly
     # onto the todo list
-    if fully_covered && length(cases) == 1
+    if all_covered && length(cases) == 1
         handle_single_case!(todo, ir, idx, stmt, cases[1].item)
     elseif length(cases) > 0
         isa(atype, DataType) || return nothing
         for case in cases
             isa(case.sig, DataType) || return nothing
         end
-        push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
+        push!(todo, idx=>UnionSplit(all_covered, atype, cases))
     else
         add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects))
     end
@@ -1675,11 +1617,12 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
     todo = Pair{Int, Any}[]
 
     for idx in 1:length(ir.stmts)
-        simpleres = process_simple!(todo, ir, idx, state)
+        flag = ir.stmts[idx][:flag]
+
+        simpleres = process_simple!(todo, ir, idx, flag, state)
         simpleres === nothing && continue
         stmt, sig = simpleres
 
-        flag = ir.stmts[idx][:flag]
         info = ir.stmts[idx][:info]
 
         # `NativeInterpreter` won't need this, but provide a support for `:invoke` exprs here
@@ -1725,9 +1668,8 @@ function linear_inline_eligible(ir::IRCode)
     return true
 end
 
-function early_inline_special_case(
-    ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature,
-    state::InliningState)
+function early_inline_special_case(ir::IRCode, stmt::Expr, flag::UInt32,
+                                   @nospecialize(type), sig::Signature, state::InliningState)
     OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
 
@@ -1735,13 +1677,13 @@ function early_inline_special_case(
         val = type.val
         is_inlineable_constant(val) || return nothing
         if isa(f, IntrinsicFunction)
-            if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end])
+            if is_pure_intrinsic_infer(f) && has_flag(flag, IR_FLAG_NOTHROW)
                 return SomeCase(quoted(val))
             end
         elseif contains_is(_PURE_BUILTINS, f)
             return SomeCase(quoted(val))
         elseif contains_is(_EFFECT_FREE_BUILTINS, f)
-            if _builtin_nothrow(optimizer_lattice(state.interp), f, argtypes[2:end], type)
+            if has_flag(flag, IR_FLAG_NOTHROW)
                 return SomeCase(quoted(val))
             end
         elseif f === Core.get_binding_type
@@ -1782,12 +1724,11 @@ end
 # special-case some regular method calls whose results are not folded within `abstract_call_known`
 # (and thus `early_inline_special_case` doesn't handle them yet)
 # NOTE we manually inline the method bodies, and so the logic here needs to precisely sync with their definitions
-function late_inline_special_case!(
-    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature,
-    state::InliningState)
+function late_inline_special_case!(ir::IRCode, idx::Int, stmt::Expr, flag::UInt32,
+                                   @nospecialize(type), sig::Signature, state::InliningState)
     OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
-    if length(argtypes) == 3 && istopfunction(f, :!==)
+    if length(argtypes) == 3 && f === Core.:(!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
         if isa(type, Const)
@@ -1797,10 +1738,10 @@ function late_inline_special_case!(
         cmp_call_ssa = insert_node!(ir, idx, removable_if_unused(NewInstruction(cmp_call, Bool)))
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
         return SomeCase(not_call)
-    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
+    elseif length(argtypes) == 3 && f === Core.:(>:)
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
-        if isa(type, Const) && _builtin_nothrow(optimizer_lattice(state.interp), <:, Any[argtypes[3], argtypes[2]], type)
+        if isa(type, Const) && has_flag(flag, IR_FLAG_NOTHROW)
             return SomeCase(quoted(type.val))
         end
         subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2])
@@ -1828,15 +1769,7 @@ struct SSASubstitute
     mi::MethodInstance
     arg_replacements::Vector{Any}
     spvals_ssa::Union{Nothing,SSAValue}
-    linetable_offset::Int32
-end
-
-function ssa_substitute!(insert_node!::Inserter, subst_inst::Instruction, @nospecialize(val),
-                         ssa_substitute::SSASubstitute)
-    if subst_inst[:line] != 0
-        subst_inst[:line] += ssa_substitute.linetable_offset
-    end
-    return ssa_substitute_op!(insert_node!, subst_inst, val, ssa_substitute)
+    inlined_at::NTuple{3,Int32} # TODO: add a map also, so that ssaidx doesn't need to equal inlined_idx?
 end
 
 function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 80d6d5fde556d..960da88ddffc8 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -2,7 +2,8 @@
 
 Core.PhiNode() = Core.PhiNode(Int32[], Any[])
 
-isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave)
+isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) ||
+    isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave)
 
 struct CFG
     blocks::Vector{BasicBlock}
@@ -174,6 +175,52 @@ function first_insert_for_bb(code::Vector{Any}, cfg::CFG, block::Int)
     return lastnonphiidx
 end
 
+# mutable version of the compressed DebugInfo
+mutable struct DebugInfoStream
+    def::Union{MethodInstance,Symbol,Nothing}
+    linetable::Union{Nothing,DebugInfo}
+    edges::Vector{DebugInfo}
+    firstline::Int32 # the starting line for this block (specified by having an index of 0)
+    codelocs::Vector{Int32} # for each statement:
+        # index into linetable (if defined), else a line number (in the file represented by def)
+        # then index into edges
+        # then index into edges[linetable]
+    function DebugInfoStream(codelocs::Vector{Int32})
+        return new(nothing, nothing, DebugInfo[], 0, codelocs)
+    end
+    # DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int) =
+    #     if debuginfo_file1(di.def) === debuginfo_file1(di.def)
+    #         new(def, di.linetable, Core.svec(di.edges...), getdebugidx(di, 0),
+    #             ccall(:jl_uncompress_codelocs, Any, (Any, Int), di.codelocs, nstmts)::Vector{Int32})
+    #     else
+    function DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int)
+        codelocs = zeros(Int32, nstmts * 3)
+        for i = 1:nstmts
+            codelocs[3i - 2] = i
+        end
+        return new(def, di, DebugInfo[], 0, codelocs)
+    end
+    global copy(di::DebugInfoStream) = new(di.def, di.linetable, di.edges, di.firstline, di.codelocs)
+end
+
+Core.DebugInfo(di::DebugInfoStream, nstmts::Int) =
+    DebugInfo(something(di.def), di.linetable, Core.svec(di.edges...),
+        ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), di.firstline, di.codelocs, nstmts)::String)
+
+getdebugidx(debuginfo::DebugInfo, pc::Int) =
+    ccall(:jl_uncompress1_codeloc, NTuple{3,Int32}, (Any, Int), debuginfo.codelocs, pc)
+
+function getdebugidx(debuginfo::DebugInfoStream, pc::Int)
+    if 3 <= 3pc <= length(debuginfo.codelocs)
+        return (debuginfo.codelocs[3pc-2], debuginfo.codelocs[3pc-1], debuginfo.codelocs[3pc-0])
+    elseif pc == 0
+        return (Int32(debuginfo.firstline), Int32(0), Int32(0))
+    else
+        return (Int32(-1), Int32(0), Int32(0))
+    end
+end
+
+
 # SSA values that need renaming
 struct OldSSAValue
     id::Int
@@ -203,7 +250,6 @@ end
 
 const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
 
-
 # SSA-indexed nodes
 struct InstructionStream
     stmt::Vector{Any}
@@ -211,13 +257,16 @@ struct InstructionStream
     info::Vector{CallInfo}
     line::Vector{Int32}
     flag::Vector{UInt32}
+    function InstructionStream(stmts::Vector{Any}, type::Vector{Any}, info::Vector{CallInfo}, line::Vector{Int32}, flag::Vector{UInt32})
+        return new(stmts, type, info, line, flag)
+    end
 end
 function InstructionStream(len::Int)
     stmts = Vector{Any}(undef, len)
     types = Vector{Any}(undef, len)
     info = Vector{CallInfo}(undef, len)
     fill!(info, NoCallInfo())
-    lines = fill(Int32(0), len)
+    lines = fill(Int32(0), 3len)
     flags = fill(IR_FLAG_NULL, len)
     return InstructionStream(stmts, types, info, lines, flags)
 end
@@ -242,10 +291,10 @@ function resize!(stmts::InstructionStream, len)
     resize!(stmts.stmt, len)
     resize!(stmts.type, len)
     resize!(stmts.info, len)
-    resize!(stmts.line, len)
+    resize!(stmts.line, 3len)
     resize!(stmts.flag, len)
     for i in (old_length + 1):len
-        stmts.line[i] = 0
+        stmts.line[3i-2], stmts.line[3i-1], stmts.line[3i] = NoLineUpdate
         stmts.flag[i] = IR_FLAG_NULL
         stmts.info[i] = NoCallInfo()
     end
@@ -261,11 +310,20 @@ Instruction(is::InstructionStream) = Instruction(is, add_new_idx!(is))
 @inline function getindex(node::Instruction, fld::Symbol)
     (fld === :inst) && (fld = :stmt) # deprecated
     isdefined(node, fld) && return getfield(node, fld)
-    return getfield(getfield(node, :data), fld)[getfield(node, :idx)]
+    fldarray = getfield(getfield(node, :data), fld)
+    fldidx = getfield(node, :idx)
+    (fld === :line) && return (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0])
+    return fldarray[fldidx]
 end
 @inline function setindex!(node::Instruction, @nospecialize(val), fld::Symbol)
     (fld === :inst) && (fld = :stmt) # deprecated
-    getfield(getfield(node, :data), fld)[getfield(node, :idx)] = val
+    fldarray = getfield(getfield(node, :data), fld)
+    fldidx = getfield(node, :idx)
+    if fld === :line
+        (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0]) = val::NTuple{3,Int32}
+    else
+        fldarray[fldidx] = val
+    end
     return node
 end
 
@@ -274,7 +332,7 @@ function setindex!(is::InstructionStream, newval::Instruction, idx::Int)
     is.stmt[idx] = newval[:stmt]
     is.type[idx] = newval[:type]
     is.info[idx] = newval[:info]
-    is.line[idx] = newval[:line]
+    (is.line[3idx-2], is.line[3idx-1], is.line[3idx-0]) = newval[:line]
     is.flag[idx] = newval[:flag]
     return is
 end
@@ -314,14 +372,15 @@ struct NewInstruction
     stmt::Any
     type::Any
     info::CallInfo
-    line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location
+    line::Union{NTuple{3,Int32},Nothing} # if nothing, copy the line from previous statement in the insertion location
     flag::Union{UInt32,Nothing} # if nothing, IR flags will be recomputed on insertion
     function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo),
-                            line::Union{Int32,Nothing}, flag::Union{UInt32,Nothing})
+                            line::Union{NTuple{3,Int32},Int32,Nothing}, flag::Union{UInt32,Nothing})
+        line isa Int32 && (line = (line, zero(Int32), zero(Int32)))
         return new(stmt, type, info, line, flag)
     end
 end
-function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Int32,Nothing}=nothing)
+function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{NTuple{3,Int32},Int32,Nothing}=nothing)
     return NewInstruction(stmt, type, NoCallInfo(), line, nothing)
 end
 @nospecialize
@@ -329,7 +388,7 @@ function NewInstruction(newinst::NewInstruction;
     stmt::Any=newinst.stmt,
     type::Any=newinst.type,
     info::CallInfo=newinst.info,
-    line::Union{Int32,Nothing}=newinst.line,
+    line::Union{NTuple{3,Int32},Int32,Nothing}=newinst.line,
     flag::Union{UInt32,Nothing}=newinst.flag)
     return NewInstruction(stmt, type, info, line, flag)
 end
@@ -337,7 +396,7 @@ function NewInstruction(inst::Instruction;
     stmt::Any=inst[:stmt],
     type::Any=inst[:type],
     info::CallInfo=inst[:info],
-    line::Union{Int32,Nothing}=inst[:line],
+    line::Union{NTuple{3,Int32},Int32,Nothing}=inst[:line],
     flag::Union{UInt32,Nothing}=inst[:flag])
     return NewInstruction(stmt, type, info, line, flag)
 end
@@ -366,19 +425,27 @@ struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
     sptypes::Vector{VarState}
-    linetable::Vector{LineInfoNode}
+    debuginfo::DebugInfoStream
     cfg::CFG
     new_nodes::NewNodeStream
     meta::Vector{Expr}
 
-    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState})
-        return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta)
+    function IRCode(stmts::InstructionStream, cfg::CFG, debuginfo::DebugInfoStream, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState})
+        return new(stmts, argtypes, sptypes, debuginfo, cfg, NewNodeStream(), meta)
     end
     function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream)
-        return new(stmts, ir.argtypes, ir.sptypes, ir.linetable, cfg, new_nodes, ir.meta)
+        di = ir.debuginfo
+        @assert di.codelocs === stmts.line
+        return new(stmts, ir.argtypes, ir.sptypes, di, cfg, new_nodes, ir.meta)
+    end
+    global function copy(ir::IRCode)
+        di = ir.debuginfo
+        stmts = copy(ir.stmts)
+        di = copy(di)
+        di.edges = copy(di.edges)
+        di.codelocs = stmts.line
+        return new(stmts, copy(ir.argtypes), copy(ir.sptypes), di, copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta))
     end
-    global copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes),
-        copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta))
 end
 
 """
@@ -389,14 +456,23 @@ for debugging and unit testing of IRCode APIs. The compiler itself should genera
 from the frontend or one of the caches.
 """
 function IRCode()
-    ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], VarState[])
+    stmts = InstructionStream(1)
+    debuginfo = DebugInfoStream(stmts.line)
+    stmts.line[1] = 1
+    ir = IRCode(stmts, CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), debuginfo, Any[], Expr[], VarState[])
     ir[SSAValue(1)][:stmt] = ReturnNode(nothing)
     ir[SSAValue(1)][:type] = Nothing
     ir[SSAValue(1)][:flag] = 0x00
-    ir[SSAValue(1)][:line] = Int32(0)
+    ir[SSAValue(1)][:line] = NoLineUpdate
     return ir
 end
 
+construct_domtree(ir::IRCode) = construct_domtree(ir.cfg)
+construct_domtree(cfg::CFG) = construct_domtree(cfg.blocks)
+
+construct_postdomtree(ir::IRCode) = construct_postdomtree(ir.cfg)
+construct_postdomtree(cfg::CFG) = construct_postdomtree(cfg.blocks)
+
 function block_for_inst(ir::IRCode, inst::Int)
     if inst > length(ir.stmts)
         inst = ir.new_nodes.info[inst - length(ir.stmts)].pos
@@ -684,6 +760,7 @@ mutable struct IncrementalCompact
         perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i))
         new_len = length(code.stmts) + length(info)
         result = InstructionStream(new_len)
+        code.debuginfo.codelocs = result.line
         used_ssas = fill(0, new_len)
         new_new_used_ssas = Vector{Int}()
         blocks = code.cfg.blocks
@@ -864,7 +941,7 @@ function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool)
 end
 
 function inst_from_newinst!(node::Instruction, newinst::NewInstruction,
-    newline::Int32=newinst.line::Int32, newflag::UInt32=newinst.flag::UInt32)
+    newline::NTuple{3,Int32}=newinst.line::NTuple{3,Int32}, newflag::UInt32=newinst.flag::UInt32)
     node[:stmt] = newinst.stmt
     node[:type] = newinst.type
     node[:info] = newinst.info
@@ -942,7 +1019,8 @@ function did_just_finish_bb(compact)
     result_idx = compact.result_idx
     result_bbs = compact.cfg_transform.result_bbs
     (compact.active_result_bb == length(result_bbs) + 1) ||
-    result_idx == first(result_bbs[compact.active_result_bb].stmts)
+    (result_idx == first(result_bbs[compact.active_result_bb].stmts) &&
+     compact.active_result_bb != 1)
 end
 
 function maybe_reopen_bb!(compact)
@@ -954,7 +1032,7 @@ function maybe_reopen_bb!(compact)
 end
 
 function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false)
-    newline = newinst.line::Int32
+    newline = newinst.line::NTuple{3,Int32}
     refinish = false
     result_idx = compact.result_idx
     result_bbs = compact.cfg_transform.result_bbs
@@ -1525,17 +1603,15 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         end
 
         values = process_phinode_values(values, late_fixup, already_inserted_phi_arg, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
-        # Don't remove the phi node if it is before the definition of its value
-        # because doing so can create forward references. This should only
-        # happen with dead loops, but can cause problems when optimization
-        # passes look at all code, dead or not. This check should be
-        # unnecessary when DCE can remove those dead loops entirely, so this is
-        # just to be safe.
-        before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id
-        if length(edges) == 1 && isassigned(values, 1) && !before_def &&
-                length(cfg_transforms_enabled ?
-                    result_bbs[bb_rename_succ[active_bb]].preds :
-                    compact.ir.cfg.blocks[active_bb].preds) == 1
+
+        # Quick egality check for PhiNode that may be replaced with its incoming
+        # value without needing to set the `Refined` flag. We can't do the actual
+        # refinement check, because we do not have access to the lattice here.
+        # Users may call `reprocess_phi_node!` inside the compaction loop to
+        # revisit PhiNodes with the proper lattice refinement check.
+        if may_replace_phi(values, cfg_transforms_enabled ?
+                result_bbs[bb_rename_succ[active_bb]] :
+                compact.ir.cfg.blocks[active_bb], idx) && argextype(values[1], compact) === inst[:type]
             # There's only one predecessor left - just replace it
             v = values[1]
             @assert !isa(v, NewSSAValue)
@@ -1586,6 +1662,38 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     return result_idx
 end
 
+function may_replace_phi(values::Vector{Any}, phi_bb::BasicBlock, idx::Int)
+    length(values) == 1 || return false
+    isassigned(values, 1) || return false
+    length(phi_bb.preds) == 1 || return false
+
+    # Don't remove the phi node if it is before the definition of its value
+    # because doing so can create forward references. This should only
+    # happen with dead loops, but can cause problems when optimization
+    # passes look at all code, dead or not. This check should be
+    # unnecessary when DCE can remove those dead loops entirely, so this is
+    # just to be safe.
+    v = values[1]
+    before_def = isa(v, OldSSAValue) && idx < v.id
+    return !before_def
+end
+
+function reprocess_phi_node!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, phi::PhiNode, old_idx::Int)
+    phi_bb = compact.active_result_bb
+    did_just_finish_bb(compact) && (phi_bb -= 1)
+    may_replace_phi(phi.values, compact.cfg_transform.result_bbs[phi_bb], compact.idx) || return false
+
+    # There's only one predecessor left - just replace it
+    v = phi.values[1]
+    if !⊑(𝕃ₒ, compact[compact.ssa_rename[old_idx]][:type], argextype(v, compact))
+        v = Refined(v)
+    end
+    compact.ssa_rename[old_idx] = v
+
+    delete_inst_here!(compact)
+    return true
+end
+
 function resize!(compact::IncrementalCompact, nnewnodes::Int)
     old_length = length(compact.result)
     resize!(compact.result, nnewnodes)
@@ -1596,6 +1704,8 @@ function resize!(compact::IncrementalCompact, nnewnodes::Int)
     return compact
 end
 
+const NoLineUpdate = (Int32(0), Int32(0), Int32(0))
+
 function finish_current_bb!(compact::IncrementalCompact, active_bb::Int,
                             old_result_idx::Int=compact.result_idx, unreachable::Bool=false)
     (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform
@@ -1612,9 +1722,9 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb::Int,
             length(compact.result) < old_result_idx && resize!(compact, old_result_idx)
             node = compact.result[old_result_idx]
             if unreachable
-                node[:stmt], node[:type], node[:line] = ReturnNode(), Union{}, 0
+                node[:stmt], node[:type], node[:line] = ReturnNode(), Union{}, NoLineUpdate
             else
-                node[:stmt], node[:type], node[:line], node[:flag] = nothing, Nothing, 0, IR_FLAGS_EFFECTS
+                node[:stmt], node[:type], node[:line], node[:flag] = nothing, Nothing, NoLineUpdate, IR_FLAGS_EFFECTS
             end
             compact.result_idx = old_result_idx + 1
         elseif cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts)
@@ -1825,7 +1935,10 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id
     stmt = inst[:stmt]
     stmt === nothing && return false
     inst[:type] === Bottom && return false
-    has_flag(inst, IR_FLAGS_REMOVABLE) || return false
+    if !has_flag(inst, IR_FLAGS_REMOVABLE)
+        add_flag!(inst, IR_FLAG_UNUSED)
+        return false
+    end
     foreachssa(stmt) do val::SSAValue
         if compact.used_ssas[val.id] == 1
             if val.id < idx || in_worklist
@@ -1956,9 +2069,11 @@ function non_dce_finish!(compact::IncrementalCompact)
     result_idx = compact.result_idx
     resize!(compact.result, result_idx - 1)
     just_fixup!(compact)
-    bb = compact.cfg_transform.result_bbs[end]
-    compact.cfg_transform.result_bbs[end] = BasicBlock(bb,
-                StmtRange(first(bb.stmts), result_idx-1))
+    if !did_just_finish_bb(compact)
+        # Finish the bb now
+        finish_current_bb!(compact, 0)
+    end
+    result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1)
     compact.renamed_new_nodes = true
     nothing
 end
@@ -1970,7 +2085,7 @@ function finish(compact::IncrementalCompact)
 end
 
 function complete(compact::IncrementalCompact)
-    result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1)
+    result_bbs = compact.cfg_transform.result_bbs
     cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)])
     if should_check_ssa_counts()
         oracle_check(compact)
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
index 3b6cfa9019d76..1aeb87accbcd7 100644
--- a/base/compiler/ssair/irinterp.jl
+++ b/base/compiler/ssair/irinterp.jl
@@ -5,39 +5,44 @@ function collect_limitations!(@nospecialize(typ), ::IRInterpretationState)
     return typ
 end
 
-function concrete_eval_invoke(interp::AbstractInterpreter,
-    inst::Expr, mi::MethodInstance, irsv::IRInterpretationState)
-    world = frame_world(irsv)
-    mi_cache = WorldView(code_cache(interp), world)
-    code = get(mi_cache, mi, nothing)
-    code === nothing && return Pair{Any,Tuple{Bool,Bool}}(nothing, (false, false))
-    argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv)
-    argtypes === nothing && return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, false))
-    effects = decode_effects(code.ipo_purity_bits)
+function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, argtypes::Vector{Any}, parent::IRInterpretationState)
+    world = frame_world(parent)
+    effects = decode_effects(ci.ipo_purity_bits)
     if (is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) &&
         (is_nonoverlayed(interp) || is_nonoverlayed(effects)))
         args = collect_const_args(argtypes, #=start=#1)
-        value = let world = get_inference_world(interp)
-            try
-                Core._call_in_world_total(world, args...)
-            catch
-                return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, is_noub(effects)))
-            end
+        value = try
+            Core._call_in_world_total(world, args...)
+        catch
+            return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, is_noub(effects)))
         end
         return Pair{Any,Tuple{Bool,Bool}}(Const(value), (true, true))
     else
-        if is_constprop_edge_recursed(mi, irsv)
+        mi = ci.def
+        if is_constprop_edge_recursed(mi, parent)
             return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects)))
         end
-        newirsv = IRInterpretationState(interp, code, mi, argtypes, world)
+        newirsv = IRInterpretationState(interp, ci, mi, argtypes, world)
         if newirsv !== nothing
-            newirsv.parent = irsv
+            assign_parentchild!(newirsv, parent)
             return ir_abstract_constant_propagation(interp, newirsv)
         end
         return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects)))
     end
 end
 
+function abstract_eval_invoke_inst(interp::AbstractInterpreter, inst::Instruction, irsv::IRInterpretationState)
+    stmt = inst[:stmt]
+    mi = stmt.args[1]::MethodInstance
+    world = frame_world(irsv)
+    mi_cache = WorldView(code_cache(interp), world)
+    code = get(mi_cache, mi, nothing)
+    code === nothing && return Pair{Any,Tuple{Bool,Bool}}(nothing, (false, false))
+    argtypes = collect_argtypes(interp, stmt.args[2:end], nothing, irsv)
+    argtypes === nothing && return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, false))
+    return concrete_eval_invoke(interp, code, argtypes, irsv)
+end
+
 abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir)
 
 function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState)
@@ -46,9 +51,9 @@ end
 
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
     si = StmtInfo(true) # TODO better job here?
-    (; rt, exct, effects, info) = abstract_call(interp, arginfo, si, irsv)
-    irsv.ir.stmts[irsv.curridx][:info] = info
-    return RTEffects(rt, exct, effects)
+    call = abstract_call(interp, arginfo, si, irsv)
+    irsv.ir.stmts[irsv.curridx][:info] = call.info
+    return call
 end
 
 function kill_block!(ir::IRCode, bb::Int)
@@ -124,7 +129,6 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
             add_flag!(inst, IR_FLAG_NOTHROW)
             if condval
                 inst[:stmt] = nothing
-                inst[:type] = Any
                 kill_edge!(irsv, bb, stmt.dest)
             else
                 inst[:stmt] = GotoNode(stmt.dest)
@@ -137,11 +141,12 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
     rt = nothing
     if isa(stmt, Expr)
         head = stmt.head
-        if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined || head === :boundscheck
+        if (head === :call || head === :foreigncall || head === :new || head === :splatnew ||
+            head === :static_parameter || head === :isdefined || head === :boundscheck)
             (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
             add_flag!(inst, flags_for_effects(effects))
         elseif head === :invoke
-            rt, (nothrow, noub) = concrete_eval_invoke(interp, stmt, stmt.args[1]::MethodInstance, irsv)
+            rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv)
             if nothrow
                 add_flag!(inst, IR_FLAG_NOTHROW)
             end
@@ -163,6 +168,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
         elseif head === :leave
             return false
         else
+            Core.println(stmt)
             error("reprocess_instruction!: unhandled expression found")
         end
     elseif isa(stmt, PhiNode)
@@ -172,6 +178,9 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
     elseif isa(stmt, PhiCNode)
         # Currently not modeled
         return false
+    elseif isa(stmt, EnterNode)
+        # TODO: Propagate scope type changes
+        return false
     elseif isa(stmt, ReturnNode)
         # Handled at the very end
         return false
@@ -185,6 +194,13 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
         rt = argextype(stmt, irsv.ir)
     end
     if rt !== nothing
+        if has_flag(inst, IR_FLAG_UNUSED)
+            # Don't bother checking the type if we know it's unused
+            if has_flag(inst, IR_FLAGS_REMOVABLE)
+                inst[:stmt] = nothing
+            end
+            return false
+        end
         if isa(rt, Const)
             inst[:type] = rt
             if is_inlineable_constant(rt.val) && has_flag(inst, IR_FLAGS_REMOVABLE)
@@ -319,7 +335,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
             delete!(ssa_refined, idx)
         end
         check_ret!(stmt, idx)
-        is_terminator_or_phi = (isa(stmt, PhiNode) || isterminator(stmt))
+        is_terminator_or_phi = (isa(stmt, PhiNode) || stmt === nothing || isterminator(stmt))
         if typ === Bottom && !(idx == lstmt && is_terminator_or_phi)
             return true
         end
@@ -425,6 +441,12 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
         store_backedges(frame_instance(irsv), irsv.edges)
     end
 
+    if irsv.frameid != 0
+        callstack = irsv.callstack::Vector{AbsIntState}
+        @assert callstack[end] === irsv && length(callstack) == irsv.frameid
+        pop!(callstack)
+    end
+
     return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
 end
 
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 3e9a4e2a746dc..2b0721b8d2408 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -1,16 +1,22 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    inflate_ir!(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
+    inflate_ir!(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode
     inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
 
 Inflates `ci::CodeInfo`-IR to `ir::IRCode`-format.
 This should be used with caution as it is a in-place transformation where the fields of
 the original `ci::CodeInfo` are modified.
 """
-function inflate_ir!(ci::CodeInfo, linfo::MethodInstance)
-    sptypes = sptypes_from_meth_instance(linfo)
-    argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo)
+function inflate_ir!(ci::CodeInfo, mi::MethodInstance)
+    sptypes = sptypes_from_meth_instance(mi)
+    if ci.slottypes === nothing
+        argtypes = va_process_argtypes(fallback_lattice,
+            matching_cache_argtypes(fallback_lattice, mi),
+            ci.nargs, ci.isva)
+    else
+        argtypes = ci.slottypes[1:ci.nargs]
+    end
     return inflate_ir!(ci, sptypes, argtypes)
 end
 function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any})
@@ -35,24 +41,23 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{A
         ssavaluetypes = Any[ Any for i = 1:ssavaluetypes::Int ]
     end
     info = CallInfo[NoCallInfo() for i = 1:nstmts]
-    stmts = InstructionStream(code, ssavaluetypes, info, ci.codelocs, ci.ssaflags)
-    linetable = ci.linetable
-    if !isa(linetable, Vector{LineInfoNode})
-        linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
-    end
+    di = DebugInfoStream(nothing, ci.debuginfo, nstmts)
+    stmts = InstructionStream(code, ssavaluetypes, info, di.codelocs, ci.ssaflags)
     meta = Expr[]
-    return IRCode(stmts, cfg, linetable, argtypes, meta, sptypes)
+    return IRCode(stmts, cfg, di, argtypes, meta, sptypes)
 end
 
 """
-    inflate_ir(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
-    inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
     inflate_ir(ci::CodeInfo) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
 
 Non-destructive version of `inflate_ir!`.
 Mainly used for testing or interactive use.
 """
-inflate_ir(ci::CodeInfo, linfo::MethodInstance) = inflate_ir!(copy(ci), linfo)
+inflate_ir(ci::CodeInfo, mi::MethodInstance) = inflate_ir!(copy(ci), mi)
+inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) = inflate_ir(ci, VarState[], argtypes)
 inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes)
 function inflate_ir(ci::CodeInfo)
     parent = ci.parent
@@ -73,15 +78,17 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode)
     stmts = ir.stmts
     code = ci.code = stmts.stmt
     ssavaluetypes = ci.ssavaluetypes = stmts.type
-    codelocs = ci.codelocs = stmts.line
+    codelocs = stmts.line
     ssaflags = ci.ssaflags = stmts.flag
-    linetable = ci.linetable = ir.linetable
+    debuginfo = ir.debuginfo
     for metanode in ir.meta
         push!(code, metanode)
-        push!(codelocs, 1)
+        push!(codelocs, 1, 0, 0)
         push!(ssavaluetypes, Any)
         push!(ssaflags, IR_FLAG_NULL)
     end
+    @assert debuginfo.codelocs === stmts.line "line table not from debuginfo"
+    ci.debuginfo = DebugInfo(debuginfo, length(code))
     # Translate BB Edges to statement edges
     # (and undo normalization for now)
     for i = 1:length(code)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 0d855f38cd286..37d79e2bd7b0c 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -79,7 +79,7 @@ end
 function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int)
     # TODO: This can be much faster by looking at current level and only
     # searching for those blocks in a sorted order
-    while !(curblock in allblocks) && curblock !== 0
+    while curblock ∉ allblocks && curblock ≠ 0
         curblock = domtree.idoms_bb[curblock]
     end
     return curblock
@@ -190,18 +190,21 @@ function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospec
     return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ)
 end
 
-function trivial_walker(@nospecialize(pi), @nospecialize(idx))
-    return nothing
-end
+abstract type WalkerCallback end
+
+struct TrivialWalker <: WalkerCallback end
+(::TrivialWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) = nothing
 
-function pi_walker(@nospecialize(pi), @nospecialize(idx))
-    if isa(pi, PiNode)
-        return LiftedValue(pi.val)
+struct PiWalker <: WalkerCallback end
+function (::PiWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        return LiftedValue(def.val)
     end
     return nothing
 end
 
-function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), callback=trivial_walker)
+function simple_walk(compact::IncrementalCompact, @nospecialize(defssa::AnySSAValue),
+                     walker_callback::WalkerCallback=TrivialWalker())
     while true
         if isa(defssa, OldSSAValue)
             if already_inserted(compact, defssa)
@@ -218,7 +221,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         end
         def = compact[defssa][:stmt]
         if isa(def, AnySSAValue)
-            callback(def, defssa)
+            walker_callback(def, defssa)
             if isa(def, SSAValue)
                 is_old(compact, defssa) && (def = OldSSAValue(def.id))
             end
@@ -226,7 +229,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         elseif isa(def, Union{PhiNode, PhiCNode, GlobalRef})
             return defssa
         else
-            new_def = callback(def, defssa)
+            new_def = walker_callback(def, defssa)
             if new_def === nothing
                 return defssa
             end
@@ -241,16 +244,21 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
     end
 end
 
-function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
-                                @nospecialize(typeconstraint))
-    callback = function (@nospecialize(pi), @nospecialize(idx))
-        if isa(pi, PiNode)
-            typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))
-            return LiftedValue(pi.val)
-        end
-        return nothing
+mutable struct TypeConstrainingWalker <: WalkerCallback
+    typeconstraint::Any
+    TypeConstrainingWalker(@nospecialize(typeconstraint::Any)) = new(typeconstraint)
+end
+function (walker_callback::TypeConstrainingWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        walker_callback.typeconstraint =
+            typeintersect(walker_callback.typeconstraint, widenconst(def.typ))
+        return LiftedValue(def.val)
     end
-    def = simple_walk(compact, defssa, callback)
+    return nothing
+end
+function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(val::AnySSAValue),
+                                @nospecialize(typeconstraint))
+    def = simple_walk(compact, val, TypeConstrainingWalker(typeconstraint))
     return Pair{Any, Any}(def, typeconstraint)
 end
 
@@ -443,8 +451,8 @@ function lift_leaves(compact::IncrementalCompact, field::Int,
                     ocleaf = simple_walk(compact, ocleaf)
                 end
                 ocdef, _ = walk_to_def(compact, ocleaf)
-                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-4
-                    lift_arg!(compact, leaf, cache_key, ocdef, 4+field, lifted_leaves)
+                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-5
+                    lift_arg!(compact, leaf, cache_key, ocdef, 5+field, lifted_leaves)
                     continue
                 end
                 return nothing
@@ -638,15 +646,17 @@ end
 
 struct SkipToken end; const SKIP_TOKEN = SkipToken()
 
-function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value),
-                      lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int},
-                      walker_callback)
+function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa::AnySSAValue),
+                      @nospecialize(old_value), lifted_philikes::Vector{LiftedPhilike},
+                      lifted_leaves::Union{LiftedLeaves, LiftedDefs},
+                      reverse_mapping::IdDict{AnySSAValue, Int},
+                      walker_callback::WalkerCallback)
     val = old_value
     if is_old(compact, old_node_ssa) && isa(val, SSAValue)
         val = OldSSAValue(val.id)
     end
     if isa(val, AnySSAValue)
-        val = simple_walk(compact, val, def_walker(lifted_leaves, reverse_mapping, walker_callback))
+        val = simple_walk(compact, val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
     if val in keys(lifted_leaves)
         lifted_val = lifted_leaves[val]
@@ -656,7 +666,7 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=:
         lifted_val === nothing && return UNDEF_TOKEN
         val = lifted_val.val
         if isa(val, AnySSAValue)
-            val = simple_walk(compact, val, pi_walker)
+            val = simple_walk(compact, val, PiWalker())
         end
         return val
     elseif isa(val, AnySSAValue) && val in keys(reverse_mapping)
@@ -673,7 +683,7 @@ function is_old(compact, @nospecialize(old_node_ssa))
     return true
 end
 
-struct PhiNest{C}
+struct PhiNest{C<:WalkerCallback}
     visited_philikes::Vector{AnySSAValue}
     lifted_philikes::Vector{LiftedPhilike}
     lifted_leaves::Union{LiftedLeaves, LiftedDefs}
@@ -743,20 +753,29 @@ function finish_phi_nest!(compact::IncrementalCompact, nest::PhiNest)
     end
 end
 
-function def_walker(lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}, walker_callback)
-    function (@nospecialize(walk_def), @nospecialize(defssa))
-        if (defssa in keys(lifted_leaves)) || (isa(defssa, AnySSAValue) && defssa in keys(reverse_mapping))
-            return nothing
-        end
-        isa(walk_def, PiNode) && return LiftedValue(walk_def.val)
-        return walker_callback(walk_def, defssa)
+struct LiftedLeaveWalker{C<:WalkerCallback} <: WalkerCallback
+    lifted_leaves::Union{LiftedLeaves, LiftedDefs}
+    reverse_mapping::IdDict{AnySSAValue, Int}
+    inner_walker_callback::C
+    function LiftedLeaveWalker(@nospecialize(lifted_leaves::Union{LiftedLeaves, LiftedDefs}),
+                               @nospecialize(reverse_mapping::IdDict{AnySSAValue, Int}),
+                               inner_walker_callback::C) where C<:WalkerCallback
+        return new{C}(lifted_leaves, reverse_mapping, inner_walker_callback)
+    end
+end
+function (walker_callback::LiftedLeaveWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    (; lifted_leaves, reverse_mapping, inner_walker_callback) = walker_callback
+    if defssa in keys(lifted_leaves) || defssa in keys(reverse_mapping)
+        return nothing
     end
+    isa(def, PiNode) && return LiftedValue(def.val)
+    return inner_walker_callback(def, defssa)
 end
 
 function perform_lifting!(compact::IncrementalCompact,
         visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key),
         @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val),
-        lazydomtree::Union{LazyDomtree,Nothing}, walker_callback = trivial_walker)
+        lazydomtree::Union{LazyDomtree,Nothing}, walker_callback::WalkerCallback = TrivialWalker())
     reverse_mapping = IdDict{AnySSAValue, Int}()
     for id in 1:length(visited_philikes)
         reverse_mapping[visited_philikes[id]] = id
@@ -839,7 +858,7 @@ function perform_lifting!(compact::IncrementalCompact,
 
     # Fixup the stmt itself
     if isa(stmt_val, Union{SSAValue, OldSSAValue})
-        stmt_val = simple_walk(compact, stmt_val, def_walker(lifted_leaves, reverse_mapping, walker_callback))
+        stmt_val = simple_walk(compact, stmt_val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
 
     if stmt_val in keys(lifted_leaves)
@@ -948,6 +967,17 @@ function keyvalue_predecessors(@nospecialize(key), 𝕃ₒ::AbstractLattice)
     end
 end
 
+struct KeyValueWalker <: WalkerCallback
+    compact::IncrementalCompact
+end
+function (walker_callback::KeyValueWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, walker_callback.compact)
+        @assert length(def.args) in (5, 6)
+        return LiftedValue(def.args[end-2])
+    end
+    return nothing
+end
+
 function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
     collection = stmt.args[end-1]
     key = stmt.args[end]
@@ -964,16 +994,9 @@ function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 
         result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact))
     end
 
-    function keyvalue_walker(@nospecialize(def), _)
-        if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact)
-            @assert length(def.args) in (5, 6)
-            return LiftedValue(def.args[end-2])
-        end
-        return nothing
-    end
     (lifted_val, nest) = perform_lifting!(compact,
         visited_philikes, key, result_t, lifted_leaves, collection, nothing,
-        keyvalue_walker)
+        KeyValueWalker(compact))
 
     compact[idx] = lifted_val === nothing ? nothing : Expr(:call, GlobalRef(Core, :tuple), lifted_val.val)
     finish_phi_nest!(compact, nest)
@@ -1139,12 +1162,15 @@ end
 # which can be very large sometimes, and program counters in question are often very sparse
 const SPCSet = IdSet{Int}
 
-struct IntermediaryCollector
+struct IntermediaryCollector <: WalkerCallback
     intermediaries::SPCSet
 end
-function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa))
-    if !isa(pi, Expr)
-        push!(this.intermediaries, ssa.id)
+function (walker_callback::IntermediaryCollector)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if !(def isa Expr)
+        push!(walker_callback.intermediaries, defssa.id)
+        if def isa PiNode
+            return LiftedValue(def.val)
+        end
     end
     return nothing
 end
@@ -1225,13 +1251,24 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             bb = compact.active_result_bb - 1
             bbs = scope_mapping[bb]
             if isexpr(stmt, :leave) && bbs != SSAValue(0)
-                update_scope_mapping!(scope_mapping, bb+1, scope_mapping[block_for_inst(compact, bbs)])
-            else
-                update_scope_mapping!(scope_mapping, bb+1, bbs)
+                # Here we want to count the number of scopes that we're leaving,
+                # which is the same as the number of EnterNodes being referenced
+                # by `stmt.args`. Which have :scope set. In practice, the frontend
+                # does emit these in order, so we could simply go to the last one,
+                # but we want to avoid making that semantic assumption.
+                for i = 1:length(stmt.args)
+                    scope = stmt.args[i]
+                    scope === nothing && continue
+                    enter = compact[scope][:inst]
+                    @assert isa(enter, EnterNode)
+                    isdefined(enter, :scope) || continue
+                    bbs = scope_mapping[block_for_inst(compact, bbs)]
+                end
             end
+            update_scope_mapping!(scope_mapping, bb+1, bbs)
         end
         # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
-        is_setfield = is_isdefined = is_finalizer = is_keyvalue_get = false
+        is_setfield = is_isdefined = is_finalizer = false
         field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
             4 <= length(stmt.args) <= 5 || continue
@@ -1360,8 +1397,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         if ismutabletypename(struct_typ_name)
             isa(val, SSAValue) || continue
             let intermediaries = SPCSet()
-                callback = IntermediaryCollector(intermediaries)
-                def = simple_walk(compact, val, callback)
+                def = simple_walk(compact, val, IntermediaryCollector(intermediaries))
                 # Mutable stuff here
                 isa(def, SSAValue) || continue
                 if defuses === nothing
@@ -1496,7 +1532,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
     end
 
     src_inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) || return false
-    src = retrieve_ir_for_inlining(code, src)
+    src, di = retrieve_ir_for_inlining(code, src)
 
     # For now: Require finalizer to only have one basic block
     length(src.cfg.blocks) == 1 || return false
@@ -1505,8 +1541,8 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
     add_inlining_backedge!(et, mi)
 
     # TODO: Should there be a special line number node for inlined finalizers?
-    inlined_at = ir[SSAValue(idx)][:line]
-    ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, mi, inlined_at, argexprs)
+    inline_at = ir[SSAValue(idx)][:line]
+    ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, di, mi, inline_at, argexprs)
 
     # TODO: Use the actual inliner here rather than open coding this special purpose inliner.
     ssa_rename = Vector{Any}(undef, length(src.stmts))
@@ -1518,12 +1554,8 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
             ssa_rename[ssa.id]
         end
         stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, ssa_substitute)
-        newline = inst[:line]
-        if newline != 0
-            newline += ssa_substitute.linetable_offset
-        end
         ssa_rename[idx′] = insert_node!(ir, idx,
-            NewInstruction(inst; stmt=stmt′, line=newline),
+            NewInstruction(inst; stmt=stmt′, line=(ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(idx′))),
             attach_after)
     end
 
@@ -1607,6 +1639,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
     end
     all(check_defuse, defuse.uses) || return nothing
     all(check_defuse, defuse.defs) || return nothing
+    bb_insert_block != 0 || return nothing # verify post-dominator of all uses exists
 
     # Check #3
     dominates(domtree, finalizer_bb, bb_insert_block) || return nothing
@@ -1672,7 +1705,7 @@ end
 function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
     𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
     lazypostdomtree = LazyPostDomtree(ir)
-    for (idx, (intermediaries, defuse)) in defuses
+    for (defidx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
         # escapes and we cannot eliminate the allocation. This works, because we're guaranteed
@@ -1680,16 +1713,15 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # show up in the nuses_total count.
         nleaves = length(defuse.uses) + length(defuse.defs)
         nuses = 0
-        for idx in intermediaries
-            nuses += used_ssas[idx]
+        for iidx in intermediaries
+            nuses += used_ssas[iidx]
         end
-        nuses_total = used_ssas[idx] + nuses - length(intermediaries)
+        nuses_total = used_ssas[defidx] + nuses - length(intermediaries)
         nleaves == nuses_total || continue
         # Find the type for this allocation
-        defexpr = ir[SSAValue(idx)][:stmt]
+        defexpr = ir[SSAValue(defidx)][:stmt]
         isexpr(defexpr, :new) || continue
-        newidx = idx
-        typ = unwrap_unionall(ir.stmts[newidx][:type])
+        typ = unwrap_unionall(ir.stmts[defidx][:type])
         # Could still end up here if we tried to setfield! on an immutable, which would
         # error at runtime, but is not illegal to have in the IR.
         typ = widenconst(typ)
@@ -1705,7 +1737,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             end
         end
         if finalizer_idx !== nothing && inlining !== nothing
-            try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining,
+            try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining,
                 lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
             continue
         end
@@ -1744,11 +1776,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # but we should come up with semantics for well defined semantics
         # for uninitialized fields first.
         ndefuse = length(fielddefuse)
-        blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse)
+        blocks = Vector{Tuple{#=phiblocks=#Vector{Int},#=allblocks=#BitSet}}(undef, ndefuse)
         for fidx in 1:ndefuse
             du = fielddefuse[fidx]
             isempty(du.uses) && continue
-            push!(du.defs, newidx)
+            push!(du.defs, defidx)
             ldu = compute_live_ins(ir.cfg, du)
             if isempty(ldu.live_in_bbs)
                 phiblocks = Int[]
@@ -1761,7 +1793,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                 for i = 1:length(du.uses)
                     use = du.uses[i]
                     if use.kind === :isdefined
-                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx)
+                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx)
                             ir[SSAValue(use.idx)][:stmt] = true
                         else
                             all_eliminated = false
@@ -1774,7 +1806,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                             continue
                         end
                     end
-                    has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip
+                    has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx) || @goto skip
                 end
             else # always have some definition at the allocation site
                 for i = 1:length(du.uses)
@@ -1841,19 +1873,19 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated,
             # now eliminate "definitions" (i.e. `setfield!`) calls
             # (NOTE the allocation itself will be eliminated by DCE pass later)
-            for idx in du.defs
-                idx == newidx && continue # this is allocation
+            for didx in du.defs
+                didx == defidx && continue # this is allocation
                 # verify this statement won't throw, otherwise it can't be eliminated safely
-                ssa = SSAValue(idx)
-                if is_nothrow(ir, ssa)
-                    ir[ssa][:stmt] = nothing
+                setfield_ssa = SSAValue(didx)
+                if is_nothrow(ir, setfield_ssa)
+                    ir[setfield_ssa][:stmt] = nothing
                 else
                     # We can't eliminate this statement, because it might still
                     # throw an error, but we can mark it as effect-free since we
                     # know we have removed all uses of the mutable allocation.
                     # As a result, if we ever do prove nothrow, we can delete
                     # this statement then.
-                    add_flag!(ir[ssa], IR_FLAG_EFFECT_FREE)
+                    add_flag!(ir[setfield_ssa], IR_FLAG_EFFECT_FREE)
                 end
             end
         end
@@ -1862,7 +1894,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # this means all ccall preserves have been replaced with forwarded loads
             # so we can potentially eliminate the allocation, otherwise we must preserve
             # the whole allocation.
-            push!(intermediaries, newidx)
+            push!(intermediaries, defidx)
         end
         # Insert the new preserves
         for (useidx, new_preserves) in preserve_uses
@@ -2002,8 +2034,13 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
     unionphis = Pair{Int,Any}[] # sorted
     compact = IncrementalCompact(ir, true)
     made_changes = false
-    for ((_, idx), stmt) in compact
+    for ((old_idx, idx), stmt) in compact
         if isa(stmt, PhiNode)
+            if reprocess_phi_node!(𝕃ₒ, compact, stmt, old_idx)
+                # Phi node has a single predecessor and was deleted
+                made_changes = true
+                continue
+            end
             push!(all_phis, idx)
             if is_some_union(compact.result[idx][:type])
                 push!(unionphis, Pair{Int,Any}(idx, Union{}))
@@ -2262,6 +2299,15 @@ function cfg_simplify!(ir::IRCode)
             elseif merge_into[idx] == 0 && is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb)
                 # If this BB is empty, we can still merge it as long as none of our successor's phi nodes
                 # reference our predecessors.
+                #
+                # This is for situations like:
+                #   #1 - ...
+                #        goto #3 if not ...
+                #   #2 - (empty)
+                #   #3 - ϕ(#2 => true, #1 => false)
+                #
+                # where we rely on the empty basic block to disambiguate the ϕ-node's value
+
                 found_interference = false
                 preds = Int[ascend_eliminated_preds(bbs, pred) for pred in bb.preds]
                 for idx in bbs[succ].stmts
@@ -2319,10 +2365,9 @@ function cfg_simplify!(ir::IRCode)
                     end
                     curr = merged_succ[curr]
                 end
-                terminator = ir[SSAValue(ir.cfg.blocks[curr].stmts[end])][:stmt]
-                if isa(terminator, GotoNode) || isa(terminator, ReturnNode)
-                    break
-                elseif isa(terminator, GotoIfNot)
+                terminator = ir[SSAValue(bbs[curr].stmts[end])][:stmt]
+
+                if isa(terminator, GotoIfNot)
                     if bb_rename_succ[terminator.dest] == 0
                         push!(worklist, terminator.dest)
                     end
@@ -2331,6 +2376,20 @@ function cfg_simplify!(ir::IRCode)
                     if bb_rename_succ[catchbb] == 0
                         push!(worklist, catchbb)
                     end
+                elseif isa(terminator, GotoNode) || isa(terminator, ReturnNode)
+                    # No implicit fall through. Schedule from work list.
+                    break
+                else
+                    is_bottom = ir[SSAValue(bbs[curr].stmts[end])][:type] === Union{}
+                    if is_bottom && !isa(terminator, PhiNode) && terminator !== nothing
+                        # If this is a regular statement (not PhiNode/GotoNode/GotoIfNot
+                        # or the `nothing` special case deletion marker),
+                        # and the type is Union{}, then this may be a terminator.
+                        # Ordinarily we normalize with ReturnNode(), but this is not
+                        # required. In any case, we do not fall through, so we
+                        # do not need to schedule the fall-through block.
+                        break
+                    end
                 end
                 ncurr = curr + 1
                 while !isempty(searchsorted(dropped_bbs, ncurr))
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index 3936a82a6560e..7d936a1688aba 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -52,12 +52,12 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         stmt = stmt::Expr
         # TODO: why is this here, and not in Base.show_unquoted
         print(io, "invoke ")
-        linfo = stmt.args[1]::Core.MethodInstance
+        mi = stmt.args[1]::Core.MethodInstance
         show_unquoted(io, stmt.args[2], indent)
         print(io, "(")
         # XXX: this is wrong if `sig` is not a concretetype method
         # more correct would be to use `fieldtype(sig, i)`, but that would obscure / discard Varargs information in show
-        sig = linfo.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(linfo.specTypes).parameters::Core.SimpleVector
+        sig = mi.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(mi.specTypes).parameters::Core.SimpleVector
         print_arg(i) = sprint(; context=io) do io
             show_unquoted(io, stmt.args[i], indent)
             if (i - 1) <= length(sig)
@@ -145,17 +145,6 @@ function show_unquoted_gotoifnot(io::IO, stmt::GotoIfNot, indent::Int, prefix::S
     show_unquoted(io, stmt.cond, indent)
 end
 
-function compute_inlining_depth(linetable::Vector, iline::Int32)
-    iline == 0 && return 1
-    depth = -1
-    while iline != 0
-        depth += 1
-        lineinfo = linetable[iline]::LineInfoNode
-        iline = lineinfo.inlined_at
-    end
-    return depth
-end
-
 function should_print_ssa_type(@nospecialize node)
     if isa(node, Expr)
         return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :leave))
@@ -171,32 +160,27 @@ function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show
     return nothing
 end
 
-function normalize_method_name(m)
+function method_name(@nospecialize m)
+    if m isa LineInfoNode
+        m = m.method
+    end
+    if m isa MethodInstance
+        m = m.def
+    end
     if m isa Method
-        return m.name
-    elseif m isa MethodInstance
-        return (m.def::Method).name
-    elseif m isa Symbol
+        m = m.name
+    end
+    if m isa Module
+        return :var"top-level scope"
+    end
+    if m isa Symbol
         return m
-    else
-        return Symbol("")
     end
+    return :var""
 end
-@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
-
-# converts the linetable for line numbers
-# into a list in the form:
-#   1 outer-most-frame
-#   2   inlined-frame
-#   3     innermost-frame
-function compute_loc_stack(linetable::Vector, line::Int32)
-    stack = Int[]
-    while line != 0
-        entry = linetable[line]::LineInfoNode
-        pushfirst!(stack, line)
-        line = entry.inlined_at
-    end
-    return stack
+@noinline function normalize_method_name(@nospecialize m)
+    name = method_name(m)
+    return name === :var"" ? :none : name
 end
 
 """
@@ -265,83 +249,72 @@ function compute_ir_line_annotations(code::IRCode)
     loc_lineno = String[]
     cur_group = 1
     last_lineno = 0
-    last_stack = Int[]
+    last_stack = LineInfoNode[] # nb. only file, line, and method are populated in this
     last_printed_depth = 0
-    linetable = code.linetable
-    lines = code.stmts.line
-    last_line = zero(eltype(lines))
-    for idx in 1:length(lines)
+    debuginfo = code.debuginfo
+    def = :var"unknown scope"
+    for idx in 1:length(code.stmts)
         buf = IOBuffer()
-        line = lines[idx]
         print(buf, "│")
-        depth = compute_inlining_depth(linetable, line)
-        iline = line
-        lineno = 0
+        stack = buildLineInfoNode(debuginfo, def, idx)
+        lineno::Int = 0
         loc_method = ""
-        if line != 0
-            stack = compute_loc_stack(linetable, line)
-            lineno = linetable[stack[1]].line
+        isempty(stack) && (stack = last_stack)
+        if !isempty(stack)
+            lineno = stack[1].line
             x = min(length(last_stack), length(stack))
-            if length(stack) != 0
-                # Compute the last depth that was in common
-                first_mismatch = let last_stack=last_stack
-                    findfirst(i->last_stack[i] != stack[i], 1:x)
-                end
-                # If the first mismatch is the last stack frame, that might just
-                # be a line number mismatch in inner most frame. Ignore those
-                if length(last_stack) == length(stack) && first_mismatch == length(stack)
-                    last_entry, entry = linetable[last_stack[end]], linetable[stack[end]]
-                    if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file
-                        first_mismatch = nothing
-                    end
+            depth = length(stack) - 1
+            # Compute the last depth that was in common
+            first_mismatch = let last_stack=last_stack
+                findfirst(i->last_stack[i] != stack[i], 1:x)
+            end
+            # If the first mismatch is the last stack frame, that might just
+            # be a line number mismatch in inner most frame. Ignore those
+            if length(last_stack) == length(stack) && first_mismatch == length(stack)
+                last_entry, entry = last_stack[end], stack[end]
+                if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file
+                    first_mismatch = nothing
                 end
-                last_depth = something(first_mismatch, x+1)-1
-                if min(depth, last_depth) > last_printed_depth
-                    printing_depth = min(depth, last_printed_depth + 1)
-                    last_printed_depth = printing_depth
-                elseif length(stack) > length(last_stack) || first_mismatch !== nothing
-                    printing_depth = min(depth, last_depth + 1)
-                    last_printed_depth = printing_depth
-                else
-                    printing_depth = 0
+            end
+            last_depth = something(first_mismatch, x+1)-1
+            if min(depth, last_depth) > last_printed_depth
+                printing_depth = min(depth, last_printed_depth + 1)
+                last_printed_depth = printing_depth
+            elseif length(stack) > length(last_stack) || first_mismatch !== nothing
+                printing_depth = min(depth, last_depth + 1)
+                last_printed_depth = printing_depth
+            else
+                printing_depth = 0
+            end
+            stole_one = false
+            if printing_depth != 0
+                for _ in 1:(printing_depth-1)
+                    print(buf, "│")
                 end
-                stole_one = false
-                if printing_depth != 0
-                    for _ in 1:(printing_depth-1)
+                if printing_depth <= last_depth-1 && first_mismatch === nothing
+                    print(buf, "┃")
+                    for _ in printing_depth+1:min(depth, last_depth)
                         print(buf, "│")
                     end
-                    if printing_depth <= last_depth-1 && first_mismatch === nothing
-                        print(buf, "┃")
-                        for _ in printing_depth+1:min(depth, last_depth)
-                            print(buf, "│")
-                        end
-                    else
-                        stole_one = true
-                        print(buf, "╻")
-                    end
                 else
-                    for _ in 1:min(depth, last_depth)
-                        print(buf, "│")
-                    end
+                    stole_one = true
+                    print(buf, "╻")
                 end
-                print(buf, "╷"^max(0, depth - last_depth - stole_one))
-                if printing_depth != 0
-                    if length(stack) == printing_depth
-                        loc_method = line
-                    else
-                        loc_method = stack[printing_depth + 1]
-                    end
-                    loc_method = method_name(linetable[loc_method])
+            else
+                for _ in 1:min(depth, last_depth)
+                    print(buf, "│")
                 end
-                loc_method = string(" "^printing_depth, loc_method)
             end
+            print(buf, "╷"^max(0, depth - last_depth - stole_one))
+            if printing_depth != 0
+                loc_method = normalize_method_name(stack[printing_depth + 1])
+            end
+            loc_method = string(" "^printing_depth, loc_method)
             last_stack = stack
-            entry = linetable[line]
         end
         push!(loc_annotations, String(take!(buf)))
         push!(loc_lineno, (lineno != 0 && lineno != last_lineno) ? string(lineno) : "")
         push!(loc_methods, loc_method)
-        last_line = line
         (lineno != 0) && (last_lineno = lineno)
         nothing
     end
@@ -350,19 +323,87 @@ end
 
 Base.show(io::IO, code::Union{IRCode, IncrementalCompact}) = show_ir(io, code)
 
+# A line_info_preprinter for disabling line info printing
 lineinfo_disabled(io::IO, linestart::String, idx::Int) = ""
 
-function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
+# utility function to extract the file name from a DebugInfo object
+function debuginfo_file1(debuginfo::Union{DebugInfo,DebugInfoStream})
+    def = debuginfo.def
+    if def isa MethodInstance
+        def = def.def
+    end
+    if def isa Method
+        def = def.file
+    end
+    if def isa Symbol
+        return def
+    end
+    return :var"<unknown>"
+end
+
+# utility function to extract the first line number and file of a block of code
+function debuginfo_firstline(debuginfo::Union{DebugInfo,DebugInfoStream})
+    linetable = debuginfo.linetable
+    while linetable != nothing
+        debuginfo = linetable
+        linetable = debuginfo.linetable
+    end
+    codeloc = getdebugidx(debuginfo, 0)
+    return debuginfo_file1(debuginfo), codeloc[1]
+end
+
+struct LineInfoNode
+    method # ::Union{Method,MethodInstance,Symbol}
+    file::Symbol
+    line::Int32
+end
+
+# utility function for converting a debuginfo object a particular pc to list of LineInfoNodes representing the inlining info at that pc for function `def`
+# which is either `nothing` (macro-expand), a module (top-level), a Method (unspecialized code) or a MethodInstance (specialized code)
+# Returns `false` if the line info should not be updated with this info because this
+# statement has no effect on the line numbers. The `scopes` will still be populated however
+# with as much information as was available about the inlining at that statement.
+function append_scopes!(scopes::Vector{LineInfoNode}, pc::Int, debuginfo, @nospecialize(def))
+    doupdate = true
+    while true
+        debuginfo.def isa Symbol || (def = debuginfo.def)
+        codeloc = getdebugidx(debuginfo, pc)
+        line::Int = codeloc[1]
+        inl_to::Int = codeloc[2]
+        doupdate &= line != 0 || inl_to != 0 # disabled debug info--no update
+        if debuginfo.linetable === nothing || pc <= 0 || line < 0
+            line < 0 && (doupdate = false; line = 0) # broken debug info
+            push!(scopes, LineInfoNode(def, debuginfo_file1(debuginfo), Int32(line)))
+        else
+            doupdate = append_scopes!(scopes, line, debuginfo.linetable::DebugInfo, def) && doupdate
+        end
+        inl_to == 0 && return doupdate
+        def = :var"macro expansion"
+        debuginfo = debuginfo.edges[inl_to]
+        pc::Int = codeloc[3]
+    end
+end
+
+# utility wrapper around `append_scopes!` that returns an empty list instead of false
+# when there is no applicable line update
+function buildLineInfoNode(debuginfo, @nospecialize(def), pc::Int)
+    DI = LineInfoNode[]
+    append_scopes!(DI, pc, debuginfo, def) || empty!(DI)
+    return DI
+end
+
+# A default line_info_preprinter for printing accurate line number information
+function DILineInfoPrinter(debuginfo, def, showtypes::Bool=false)
     context = LineInfoNode[]
     context_depth = Ref(0)
     indent(s::String) = s^(max(context_depth[], 1) - 1)
-    function emit_lineinfo_update(io::IO, linestart::String, lineidx::Int32)
+    function emit_lineinfo_update(io::IO, linestart::String, pc::Int)
         # internal configuration options:
         linecolor = :yellow
         collapse = showtypes ? false : true
         indent_all = true
-        # convert lineidx to a vector
-        if lineidx == typemin(Int32)
+        # convert pc to a vector
+        if pc == 0
             # sentinel value: reset internal (and external) state
             pops = indent("└")
             if !isempty(pops)
@@ -372,13 +413,10 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
             end
             empty!(context)
             context_depth[] = 0
-        elseif lineidx > 0 # just skip over lines with no debug info at all
-            DI = LineInfoNode[]
-            while lineidx != 0
-                entry = linetable[lineidx]::LineInfoNode
-                push!(DI, entry)
-                lineidx = entry.inlined_at
-            end
+            return ""
+        end
+        DI = reverse!(buildLineInfoNode(debuginfo, def, pc))
+        if !isempty(DI)
             # FOR DEBUGGING, or if you just like very excessive output:
             # this prints out the context in full for every statement
             #empty!(context)
@@ -508,6 +546,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
             #end
         end
         indent_all || return ""
+        context_depth[] <= 1 && return ""
         return sprint(io -> printstyled(io, indent("│"), color=linecolor), context=io)
     end
     return emit_lineinfo_update
@@ -803,18 +842,8 @@ end
 
 _strip_color(s::String) = replace(s, r"\e\[\d+m"a => "")
 
-function statementidx_lineinfo_printer(f, code::IRCode)
-    printer = f(code.linetable)
-    function (io::IO, indent::String, idx::Int)
-        printer(io, indent, idx > 0 ? code.stmts[idx][:line] : typemin(Int32))
-    end
-end
-function statementidx_lineinfo_printer(f, code::CodeInfo)
-    printer = f(code.linetable)
-    function (io::IO, indent::String, idx::Int)
-        printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
-    end
-end
+statementidx_lineinfo_printer(f, code::IRCode) = f(code.debuginfo, :var"unknown scope")
+statementidx_lineinfo_printer(f, code::CodeInfo) = f(code.debuginfo, :var"unknown scope")
 statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
 
 function stmts_used(io::IO, code::IRCode, warn_unset_entry=true)
@@ -1019,8 +1048,11 @@ function Base.show(io::IO, e::Effects)
     printstyled(io, effectbits_letter(e, :inaccessiblememonly, 'm'); color=effectbits_color(e, :inaccessiblememonly))
     print(io, ',')
     printstyled(io, effectbits_letter(e, :noub, 'u'); color=effectbits_color(e, :noub))
+    print(io, ',')
+    printstyled(io, effectbits_letter(e, :nonoverlayed, 'o'); color=effectbits_color(e, :nonoverlayed))
+    print(io, ',')
+    printstyled(io, effectbits_letter(e, :nortcall, 'r'); color=effectbits_color(e, :nortcall))
     print(io, ')')
-    e.nonoverlayed || printstyled(io, '′'; color=:red)
 end
 
 @specialize
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 7ee07585fae9b..e70633ffecf6a 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -88,6 +88,9 @@ function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecializ
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any))
         return UNDEF_TOKEN
+    elseif has_flag(ir.stmts[idx], IR_FLAG_NOTHROW)
+        # if the `isdefined`-ness of this slot is guaranteed by abstract interpretation,
+        # there is no need to form a `:throw_undef_if_not`
     elseif def_ssa !== true
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any))
@@ -153,46 +156,12 @@ end
 
 function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa))
     for use in uses
-        code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, stmt::SlotNumber->(ssa, true), ir, ci, use, code[use])
+        code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, ::SlotNumber->Pair{Any,Any}(ssa, true), ir, ci, use, code[use])
     end
 end
 
 function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}})
-    return fixemup!(stmt::SlotNumber->true, stmt::SlotNumber->renames[slot_id(stmt)], ir, ci, idx, stmt)
-end
-
-function strip_trailing_junk!(ci::CodeInfo, cfg::CFG, code::Vector{Any}, info::Vector{CallInfo})
-    # Remove `nothing`s at the end, we don't handle them well
-    # (we expect the last instruction to be a terminator)
-    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
-    (; codelocs, ssaflags) = ci
-    for i = length(code):-1:1
-        if code[i] !== nothing
-            resize!(code, i)
-            resize!(ssavaluetypes, i)
-            resize!(codelocs, i)
-            resize!(info, i)
-            resize!(ssaflags, i)
-            break
-        end
-    end
-    # If the last instruction is not a terminator, add one. This can
-    # happen for implicit return on dead branches.
-    term = code[end]
-    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
-        push!(code, ReturnNode())
-        push!(ssavaluetypes, Union{})
-        push!(codelocs, 0)
-        push!(info, NoCallInfo())
-        push!(ssaflags, IR_FLAG_NOTHROW)
-
-        # Update CFG to include appended terminator
-        old_range = cfg.blocks[end].stmts
-        new_range = StmtRange(first(old_range), last(old_range) + 1)
-        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
-        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
-    end
-    nothing
+    return fixemup!(::SlotNumber->true, x::SlotNumber->renames[slot_id(x)], ir, ci, idx, stmt)
 end
 
 # maybe use expr_type?
@@ -464,7 +433,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
                 # Add an explicit goto node in the next basic block (we accounted for this above)
                 nidx = inst_range[end] + 1
                 node = result[nidx]
-                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0
+                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate
             end
             result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest])
         elseif !isa(terminator, ReturnNode)
@@ -475,7 +444,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
                 # Add an explicit goto node
                 nidx = inst_range[end] + 1
                 node = result[nidx]
-                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0
+                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate
                 inst_range = first(inst_range):(last(inst_range) + 1)
             end
         end
@@ -504,6 +473,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
         end
         new_node[:stmt] = renumber_ssa!(new_node_inst, inst_rename, true)
     end
+    ir.debuginfo.codelocs = result.line
     new_ir = IRCode(ir, result, cfg, new_new_nodes)
     return new_ir
 end
@@ -582,7 +552,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
     end
 
     # Record the correct exception handler for all critical sections
-    handler_at, handlers = compute_trycatch(code, BitSet())
+    handler_info = compute_trycatch(code)
 
     phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
     live_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
@@ -689,7 +659,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
     visited = BitSet()
     new_nodes = ir.new_nodes
     @timeit "SSA Rename" while !isempty(worklist)
-        (item::Int, pred, incoming_vals) = pop!(worklist)
+        (item, pred, incoming_vals) = pop!(worklist)
         if sv.bb_vartables[item] === nothing
             continue
         end
@@ -813,8 +783,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
                         incoming_vals[id] = Pair{Any, Any}(thisval, thisdef)
                         has_pinode[id] = false
                         enter_idx = idx
-                        while handler_at[enter_idx][1] != 0
-                            (; enter_idx) = handlers[handler_at[enter_idx][1]]
+                        while (handler = gethandler(handler_info, enter_idx)) !== nothing
+                            (; enter_idx) = handler
                             leave_block = block_for_inst(cfg, (code[enter_idx]::EnterNode).catch_dest)
                             cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block])
                             if cidx !== nothing
diff --git a/base/compiler/ssair/tarjan.jl b/base/compiler/ssair/tarjan.jl
index bc0500901fb03..3727fe218dc1d 100644
--- a/base/compiler/ssair/tarjan.jl
+++ b/base/compiler/ssair/tarjan.jl
@@ -247,6 +247,9 @@ function enqueue_if_unreachable!(reach::CFGReachability, cfg::CFG, bb::Int)
         # target is a reducible CFG node
         # this node lives iff it still has an incoming forward edge
         for pred in cfg.blocks[bb].preds
+            # virtual edge does not count - if the enter is dead, that edge is
+            # not taken.
+            pred == 0 && continue
             !dominates(domtree, bb, pred) && return false # forward-edge
         end
         scc[bb] = 0
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index ae294990b40d0..a4286177e93a4 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -111,8 +111,8 @@ function verify_ir(ir::IRCode, print::Bool=true,
         @verify_error "IR info length is invalid $(length(ir.stmts.info)) / $(length(ir.stmts))"
         error("")
     end
-    if length(ir.stmts.line) != length(ir.stmts)
-        @verify_error "IR line length is invalid $(length(ir.stmts.line)) / $(length(ir.stmts))"
+    if length(ir.stmts.line) != length(ir.stmts) * 3
+        @verify_error "IR line length is invalid $(length(ir.stmts.line)) / $(length(ir.stmts) * 3)"
         error("")
     end
     if length(ir.stmts.flag) != length(ir.stmts)
@@ -325,24 +325,16 @@ function verify_ir(ir::IRCode, print::Bool=true,
                     error("")
                 end
             end
-        elseif isterminator(stmt) && idx != last(ir.cfg.blocks[bb].stmts)
-            @verify_error "Terminator $idx in bb $bb is not the last statement in the block"
-            error("")
-        else
-            if isa(stmt, Expr) || isa(stmt, ReturnNode) # TODO: make sure everything has line info
-                if (stmt isa ReturnNode)
-                    if isdefined(stmt, :val)
-                        # TODO: Disallow unreachable returns?
-                        # bb_unreachable(domtree, Int64(edge))
-                    else
-                        #@verify_error "Missing line number information for statement $idx of $ir"
-                    end
-                end
-                if !(stmt isa ReturnNode && !isdefined(stmt, :val)) # not actually a return node, but an unreachable marker
-                    if ir.stmts[idx][:line] <= 0
-                    end
-                end
+        elseif isterminator(stmt)
+            if idx != last(ir.cfg.blocks[bb].stmts)
+                @verify_error "Terminator $idx in bb $bb is not the last statement in the block"
+                error("")
             end
+            if !isa(stmt, ReturnNode) && ir[SSAValue(idx)][:type] !== Any
+                @verify_error "Explicit terminators (other than ReturnNode) must have `Any` type"
+                error("")
+            end
+        else
             isforeigncall = false
             if isa(stmt, Expr)
                 if stmt.head === :(=)
@@ -354,6 +346,15 @@ function verify_ir(ir::IRCode, print::Bool=true,
                         # undefined GlobalRef as assignment RHS is OK
                         continue
                     end
+                elseif stmt.head === :isdefined
+                    if length(stmt.args) > 2 || (length(stmt.args) == 2 && !isa(stmt.args[2], Bool))
+                        @verify_error "malformed isdefined"
+                        error("")
+                    end
+                    if stmt.args[1] isa GlobalRef
+                        # undefined GlobalRef is OK in isdefined
+                        continue
+                    end
                 elseif stmt.head === :gc_preserve_end
                     # We allow gc_preserve_end tokens to span across try/catch
                     # blocks, which isn't allowed for regular SSA values, so
@@ -397,12 +398,12 @@ function verify_ir(ir::IRCode, print::Bool=true,
     end
 end
 
-function verify_linetable(linetable::Vector{LineInfoNode}, print::Bool=true)
-    for i in 1:length(linetable)
-        line = linetable[i]
-        if i <= line.inlined_at
-            @verify_error "Misordered linetable"
-            error("")
+function verify_linetable(di::DebugInfoStream, nstmts::Int, print::Bool=true)
+    @assert 3nstmts == length(di.codelocs)
+    for i in 1:nstmts
+        edge = di.codelocs[3i-1]
+        if !(edge == 0 || get(di.edges, edge, nothing) isa DebugInfo)
+            @verify_error "Malformed debuginfo index into edges"
         end
     end
 end
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 25f5bb894eaa9..69d2ac7ae45a0 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -13,6 +13,12 @@ struct CallMeta
     exct::Any
     effects::Effects
     info::CallInfo
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function CallMeta(rt::Any, exct::Any, effects::Effects, info::CallInfo,
+                      refinements=nothing)
+        @nospecialize rt exct info
+        return new(rt, exct, effects, info, refinements)
+    end
 end
 
 struct NoCallInfo <: CallInfo end
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index b817c2af9c49c..64a93bd07c2fa 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -89,6 +89,7 @@ function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospeci
 end
 
 add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
+add_tfunc(Core.throw_methoderror, 1, INT_INF, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
 
 # the inverse of typeof_tfunc
 # returns (type, isexact, isconcrete, istype)
@@ -184,8 +185,6 @@ add_tfunc(sdiv_int, 2, 2, math_tfunc, 20)
 add_tfunc(udiv_int, 2, 2, math_tfunc, 20)
 add_tfunc(srem_int, 2, 2, math_tfunc, 20)
 add_tfunc(urem_int, 2, 2, math_tfunc, 20)
-add_tfunc(add_ptr, 2, 2, math_tfunc, 1)
-add_tfunc(sub_ptr, 2, 2, math_tfunc, 1)
 add_tfunc(neg_float, 1, 1, math_tfunc, 1)
 add_tfunc(add_float, 2, 2, math_tfunc, 2)
 add_tfunc(sub_float, 2, 2, math_tfunc, 2)
@@ -229,10 +228,19 @@ end
 @nospecs shift_tfunc(𝕃::AbstractLattice, x, y) = shift_tfunc(widenlattice(𝕃), x, y)
 @nospecs shift_tfunc(::JLTypeLattice, x, y) = widenconst(x)
 
+function not_tfunc(𝕃::AbstractLattice, @nospecialize(b))
+    if isa(b, Conditional)
+        return Conditional(b.slot, b.elsetype, b.thentype)
+    elseif isa(b, Const)
+        return Const(not_int(b.val))
+    end
+    return math_tfunc(𝕃, b)
+end
+
 add_tfunc(and_int, 2, 2, and_int_tfunc, 1)
 add_tfunc(or_int, 2, 2, or_int_tfunc, 1)
 add_tfunc(xor_int, 2, 2, math_tfunc, 1)
-add_tfunc(not_int, 1, 1, math_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
+add_tfunc(not_int, 1, 1, not_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
 add_tfunc(shl_int, 2, 2, shift_tfunc, 1)
 add_tfunc(lshr_int, 2, 2, shift_tfunc, 1)
 add_tfunc(ashr_int, 2, 2, shift_tfunc, 1)
@@ -394,20 +402,13 @@ end
     return isdefined_tfunc(𝕃, arg1, sym)
 end
 @nospecs function isdefined_tfunc(𝕃::AbstractLattice, arg1, sym)
-    if isa(arg1, Const)
-        arg1t = typeof(arg1.val)
-    else
-        arg1t = widenconst(arg1)
-    end
-    if isType(arg1t)
-        return Bool
-    end
+    arg1t = arg1 isa Const ? typeof(arg1.val) : isconstType(arg1) ? typeof(arg1.parameters[1]) : widenconst(arg1)
     a1 = unwrap_unionall(arg1t)
     if isa(a1, DataType) && !isabstracttype(a1)
         if a1 === Module
             hasintersect(widenconst(sym), Symbol) || return Bottom
             if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) &&
-               isdefined(arg1.val::Module, sym.val::Symbol)
+               isdefinedconst_globalref(GlobalRef(arg1.val::Module, sym.val::Symbol))
                 return Const(true)
             end
         elseif isa(sym, Const)
@@ -419,7 +420,7 @@ end
             else
                 return Bottom
             end
-            if 1 <= idx <= datatype_min_ninitialized(a1)
+            if 1 ≤ idx ≤ datatype_min_ninitialized(a1)
                 return Const(true)
             elseif a1.name === _NAMEDTUPLE_NAME
                 if isconcretetype(a1)
@@ -427,15 +428,20 @@ end
                 else
                     ns = a1.parameters[1]
                     if isa(ns, Tuple)
-                        return Const(1 <= idx <= length(ns))
+                        return Const(1 ≤ idx ≤ length(ns))
                     end
                 end
-            elseif idx <= 0 || (!isvatuple(a1) && idx > fieldcount(a1))
+            elseif idx ≤ 0 || (!isvatuple(a1) && idx > fieldcount(a1))
                 return Const(false)
             elseif isa(arg1, Const)
-                arg1v = (arg1::Const).val
-                if !ismutable(arg1v) || isdefined(arg1v, idx) || isconst(typeof(arg1v), idx)
-                    return Const(isdefined(arg1v, idx))
+                if !ismutabletype(a1) || isconst(a1, idx)
+                    return Const(isdefined(arg1.val, idx))
+                end
+            elseif isa(arg1, PartialStruct)
+                if !isvarargtype(arg1.fields[end])
+                    if 1 ≤ idx ≤ length(arg1.fields)
+                        return Const(true)
+                    end
                 end
             elseif !isvatuple(a1)
                 fieldT = fieldtype(a1, idx)
@@ -662,6 +668,9 @@ function pointer_eltype(@nospecialize(ptr))
     return Any
 end
 
+@nospecs function pointerarith_tfunc(𝕃::AbstractLattice, ptr, offset)
+    return ptr
+end
 @nospecs function pointerref_tfunc(𝕃::AbstractLattice, a, i, align)
     return pointer_eltype(a)
 end
@@ -705,6 +714,8 @@ end
     end
     return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
 end
+add_tfunc(add_ptr, 2, 2, pointerarith_tfunc, 1)
+add_tfunc(sub_ptr, 2, 2, pointerarith_tfunc, 1)
 add_tfunc(pointerref, 3, 3, pointerref_tfunc, 4)
 add_tfunc(pointerset, 4, 4, pointerset_tfunc, 5)
 add_tfunc(atomic_fence, 1, 1, atomic_fence_tfunc, 4)
@@ -985,27 +996,40 @@ end
     ⊑ = partialorder(𝕃)
 
     # If we have s00 being a const, we can potentially refine our type-based analysis above
-    if isa(s00, Const) || isconstType(s00)
-        if !isa(s00, Const)
-            sv = s00.parameters[1]
-        else
+    if isa(s00, Const) || isconstType(s00) || isa(s00, PartialStruct)
+        if isa(s00, Const)
             sv = s00.val
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
+        elseif isa(s00, PartialStruct)
+            sty = unwrap_unionall(s00.typ)
+            nflds = fieldcount_noerror(sty)
+            ismod = false
+        else
+            sv = (s00::DataType).parameters[1]
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
         end
         if isa(name, Const)
             nval = name.val
             if !isa(nval, Symbol)
-                isa(sv, Module) && return false
+                ismod && return false
                 isa(nval, Int) || return false
             end
-            return isdefined(sv, nval)
+            return isdefined_tfunc(𝕃, s00, name) === Const(true)
         end
-        boundscheck && return false
+
         # If bounds checking is disabled and all fields are assigned,
         # we may assume that we don't throw
-        isa(sv, Module) && return false
+        @assert !boundscheck
+        ismod && return false
         name ⊑ Int || name ⊑ Symbol || return false
-        for i = 1:fieldcount(typeof(sv))
-            isdefined(sv, i) || return false
+        sty.name.n_uninitialized == 0 && return true
+        nflds === nothing && return false
+        for i = (datatype_min_ninitialized(sty)+1):nflds
+            isdefined_tfunc(𝕃, s00, Const(i)) === Const(true) || return false
         end
         return true
     end
@@ -1102,7 +1126,7 @@ end
 end
 
 @nospecs function _getfield_tfunc(𝕃::AnyMustAliasesLattice, s00, name, setfield::Bool)
-    return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), name, setfield)
+    return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), widenmustalias(name), setfield)
 end
 
 @nospecs function _getfield_tfunc(𝕃::PartialsLattice, s00, name, setfield::Bool)
@@ -1244,59 +1268,6 @@ end
     return rewrap_unionall(R, s00)
 end
 
-@nospecs function getfield_notundefined(typ0, name)
-    if isa(typ0, Const) && isa(name, Const)
-        typv = typ0.val
-        namev = name.val
-        isa(typv, Module) && return true
-        if isa(namev, Symbol) || isa(namev, Int)
-            # Fields are not allowed to transition from defined to undefined, so
-            # even if the field is not const, all we need to check here is that
-            # it is defined here.
-            return isdefined(typv, namev)
-        end
-    end
-    typ0 = widenconst(typ0)
-    typ = unwrap_unionall(typ0)
-    if isa(typ, Union)
-        return getfield_notundefined(rewrap_unionall(typ.a, typ0), name) &&
-               getfield_notundefined(rewrap_unionall(typ.b, typ0), name)
-    end
-    isa(typ, DataType) || return false
-    if typ.name === Tuple.name || typ.name === _NAMEDTUPLE_NAME
-        # tuples and named tuples can't be instantiated with undefined fields,
-        # so we don't need to be conservative here
-        return true
-    end
-    if !isa(name, Const)
-        isvarargtype(name) && return false
-        if !hasintersect(widenconst(name), Union{Int,Symbol})
-            return true # no undefined behavior if thrown
-        end
-        # field isn't known precisely, but let's check if all the fields can't be
-        # initialized with undefined value so to avoid being too conservative
-        fcnt = fieldcount_noerror(typ)
-        fcnt === nothing && return false
-        all(i::Int->is_undefref_fieldtype(fieldtype(typ,i)), (datatype_min_ninitialized(typ)+1):fcnt) && return true
-        return false
-    end
-    name = name.val
-    if isa(name, Symbol)
-        fidx = fieldindex(typ, name, false)
-        fidx === nothing && return true # no undefined behavior if thrown
-    elseif isa(name, Int)
-        fidx = name
-    else
-        return true # no undefined behavior if thrown
-    end
-    fcnt = fieldcount_noerror(typ)
-    fcnt === nothing && return false
-    0 < fidx ≤ fcnt || return true # no undefined behavior if thrown
-    fidx ≤ datatype_min_ninitialized(typ) && return true # always defined
-    ftyp = fieldtype(typ, fidx)
-    is_undefref_fieldtype(ftyp) && return true # always initialized
-    return false
-end
 # checks if a field of this type is guaranteed to be defined to a value
 # and that access to an uninitialized field will cause an `UndefRefError` or return zero
 # - is_undefref_fieldtype(String) === true
@@ -2051,7 +2022,6 @@ add_tfunc(Core.memoryrefmodify!, 5, 5, memoryrefmodify!_tfunc, 20)
 add_tfunc(Core.memoryrefreplace!, 6, 6, memoryrefreplace!_tfunc, 20)
 add_tfunc(Core.memoryrefsetonce!, 5, 5, memoryrefsetonce!_tfunc, 20)
 
-
 @nospecs function memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
     return _memoryref_isassigned_tfunc(𝕃, mem, order, boundscheck)
 end
@@ -2086,7 +2056,7 @@ end
     hasintersect(widenconst(idx), Int) || return Bottom
     return ref
 end
-add_tfunc(memoryref, 1, 3, memoryref_tfunc, 1)
+add_tfunc(memoryrefnew, 1, 3, memoryref_tfunc, 1)
 
 @nospecs function memoryrefoffset_tfunc(𝕃::AbstractLattice, mem)
     hasintersect(widenconst(mem), GenericMemoryRef) || return Bottom
@@ -2134,7 +2104,7 @@ end
     return Type
 end
 
-@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance)
+@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, mi::MethodInstance)
     argt, argt_exact = instanceof_tfunc(arg)
     lbt, lb_exact = instanceof_tfunc(lb)
     if !lb_exact
@@ -2148,7 +2118,7 @@ end
 
     (isa(source, Const) && isa(source.val, Method)) || return t
 
-    return PartialOpaque(t, tuple_tfunc(𝕃, env), linfo, source.val)
+    return PartialOpaque(t, tuple_tfunc(𝕃, env), mi, source.val)
 end
 
 # whether getindex for the elements can potentially throw UndefRef
@@ -2233,10 +2203,13 @@ end
     return boundscheck ⊑ Bool && memtype ⊑ GenericMemoryRef && order ⊑ Symbol
 end
 
-# Query whether the given builtin is guaranteed not to throw given the argtypes
-@nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt)
+# Query whether the given builtin is guaranteed not to throw given the `argtypes`.
+# `argtypes` can be assumed not to contain varargs.
+function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any},
+                          @nospecialize(rt))
     ⊑ = partialorder(𝕃)
-    if f === memoryref
+    na = length(argtypes)
+    if f === memoryrefnew
         return memoryref_builtin_common_nothrow(argtypes)
     elseif f === memoryrefoffset
         length(argtypes) == 1 || return false
@@ -2251,13 +2224,7 @@ end
     elseif f === Core._expr
         length(argtypes) >= 1 || return false
         return argtypes[1] ⊑ Symbol
-    end
-
-    # These builtins are not-vararg, so if we have varars, here, we can't guarantee
-    # the correct number of arguments.
-    na = length(argtypes)
-    (na ≠ 0 && isvarargtype(argtypes[end])) && return false
-    if f === Core._typevar
+    elseif f === Core._typevar
         na == 3 || return false
         return typevar_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3])
     elseif f === invoke
@@ -2282,8 +2249,6 @@ end
     elseif f === (<:)
         na == 2 || return false
         return subtype_nothrow(𝕃, argtypes[1], argtypes[2])
-    elseif f === UnionAll
-        return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type)
     elseif f === isdefined
         return isdefined_nothrow(𝕃, argtypes)
     elseif f === Core.sizeof
@@ -2349,6 +2314,7 @@ const _CONSISTENT_BUILTINS = Any[
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     setfield!,
     donotdelete
 ]
@@ -2360,7 +2326,7 @@ const _EFFECT_FREE_BUILTINS = [
     isa,
     UnionAll,
     getfield,
-    memoryref,
+    memoryrefnew,
     memoryrefoffset,
     memoryrefget,
     memoryref_isassigned,
@@ -2371,6 +2337,7 @@ const _EFFECT_FREE_BUILTINS = [
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     getglobal,
     compilerbarrier,
 ]
@@ -2386,16 +2353,17 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
     isa,
     nfields,
     throw,
+    Core.throw_methoderror,
     tuple,
     typeassert,
     typeof,
     compilerbarrier,
     Core._typevar,
-    donotdelete
+    donotdelete,
 ]
 
 const _ARGMEM_BUILTINS = Any[
-    memoryref,
+    memoryrefnew,
     memoryrefoffset,
     memoryrefget,
     memoryref_isassigned,
@@ -2436,25 +2404,16 @@ function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any})
     # consistent if the first arg is immutable
     na = length(argtypes)
     2 ≤ na ≤ 3 || return EFFECTS_THROWS
-    obj, sym = argtypes
-    wobj = unwrapva(obj)
+    wobj, sym = argtypes
+    wobj = unwrapva(wobj)
+    sym = unwrapva(sym)
     consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
     if is_immutable_argtype(wobj)
         consistent = ALWAYS_TRUE
-    else
-        # Bindings/fields are not allowed to transition from defined to undefined, so even
-        # if the object is not immutable, we can prove `:consistent`-cy if it is defined:
-        if isa(wobj, Const) && isa(sym, Const)
-            objval = wobj.val
-            symval = sym.val
-            if isa(objval, Module)
-                if isa(symval, Symbol) && isdefined(objval, symval)
-                    consistent = ALWAYS_TRUE
-                end
-            elseif (isa(symval, Symbol) || isa(symval, Int)) && isdefined(objval, symval)
-                consistent = ALWAYS_TRUE
-            end
-        end
+    elseif isdefined_tfunc(𝕃, wobj, sym) isa Const
+        # Some bindings/fields are not allowed to transition from defined to undefined or the reverse, so even
+        # if the object is not immutable, we can prove `:consistent`-cy of this:
+        consistent = ALWAYS_TRUE
     end
     nothrow = isdefined_nothrow(𝕃, argtypes)
     if hasintersect(widenconst(wobj), Module)
@@ -2480,16 +2439,6 @@ function getfield_effects(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospeci
     # :consistent if the argtype is immutable
     consistent = (is_immutable_argtype(obj) || is_mutation_free_argtype(obj)) ?
         ALWAYS_TRUE : CONSISTENT_IF_INACCESSIBLEMEMONLY
-    # taint `:consistent` if accessing `isbitstype`-type object field that may be initialized
-    # with undefined value: note that we don't need to taint `:consistent` if accessing
-    # uninitialized non-`isbitstype` field since it will simply throw `UndefRefError`
-    # NOTE `getfield_notundefined` conservatively checks if this field is never initialized
-    # with undefined value to avoid tainting `:consistent` too aggressively
-    # TODO this should probably taint `:noub`, however, it would hinder concrete eval for
-    # `REPLInterpreter` that can ignore `:consistent-cy`, causing worse completions
-    if !(length(argtypes) ≥ 2 && getfield_notundefined(obj, argtypes[2]))
-        consistent = ALWAYS_FALSE
-    end
     noub = ALWAYS_TRUE
     bcheck = getfield_boundscheck(argtypes)
     nothrow = getfield_nothrow(𝕃, argtypes, bcheck)
@@ -2581,12 +2530,11 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
         return Effects(EFFECTS_TOTAL;
             consistent = ALWAYS_FALSE,
             notaskstate = false,
-            nothrow
-        )
+            nothrow)
     else
         if contains_is(_CONSISTENT_BUILTINS, f)
             consistent = ALWAYS_TRUE
-        elseif f === memoryref || f === memoryrefoffset
+        elseif f === memoryrefnew || f === memoryrefoffset
             consistent = ALWAYS_TRUE
         elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
             consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
@@ -2602,7 +2550,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
         else
             effect_free = ALWAYS_FALSE
         end
-        nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt)
+        nothrow = builtin_nothrow(𝕃, f, argtypes, rt)
         if contains_is(_INACCESSIBLEMEM_BUILTINS, f)
             inaccessiblememonly = ALWAYS_TRUE
         elseif contains_is(_ARGMEM_BUILTINS, f)
@@ -2610,7 +2558,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
         else
             inaccessiblememonly = ALWAYS_FALSE
         end
-        if f === memoryref || f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
+        if f === memoryrefnew || f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
             noub = memoryop_noub(f, argtypes) ? ALWAYS_TRUE : ALWAYS_FALSE
         else
             noub = ALWAYS_TRUE
@@ -2619,13 +2567,12 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
     end
 end
 
-
 function memoryop_noub(@nospecialize(f), argtypes::Vector{Any})
     nargs = length(argtypes)
     nargs == 0 && return true # must throw and noub
     lastargtype = argtypes[end]
     isva = isvarargtype(lastargtype)
-    if f === memoryref
+    if f === memoryrefnew
         if nargs == 1 && !isva
             return true
         elseif nargs == 2 && !isva
@@ -2652,13 +2599,13 @@ end
 
 function current_scope_tfunc(interp::AbstractInterpreter, sv::InferenceState)
     pc = sv.currpc
+    handler_info = sv.handler_info
     while true
-        handleridx = sv.handler_at[pc][1]
-        if handleridx == 0
+        pchandler = gethandler(sv, pc)
+        if pchandler === nothing
             # No local scope available - inherited from the outside
             return Any
         end
-        pchandler = sv.handlers[handleridx]
         # Remember that we looked at this handler, so we get re-scheduled
         # if the scope information changes
         isdefined(pchandler, :scope_uses) || (pchandler.scope_uses = Int[])
@@ -2676,6 +2623,8 @@ function current_scope_tfunc(interp::AbstractInterpreter, sv::InferenceState)
 end
 current_scope_tfunc(interp::AbstractInterpreter, sv) = Any
 
+hasvarargtype(argtypes::Vector{Any}) = !isempty(argtypes) && isvarargtype(argtypes[end])
+
 """
     builtin_nothrow(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt) -> Bool
 
@@ -2683,7 +2632,13 @@ Compute throw-ness of a builtin function call. `argtypes` should not include `f`
 """
 function builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f), argtypes::Vector{Any}, @nospecialize(rt))
     rt === Bottom && return false
-    contains_is(_PURE_BUILTINS, f) && return true
+    if f === tuple || f === svec
+        return true
+    elseif hasvarargtype(argtypes)
+        return false
+    elseif contains_is(_PURE_BUILTINS, f)
+        return true
+    end
     return _builtin_nothrow(𝕃, f, argtypes, rt)
 end
 
@@ -2708,7 +2663,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
                 return Bottom
             end
         end
-        iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+        iidx = Int(reinterpret(Int32, f)) + 1
         if iidx < 0 || iidx > length(T_IFUNC)
             # unknown intrinsic
             return Any
@@ -2732,8 +2687,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         end
         tf = T_FFUNC_VAL[fidx]
     end
-    tf = tf::Tuple{Int, Int, Any}
-    if !isempty(argtypes) && isvarargtype(argtypes[end])
+    if hasvarargtype(argtypes)
         if length(argtypes) - 1 > tf[2]
             # definitely too many arguments
             return Bottom
@@ -2763,8 +2717,6 @@ _iszero(@nospecialize x) = x === Intrinsics.xor_int(x, x)
 _isneg1(@nospecialize x) = _iszero(Intrinsics.not_int(x))
 _istypemin(@nospecialize x) = !_iszero(x) && Intrinsics.neg_int(x) === x
 
-
-
 function builtin_exct(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt))
     if isa(f, IntrinsicFunction)
         return intrinsic_exct(𝕃, f, argtypes)
@@ -2774,7 +2726,6 @@ end
 
 function div_nothrow(f::IntrinsicFunction, @nospecialize(arg1), @nospecialize(arg2))
     isa(arg2, Const) || return false
-
     den_val = arg2.val
     _iszero(den_val) && return false
     f !== Intrinsics.checked_sdiv_int && return true
@@ -2789,18 +2740,17 @@ function known_is_valid_intrinsic_elptr(𝕃::AbstractLattice, @nospecialize(ptr
 end
 
 function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::Vector{Any})
-    if !isempty(argtypes) && isvarargtype(argtypes[end])
+    if hasvarargtype(argtypes)
         return Any
     end
 
     # First check that we have the correct number of arguments
-    iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+    iidx = Int(reinterpret(Int32, f)) + 1
     if iidx < 1 || iidx > length(T_IFUNC)
         # invalid intrinsic (system will crash)
         return Any
     end
     tf = T_IFUNC[iidx]
-    tf = tf::Tuple{Int, Int, Any}
     if !(tf[1] <= length(argtypes) <= tf[2])
         # wrong # of args
         return ArgumentError
@@ -2812,7 +2762,8 @@ function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::V
     # that it won't
     f === Intrinsics.llvmcall && return Any
 
-    if f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int || f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int
+    if (f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int ||
+        f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int)
         # Nothrow as long as the second argument is guaranteed not to be zero
         arg1 = argtypes[1]
         arg2 = argtypes[2]
@@ -2864,8 +2815,8 @@ function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::V
     end
 
     if f in (Intrinsics.sext_int, Intrinsics.zext_int, Intrinsics.trunc_int,
-        Intrinsics.fptoui, Intrinsics.fptosi, Intrinsics.uitofp,
-        Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext)
+             Intrinsics.fptoui, Intrinsics.fptosi, Intrinsics.uitofp,
+             Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext)
         # If !isconcrete, `ty` may be Union{} at runtime even if we have
         # isprimitivetype(ty).
         ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true)
@@ -2927,14 +2878,13 @@ function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any})
         # llvmcall can do arbitrary things
         return Effects()
     end
-
     if contains_is(_INCONSISTENT_INTRINSICS, f)
         consistent = ALWAYS_FALSE
     else
         consistent = ALWAYS_TRUE
     end
     effect_free = !(f === Intrinsics.pointerset) ? ALWAYS_TRUE : ALWAYS_FALSE
-    nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes)
+    nothrow = intrinsic_nothrow(f, argtypes)
     inaccessiblememonly = ALWAYS_TRUE
     return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly)
 end
@@ -2943,7 +2893,7 @@ end
 # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type,
 # while this assumes that it is an absolutely precise and accurate and exact model of both
 function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
-    UNKNOWN = CallMeta(Type, Any, EFFECTS_THROWS, NoCallInfo())
+    UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo())
     if !(2 <= length(argtypes) <= 3)
         return UNKNOWN
     end
@@ -2971,8 +2921,12 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
         return UNKNOWN
     end
 
+    # effects are not an issue if we know this statement will get removed, but if it does not get removed,
+    # then this could be recursively re-entering inference (via concrete-eval), which will not terminate
+    RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false)
+
     if contains_is(argtypes_vec, Union{})
-        return CallMeta(Const(Union{}), Union{}, EFFECTS_TOTAL, NoCallInfo())
+        return CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo())
     end
 
     # Run the abstract_call without restricting abstract call
@@ -2990,25 +2944,25 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     rt = widenslotwrapper(call.rt)
     if isa(rt, Const)
         # output was computed to be constant
-        return CallMeta(Const(typeof(rt.val)), Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
     end
     rt = widenconst(rt)
     if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
         # output cannot be improved so it is known for certain
-        return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
     elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
         # conservatively express uncertainty of this result
         # in two ways: both as being a subtype of this, and
         # because of LimitedAccuracy causes
-        return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
     elseif isa(tt, Const) || isconstType(tt)
         # input arguments were known for certain
         # XXX: this doesn't imply we know anything about rt
-        return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
     elseif isType(rt)
-        return CallMeta(Type{rt}, Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
     else
-        return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info)
+        return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
     end
 end
 
@@ -3016,11 +2970,10 @@ end
 function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
                              sv::AbsIntState, max_methods::Int)
     length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-    isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_UNKNOWN, NoCallInfo())
+    isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())
     argtypes = argtypes[2:end]
     atype = argtypes_to_type(argtypes)
-    matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
-        InferenceParams(interp).max_union_splitting, max_methods)
+    matches = find_method_matches(interp, argtypes, atype; max_methods)
     if isa(matches, FailedMethodMatch)
         rt = Bool # too many matches to analyze
     else
@@ -3123,7 +3076,7 @@ end
     if M isa Const && s isa Const
         M, s = M.val, s.val
         if M isa Module && s isa Symbol
-            return isdefined(M, s)
+            return isdefinedconst_globalref(GlobalRef(M, s))
         end
     end
     return false
@@ -3180,7 +3133,6 @@ add_tfunc(Core.modifyglobal!, 4, 5, modifyglobal!_tfunc, 3)
 add_tfunc(Core.replaceglobal!, 4, 6, replaceglobal!_tfunc, 3)
 add_tfunc(Core.setglobalonce!, 3, 5, setglobalonce!_tfunc, 3)
 
-
 @nospecs function setglobal!_nothrow(M, s, newty, o)
     global_order_nothrow(o, #=loading=#false, #=storing=#true) || return false
     return setglobal!_nothrow(M, s, newty)
@@ -3196,9 +3148,9 @@ end
 end
 
 function global_assignment_nothrow(M::Module, s::Symbol, @nospecialize(newty))
-    if isdefined(M, s) && !isconst(M, s)
+    if !isconst(M, s)
         ty = ccall(:jl_get_binding_type, Any, (Any, Any), M, s)
-        return ty === nothing || newty ⊑ ty
+        return ty isa Type && widenconst(newty) <: ty
     end
     return false
 end
@@ -3214,7 +3166,7 @@ end
 end
 @nospecs function get_binding_type_tfunc(𝕃::AbstractLattice, M, s)
     if get_binding_type_effect_free(M, s)
-        return Const(Core.get_binding_type((M::Const).val, (s::Const).val))
+        return Const(Core.get_binding_type((M::Const).val::Module, (s::Const).val::Symbol))
     end
     return Type
 end
@@ -3243,6 +3195,11 @@ function foreigncall_effects(@specialize(abstract_eval), e::Expr)
     elseif name === :jl_genericmemory_copy_slice
         return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow=false)
     end
+    # `:foreigncall` can potentially perform all sorts of operations, including calling
+    # overlay methods, but the `:foreigncall` itself is not dispatched, and there is no
+    # concern that the method calls that potentially occur within the `:foreigncall` will
+    # be executed using the wrong method table due to concrete evaluation, so using
+    # `EFFECTS_UNKNOWN` here and not tainting with `:nonoverlayed` is fine
     return EFFECTS_UNKNOWN
 end
 
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 5af074498ca7e..315a068e611fe 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -4,14 +4,6 @@
 const track_newly_inferred = RefValue{Bool}(false)
 const newly_inferred = CodeInstance[]
 
-# build (and start inferring) the inference frame for the top-level MethodInstance
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache_mode::Symbol)
-    frame = InferenceState(result, cache_mode, interp)
-    frame === nothing && return false
-    cache_mode === :global && lock_mi_inference(interp, result.linfo)
-    return typeinf(interp, frame)
-end
-
 """
 The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
 can be used to measure the exclusive time spent inferring each method instance that is
@@ -204,9 +196,8 @@ If set to `true`, record per-method-instance timings within type inference in th
 __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
 const __measure_typeinf__ = fill(false)
 
-# Wrapper around `_typeinf` that optionally records the exclusive time for
-# each inference performed by `NativeInterpreter`.
-function typeinf(interp::NativeInterpreter, frame::InferenceState)
+# Wrapper around `_typeinf` that optionally records the exclusive time for each invocation.
+function typeinf(interp::AbstractInterpreter, frame::InferenceState)
     if __measure_typeinf__[]
         Timings.enter_new_timer(frame)
         v = _typeinf(interp, frame)
@@ -216,9 +207,9 @@ function typeinf(interp::NativeInterpreter, frame::InferenceState)
         return _typeinf(interp, frame)
     end
 end
-typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame)
 
-function finish!(interp::AbstractInterpreter, caller::InferenceState)
+function finish!(interp::AbstractInterpreter, caller::InferenceState;
+                 can_discard_trees::Bool=may_discard_trees(interp))
     result = caller.result
     valid_worlds = result.valid_worlds
     if last(valid_worlds) >= get_world_counter()
@@ -230,12 +221,39 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState)
     if opt isa OptimizationState
         result.src = opt = ir_to_codeinf!(opt)
     end
-    if opt isa CodeInfo
-        caller.src = opt
-    else
-        # In this case `caller.src` is invalid for clients (such as `typeinf_ext`) to use
-        # but that is what's permitted by `caller.cache_mode`.
-        # This is hopefully unreachable from such clients using `NativeInterpreter`.
+    if isdefined(result, :ci)
+        ci = result.ci
+        inferred_result = nothing
+        relocatability = 0x1
+        const_flag = is_result_constabi_eligible(result)
+        if !can_discard_trees || (is_cached(caller) && !const_flag)
+            inferred_result = transform_result_for_cache(interp, result.linfo, result.valid_worlds, result, can_discard_trees)
+            relocatability = 0x0
+            if inferred_result isa CodeInfo
+                edges = inferred_result.debuginfo
+                uncompressed = inferred_result
+                inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result, can_discard_trees)
+                result.is_src_volatile |= uncompressed !== inferred_result
+            elseif ci.owner === nothing
+                # The global cache can only handle objects that codegen understands
+                inferred_result = nothing
+            end
+            if isa(inferred_result, String)
+                t = @_gc_preserve_begin inferred_result
+                relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result))
+                @_gc_preserve_end t
+            end
+        end
+        # n.b. relocatability = isa(inferred_result, String) && inferred_result[end]
+        if !@isdefined edges
+            edges = DebugInfo(result.linfo)
+        end
+        ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, UInt8, Any),
+                ci, inferred_result, const_flag,
+                first(result.valid_worlds), last(result.valid_worlds),
+                encode_effects(result.ipo_effects), result.analysis_results,
+                relocatability, edges)
+        engine_reject(interp, ci)
     end
     return nothing
 end
@@ -243,117 +261,98 @@ end
 function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
     # with no active ip's, frame is done
-    frames = frame.callers_in_cycle
-    isempty(frames) && push!(frames, frame)
-    valid_worlds = WorldRange()
-    for caller in frames
-        @assert !(caller.dont_work_on_me)
-        caller.dont_work_on_me = true
-        # might might not fully intersect these earlier, so do that now
-        valid_worlds = intersect(caller.valid_worlds, valid_worlds)
-    end
-    for caller in frames
-        caller.valid_worlds = valid_worlds
-        finish(caller, caller.interp)
-    end
-    for caller in frames
+    frames = frame.callstack::Vector{AbsIntState}
+    if length(frames) == frame.cycleid
+        finish_nocycle(interp, frame)
+    else
+        @assert frame.cycleid != 0
+        finish_cycle(interp, frames, frame.cycleid)
+    end
+    return true
+end
+
+function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
+    finishinfer!(frame, frame.interp)
+    opt = frame.result.src
+    if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
+        optimize(frame.interp, opt, frame.result)
+    end
+    finish!(frame.interp, frame)
+    if frame.cycleid != 0
+        frames = frame.callstack::Vector{AbsIntState}
+        @assert frames[end] === frame
+        pop!(frames)
+    end
+    return nothing
+end
+
+function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int)
+    cycle_valid_worlds = WorldRange()
+    cycle_valid_effects = EFFECTS_TOTAL
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
+        @assert caller.cycleid == cycleid
+        # converge the world age range and effects for this cycle here:
+        # all frames in the cycle should have the same bits of `valid_worlds` and `effects`
+        # that are simply the intersection of each partial computation, without having
+        # dependencies on each other (unlike rt and exct)
+        cycle_valid_worlds = intersect(cycle_valid_worlds, caller.valid_worlds)
+        cycle_valid_effects = merge_effects(cycle_valid_effects, caller.ipo_effects)
+    end
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
+        adjust_cycle_frame!(caller, cycle_valid_worlds, cycle_valid_effects)
+        finishinfer!(caller, caller.interp)
+    end
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
         opt = caller.result.src
         if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
             optimize(caller.interp, opt, caller.result)
         end
     end
-    for caller in frames
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
         finish!(caller.interp, caller)
-        if is_cached(caller)
-            cache_result!(caller.interp, caller.result)
-        end
     end
-    empty!(frames)
-    return true
+    resize!(frames, cycleid - 1)
+    return nothing
+end
+
+function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange, cycle_valid_effects::Effects)
+    sv.valid_worlds = cycle_valid_worlds
+    sv.ipo_effects = cycle_valid_effects
+    # traverse the callees of this cycle that are tracked within `sv.cycle_backedges`
+    # and adjust their statements so that they are consistent with the new `cycle_valid_effects`
+    new_flags = flags_for_effects(cycle_valid_effects)
+    for (callee, pc) in sv.cycle_backedges
+        old_currpc = callee.currpc
+        callee.currpc = pc
+        set_curr_ssaflag!(callee, new_flags, IR_FLAGS_EFFECTS)
+        callee.currpc = old_currpc
+    end
+    return nothing
 end
 
 function is_result_constabi_eligible(result::InferenceResult)
     result_type = result.result
     return isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
 end
-function CodeInstance(interp::AbstractInterpreter, result::InferenceResult;
-        can_discard_trees::Bool=may_discard_trees(interp))
-    local const_flags::Int32
-    result_type = result.result
-    @assert !(result_type === nothing || result_type isa LimitedAccuracy)
-    if is_result_constabi_eligible(result)
-        # use constant calling convention
-        rettype_const = result_type.val
-        const_flags = 0x3
-    else
-        if isa(result_type, Const)
-            rettype_const = result_type.val
-            const_flags = 0x2
-        elseif isa(result_type, PartialOpaque)
-            rettype_const = result_type
-            const_flags = 0x2
-        elseif isconstType(result_type)
-            rettype_const = result_type.parameters[1]
-            const_flags = 0x2
-        elseif isa(result_type, PartialStruct)
-            rettype_const = result_type.fields
-            const_flags = 0x2
-        elseif isa(result_type, InterConditional)
-            rettype_const = result_type
-            const_flags = 0x2
-        elseif isa(result_type, InterMustAlias)
-            rettype_const = result_type
-            const_flags = 0x2
-        else
-            rettype_const = nothing
-            const_flags = 0x00
-        end
-    end
-    relocatability = 0x0
-    owner = cache_owner(interp)
-    if const_flags == 0x3 && can_discard_trees
-        inferred_result = nothing
-        relocatability = 0x1
-    else
-        inferred_result = transform_result_for_cache(interp, result.linfo, result.valid_worlds, result, can_discard_trees)
-        if inferred_result isa CodeInfo
-            uncompressed = inferred_result
-            inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result, can_discard_trees)
-            result.is_src_volatile |= uncompressed !== inferred_result
-        elseif owner === nothing
-            # The global cache can only handle objects that codegen understands
-            inferred_result = nothing
-        end
-        if isa(inferred_result, String)
-            t = @_gc_preserve_begin inferred_result
-            relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result))
-            @_gc_preserve_end t
-        elseif inferred_result === nothing
-            relocatability = 0x1
-        end
-    end
-    # n.b. relocatability = (isa(inferred_result, String) && inferred_result[end]) || inferred_result === nothing
-    return CodeInstance(result.linfo, owner,
-        widenconst(result_type), widenconst(result.exc_result), rettype_const, inferred_result,
-        const_flags, first(result.valid_worlds), last(result.valid_worlds),
-        # TODO: Actually do something with non-IPO effects
-        encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.analysis_results,
-        relocatability)
-end
+
 
 function transform_result_for_cache(interp::AbstractInterpreter,
-        linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult,
+        ::MethodInstance, valid_worlds::WorldRange, result::InferenceResult,
         can_discard_trees::Bool=may_discard_trees(interp))
     return result.src
 end
 
-function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInstance, ci::CodeInfo,
-        can_discard_trees::Bool=may_discard_trees(interp))
-    def = linfo.def
+function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo,
+                                 can_discard_trees::Bool=may_discard_trees(interp))
+    def = mi.def
     isa(def, Method) || return ci # don't compress toplevel code
     cache_the_tree = true
     if can_discard_trees
-        cache_the_tree = is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def)
+        cache_the_tree = is_inlineable(ci) || isa_compileable_sig(mi.specTypes, mi.sparam_vals, def)
     end
     if cache_the_tree
         if may_compress(interp)
@@ -375,46 +374,45 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
         # we can now widen our applicability in the global cache too
         result.valid_worlds = WorldRange(first(result.valid_worlds), typemax(UInt))
     end
+    @assert isdefined(result.ci, :inferred)
     # check if the existing linfo metadata is also sufficient to describe the current inference result
-    # to decide if it is worth caching this
+    # to decide if it is worth caching this right now
     mi = result.linfo
-    already_inferred = already_inferred_quick_test(interp, mi)
+    cache_results = true
     cache = WorldView(code_cache(interp), result.valid_worlds)
-    if !already_inferred && haskey(cache, mi)
+    if cache_results && haskey(cache, mi)
         ci = cache[mi]
-        # Even if we already have a CI for this, it's possible that the new CI has more
-        # information (E.g. because the source was limited before, but is no longer - this
-        # happens during bootstrap). In that case, allow the result to be recached.
-        if result.src === nothing || (ci.inferred !== nothing || ci.invoke != C_NULL)
-            already_inferred = true
-        end
+        # n.b.: accurate edge representation might cause the CodeInstance for this to be constructed later
+        @assert isdefined(ci, :inferred)
+        cache_results = false
     end
 
-    # TODO: also don't store inferred code if we've previously decided to interpret this function
-    if !already_inferred
-        code_cache(interp)[mi] = ci = CodeInstance(interp, result)
-        result.ci = ci
+    if cache_results
+        code_cache(interp)[mi] = result.ci
         if track_newly_inferred[]
             m = mi.def
             if isa(m, Method) && m.module != Core
-                ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
+                ccall(:jl_push_newly_inferred, Cvoid, (Any,), result.ci)
             end
         end
     end
-    unlock_mi_inference(interp, mi)
-    nothing
+    return cache_results
 end
 
 function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
     if typ isa LimitedAccuracy
-        if sv.parent === nothing
-            # when part of a cycle, we might have unintentionally introduced a limit marker
-            @assert !isempty(sv.callers_in_cycle)
+        if sv.parentid === 0
+            # we might have introduced a limit marker, but we should know it must be sv and other callers_in_cycle
+            #@assert !isempty(callers_in_cycle(sv))
+            #  FIXME: this assert fails, appearing to indicate there is a bug in filtering this list earlier.
+            #  In particular (during doctests for example), during inference of
+            #  show(Base.IOContext{Base.GenericIOBuffer{Memory{UInt8}}}, Base.Multimedia.MIME{:var"text/plain"}, LinearAlgebra.BunchKaufman{Float64, Array{Float64, 2}, Array{Int64, 1}})
+            #  we observed one of the ssavaluetypes here to be Core.Compiler.LimitedAccuracy(typ=Any, causes=Core.Compiler.IdSet(getproperty(LinearAlgebra.BunchKaufman{Float64, Array{Float64, 2}, Array{Int64, 1}}, Symbol)))
             return typ.typ
         end
         causes = copy(typ.causes)
         delete!(causes, sv)
-        for caller in sv.callers_in_cycle
+        for caller in callers_in_cycle(sv)
             delete!(causes, caller)
         end
         if isempty(causes)
@@ -453,6 +451,12 @@ function adjust_effects(ipo_effects::Effects, def::Method)
     elseif is_effect_overridden(override, :noub_if_noinbounds) && ipo_effects.noub !== ALWAYS_TRUE
         ipo_effects = Effects(ipo_effects; noub=NOUB_IF_NOINBOUNDS)
     end
+    if is_effect_overridden(override, :consistent_overlay)
+        ipo_effects = Effects(ipo_effects; nonoverlayed=CONSISTENT_OVERLAY)
+    end
+    if is_effect_overridden(override, :nortcall)
+        ipo_effects = Effects(ipo_effects; nortcall=true)
+    end
     return ipo_effects
 end
 
@@ -520,15 +524,13 @@ end
 
 # inference completed on `me`
 # update the MethodInstance
-function finish(me::InferenceState, interp::AbstractInterpreter)
+function finishinfer!(me::InferenceState, interp::AbstractInterpreter)
     # prepare to run optimization passes on fulltree
-    s_edges = me.stmt_edges[1]
-    if s_edges === nothing
-        s_edges = me.stmt_edges[1] = []
-    end
-    for edges in me.stmt_edges
-        edges === nothing && continue
-        edges === s_edges && continue
+    @assert isempty(me.ip)
+    s_edges = get_stmt_edges!(me, 1)
+    for i = 2:length(me.stmt_edges)
+        isassigned(me.stmt_edges, i) || continue
+        edges = me.stmt_edges[i]
         append!(s_edges, edges)
         empty!(edges)
     end
@@ -545,47 +547,90 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         gt = me.ssavaluetypes
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
-            if gtj isa LimitedAccuracy && me.parent !== nothing
+            if gtj isa LimitedAccuracy && me.parentid != 0
                 limited_src = true
                 break
             end
         end
     end
-    me.result.valid_worlds = me.valid_worlds
-    me.result.result = bestguess
-    ipo_effects = me.result.ipo_effects = me.ipo_effects = adjust_effects(me)
-    me.result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects)
+    result = me.result
+    result.valid_worlds = me.valid_worlds
+    result.result = bestguess
+    ipo_effects = result.ipo_effects = me.ipo_effects = adjust_effects(me)
+    result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects)
+    me.src.rettype = widenconst(ignorelimited(bestguess))
+    me.src.min_world = first(me.valid_worlds)
+    me.src.max_world = last(me.valid_worlds)
 
     if limited_ret
         # a parent may be cached still, but not this intermediate work:
         # we can throw everything else away now
-        me.result.src = nothing
+        result.src = nothing
         me.cache_mode = CACHE_MODE_NULL
         set_inlineable!(me.src, false)
-        unlock_mi_inference(interp, me.linfo)
     elseif limited_src
         # a type result will be cached still, but not this intermediate work:
         # we can throw everything else away now
-        me.result.src = nothing
+        result.src = nothing
         set_inlineable!(me.src, false)
     else
         # annotate fulltree with type information,
         # either because we are the outermost code, or we might use this later
         type_annotate!(interp, me)
-        doopt = (me.cache_mode != CACHE_MODE_NULL || me.parent !== nothing)
-        # Disable the optimizer if we've already determined that there's nothing for
-        # it to do.
-        if may_discard_trees(interp) && is_result_constabi_eligible(me.result)
-            doopt = false
-        end
-        if doopt && may_optimize(interp)
-            me.result.src = OptimizationState(me, interp)
+        mayopt = may_optimize(interp)
+        doopt = mayopt &&
+                # disable optimization if we don't use this later (because it is not cached)
+                me.cache_mode != CACHE_MODE_NULL &&
+                # disable optimization if we've already obtained very accurate result
+                !result_is_constabi(interp, result)
+        if doopt
+            result.src = OptimizationState(me, interp)
         else
-            me.result.src = me.src # for reflection etc.
+            result.src = me.src # for reflection etc.
         end
     end
 
     maybe_validate_code(me.linfo, me.src, "inferred")
+
+    # finish populating inference results into the CodeInstance if possible, and maybe cache that globally for use elsewhere
+    if isdefined(result, :ci) && !limited_ret
+        result_type = result.result
+        @assert !(result_type === nothing || result_type isa LimitedAccuracy)
+        if isa(result_type, Const)
+            rettype_const = result_type.val
+            const_flags = is_result_constabi_eligible(result) ? 0x3 : 0x2
+        elseif isa(result_type, PartialOpaque)
+            rettype_const = result_type
+            const_flags = 0x2
+        elseif isconstType(result_type)
+            rettype_const = result_type.parameters[1]
+            const_flags = 0x2
+        elseif isa(result_type, PartialStruct)
+            rettype_const = result_type.fields
+            const_flags = 0x2
+        elseif isa(result_type, InterConditional)
+            rettype_const = result_type
+            const_flags = 0x2
+        elseif isa(result_type, InterMustAlias)
+            rettype_const = result_type
+            const_flags = 0x2
+        else
+            rettype_const = nothing
+            const_flags = 0x00
+        end
+        relocatability = 0x0
+        edges = nothing
+        ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any),
+                result.ci, widenconst(result_type), widenconst(result.exc_result), rettype_const, const_flags,
+                first(result.valid_worlds), last(result.valid_worlds),
+                encode_effects(result.ipo_effects), result.analysis_results, edges)
+        if is_cached(me)
+            cached_results = cache_result!(me.interp, me.result)
+            if !cached_results
+                me.cache_mode = CACHE_MODE_NULL
+            end
+        end
+    end
     nothing
 end
 
@@ -707,46 +752,34 @@ function type_annotate!(interp::AbstractInterpreter, sv::InferenceState)
     return nothing
 end
 
-# at the end, all items in b's cycle
-# will now be added to a's cycle
-function union_caller_cycle!(a::InferenceState, b::InferenceState)
-    callers_in_cycle = b.callers_in_cycle
-    b.parent = a.parent
-    b.callers_in_cycle = a.callers_in_cycle
-    contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b)
-    if callers_in_cycle !== a.callers_in_cycle
-        for caller in callers_in_cycle
-            if caller !== b
-                caller.parent = a.parent
-                caller.callers_in_cycle = a.callers_in_cycle
-                push!(a.callers_in_cycle, caller)
-            end
-        end
-    end
-    return
-end
-
-function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState)
+function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, child::InferenceState)
     # add backedge of parent <- child
     # then add all backedges of parent <- parent.parent
-    # and merge all of the callers into ancestor.callers_in_cycle
-    # and ensure that walking the parent list will get the same result (DAG) from everywhere
+    frames = parent.callstack::Vector{AbsIntState}
+    @assert child.callstack === frames
+    ancestorid = child.cycleid
     while true
-        add_cycle_backedge!(parent, child, parent.currpc)
-        union_caller_cycle!(ancestor, child)
+        add_cycle_backedge!(parent, child)
+        parent.cycleid === ancestorid && break
         child = parent
-        child === ancestor && break
         parent = frame_parent(child)
         while !isa(parent, InferenceState)
             # XXX we may miss some edges here?
             parent = frame_parent(parent::IRInterpretationState)
         end
-        parent = parent::InferenceState
+    end
+    # ensure that walking the callstack has the same cycleid (DAG)
+    for frame = reverse(ancestorid:length(frames))
+        frame = frames[frame]
+        frame isa InferenceState || continue
+        frame.cycleid == ancestorid && break
+        @assert frame.cycleid > ancestorid
+        frame.cycleid = ancestorid
     end
 end
 
 function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState)
-    return mi === frame_instance(frame)
+    return mi === frame_instance(frame) && cache_owner(interp) === cache_owner(frame.interp)
 end
 
 function poison_callstack!(infstate::InferenceState, topmost::InferenceState)
@@ -757,8 +790,8 @@ end
 # Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
 # frame matching `mi` is encountered, then there is a cycle in the call graph
 # (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
-# we "resolve" it by merging the call chain, which entails unioning each intermediary
-# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
+# we "resolve" it by merging the call chain, which entails updating each intermediary
+# frame's `cycleid` field and adding the appropriate backedges. Finally,
 # we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is
 # returned instead.
 function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
@@ -766,10 +799,11 @@ function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, pa
     # This works just because currently the `:terminate` condition guarantees that
     # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
     # We should revisit this once we have a better story for handling cycles in irinterp.
-    isa(parent, InferenceState) || return false
-    frame = parent
+    frames = parent.callstack::Vector{AbsIntState}
     uncached = false
-    while isa(frame, InferenceState)
+    for frame = reverse(1:length(frames))
+        frame = frames[frame]
+        isa(frame, InferenceState) || break
         uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle
         if is_same_frame(interp, mi, frame)
             if uncached
@@ -779,20 +813,9 @@ function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, pa
                 poison_callstack!(parent, frame)
                 return true
             end
-            merge_call_chain!(interp, parent, frame, frame)
+            merge_call_chain!(interp, parent, frame)
             return frame
         end
-        for caller in callers_in_cycle(frame)
-            if is_same_frame(interp, mi, caller)
-                if uncached
-                    poison_callstack!(parent, frame)
-                    return true
-                end
-                merge_call_chain!(interp, parent, frame, caller)
-                return caller
-            end
-        end
-        frame = frame_parent(frame)
     end
     return false
 end
@@ -813,7 +836,7 @@ struct EdgeCallResult
     end
 end
 
-# return cached regular inference result
+# return cached result of regular inference
 function return_cached_result(::AbstractInterpreter, codeinst::CodeInstance, caller::AbsIntState)
     rt = cached_return_type(codeinst)
     effects = ipo_effects(codeinst)
@@ -824,24 +847,24 @@ end
 # compute (and cache) an inferred AST and return the current best estimate of the result type
 function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
     mi = specialize_method(method, atype, sparams)::MethodInstance
-    codeinst = get(code_cache(interp), mi, nothing)
+    cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
     force_inline = is_stmt_inline(get_curr_ssaflag(caller))
-    if codeinst isa CodeInstance # return existing rettype if the code is already inferred
-        inferred = @atomic :monotonic codeinst.inferred
-        if inferred === nothing && force_inline
-            # we already inferred this edge before and decided to discard the inferred code,
-            # nevertheless we re-infer it here again in order to propagate the re-inferred
-            # source to the inliner as a volatile result
-            cache_mode = CACHE_MODE_VOLATILE
-        else
-            @assert codeinst.def === mi "MethodInstance for cached edge does not match"
-            return return_cached_result(interp, codeinst, caller)
+    let codeinst = get(code_cache(interp), mi, nothing)
+        if codeinst isa CodeInstance # return existing rettype if the code is already inferred
+            inferred = @atomic :monotonic codeinst.inferred
+            if inferred === nothing && force_inline
+                # we already inferred this edge before and decided to discard the inferred code,
+                # nevertheless we re-infer it here again in order to propagate the re-inferred
+                # source to the inliner as a volatile result
+                cache_mode = CACHE_MODE_VOLATILE
+            else
+                @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                return return_cached_result(interp, codeinst, caller)
+            end
         end
-    else
-        cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false)
-        add_remark!(interp, caller, "Inference is disabled for the target module")
+        add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module")
         return EdgeCallResult(Any, Any, nothing, Effects())
     end
     if !is_cached(caller) && frame_parent(caller) === nothing
@@ -852,42 +875,65 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         frame = resolve_call_cycle!(interp, mi, caller)
     end
     if frame === false
-        # completely new
-        lock_mi_inference(interp, mi)
+        # completely new, but check again after reserving in the engine
+        if cache_mode == CACHE_MODE_GLOBAL
+            ci = engine_reserve(interp, mi)
+            let codeinst = get(code_cache(interp), mi, nothing)
+                if codeinst isa CodeInstance # return existing rettype if the code is already inferred
+                    engine_reject(interp, ci)
+                    inferred = @atomic :monotonic codeinst.inferred
+                    if inferred === nothing && force_inline
+                        cache_mode = CACHE_MODE_VOLATILE
+                    else
+                        @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                        return return_cached_result(interp, codeinst, caller)
+                    end
+                end
+            end
+        end
         result = InferenceResult(mi, typeinf_lattice(interp))
+        if cache_mode == CACHE_MODE_GLOBAL
+            result.ci = ci
+        end
         frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets
         if frame === nothing
-            add_remark!(interp, caller, "Failed to retrieve source")
+            add_remark!(interp, caller, "[typeinf_edge] Failed to retrieve source")
             # can't get the source for this, so we know nothing
-            unlock_mi_inference(interp, mi)
+            if cache_mode == CACHE_MODE_GLOBAL
+                engine_reject(interp, ci)
+            end
             return EdgeCallResult(Any, Any, nothing, Effects())
         end
-        if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution
-            frame.parent = caller
-        end
+        assign_parentchild!(frame, caller)
         typeinf(interp, frame)
         update_valid_age!(caller, frame.valid_worlds)
         isinferred = is_inferred(frame)
         edge = isinferred ? mi : nothing
-        effects = isinferred ? frame.result.ipo_effects : adjust_effects(Effects(), method) # effects are adjusted already within `finish` for ipo_effects
+        effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
+            adjust_effects(effects_for_cycle(frame.ipo_effects), method)
         exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
         # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
-        # note that this result is cached globally exclusively, we can use this local result destructively
-        volatile_inf_result = isinferred && (force_inline || src_inlining_policy(interp, result.src, NoCallInfo(), IR_FLAG_NULL) !== nothing) ?
-            VolatileInferenceResult(result) : nothing
+        # note that this result is cached globally exclusively, so we can use this local result destructively
+        volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
         return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
     elseif frame === true
         # unresolvable cycle
+        add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle")
         return EdgeCallResult(Any, Any, nothing, Effects())
     end
     # return the current knowledge about this cycle
     frame = frame::InferenceState
     update_valid_age!(caller, frame.valid_worlds)
-    effects = adjust_effects(Effects(), method)
+    effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
     exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
     return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
 end
 
+# The `:terminates` effect bit must be conservatively tainted unless recursion cycle has
+# been fully resolved. As for other effects, there's no need to taint them at this moment
+# because they will be tainted as we try to resolve the cycle.
+effects_for_cycle(effects::Effects) = Effects(effects; terminates=false)
+
 function cached_return_type(code::CodeInstance)
     rettype = code.rettype
     isdefined(code, :rettype_const) || return rettype
@@ -925,9 +971,9 @@ function codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, @no
     tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
     tree.slotflags = fill(0x00, nargs)
     tree.ssavaluetypes = 1
-    tree.codelocs = Int32[1]
-    tree.linetable = LineInfoNode[LineInfoNode(method.module, method.name, method.file, method.line, Int32(0))]
+    tree.debuginfo = DebugInfo(mi)
     tree.ssaflags = UInt32[0]
+    tree.rettype = Core.Typeof(val)
     set_inlineable!(tree, true)
     tree.parent = mi
     return tree
@@ -944,12 +990,12 @@ function codeinstance_for_const_with_code(interp::AbstractInterpreter, code::Cod
     src = codeinfo_for_const(interp, code.def, code.rettype_const)
     return CodeInstance(code.def, cache_owner(interp), code.rettype, code.exctype, code.rettype_const, src,
         Int32(0x3), code.min_world, code.max_world,
-        code.ipo_purity_bits, code.purity_bits, code.analysis_results,
-        code.relocatability)
+        code.ipo_purity_bits, code.analysis_results,
+        code.relocatability, src.debuginfo)
 end
 
-result_is_constabi(interp::AbstractInterpreter, run_optimizer::Bool, result::InferenceResult) =
-    run_optimizer && may_discard_trees(interp) && is_result_constabi_eligible(result)
+result_is_constabi(interp::AbstractInterpreter, result::InferenceResult) =
+    may_discard_trees(interp) && is_result_constabi_eligible(result)
 
 # compute an inferred AST and return type
 typeinf_code(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) =
@@ -959,15 +1005,8 @@ typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype),
     typeinf_code(interp, specialize_method(method, atype, sparams), run_optimizer)
 function typeinf_code(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
     frame = typeinf_frame(interp, mi, run_optimizer)
-    frame === nothing && return nothing, Any
-    is_inferred(frame) || return nothing, Any
-    if result_is_constabi(interp, run_optimizer, frame.result)
-        rt = frame.result.result::Const
-        return codeinfo_for_const(interp, frame.linfo, rt.val), widenconst(rt)
-    end
-    code = frame.src
-    rt = widenconst(ignorelimited(frame.result.result))
-    return code, rt
+    frame === nothing && return nothing
+    return frame.src
 end
 
 """
@@ -989,17 +1028,14 @@ typeinf_ircode(interp::AbstractInterpreter, method::Method, @nospecialize(atype)
     typeinf_ircode(interp, specialize_method(method, atype, sparams), optimize_until)
 function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance,
                         optimize_until::Union{Integer,AbstractString,Nothing})
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     frame = typeinf_frame(interp, mi, false)
     if frame === nothing
-        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
         return nothing, Any
     end
     (; result) = frame
     opt = OptimizationState(frame, interp)
     ir = run_passes_ipo_safe(opt.src, opt, result, optimize_until)
     rt = widenconst(ignorelimited(result.result))
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     return ir, rt
 end
 
@@ -1010,13 +1046,22 @@ typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype),
               run_optimizer::Bool) =
     typeinf_frame(interp, specialize_method(method, atype, sparams), run_optimizer)
 function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     result = InferenceResult(mi, typeinf_lattice(interp))
-    cache_mode = run_optimizer ? :global : :no
-    frame = InferenceState(result, cache_mode, interp)
+    frame = InferenceState(result, #=cache_mode=#:no, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+    is_inferred(frame) || return nothing
+    if run_optimizer
+        if result_is_constabi(interp, frame.result)
+            rt = frame.result.result::Const
+            opt = codeinfo_for_const(interp, frame.linfo, rt.val)
+        else
+            opt = OptimizationState(frame, interp)
+            optimize(interp, opt, frame.result)
+            opt = ir_to_codeinf!(opt)
+        end
+        result.src = frame.src = opt
+    end
     return frame
 end
 
@@ -1057,18 +1102,9 @@ N.B.: The same caching considerations as SOURCE_MODE_ABI apply.
 """
 const SOURCE_MODE_FORCE_SOURCE = 0x2
 
-"""
-    SOURCE_MODE_FORCE_SOURCE_UNCACHED
-
-Like `SOURCE_MODE_FORCE_SOURCE`, but ensures that the resulting code instance is
-not part of the cache hierarchy, so the `->inferred` field may be safely used
-without the possibility of deletion by the compiler.
-"""
-const SOURCE_MODE_FORCE_SOURCE_UNCACHED = 0x3
-
 function ci_has_source(code::CodeInstance)
     inf = @atomic :monotonic code.inferred
-    return isa(inf, CodeInfo) || isa(inf, String)
+    return code.owner === nothing ? (isa(inf, CodeInfo) || isa(inf, String)) : inf !== nothing
 end
 
 """
@@ -1083,70 +1119,93 @@ function ci_has_abi(code::CodeInstance)
     return code.invoke !== C_NULL
 end
 
-function ci_meets_requirement(code::CodeInstance, source_mode::UInt8, ci_is_cached::Bool)
+function ci_meets_requirement(code::CodeInstance, source_mode::UInt8)
     source_mode == SOURCE_MODE_NOT_REQUIRED && return true
     source_mode == SOURCE_MODE_ABI && return ci_has_abi(code)
     source_mode == SOURCE_MODE_FORCE_SOURCE && return ci_has_source(code)
-    source_mode == SOURCE_MODE_FORCE_SOURCE_UNCACHED && return (!ci_is_cached && ci_has_source(code))
     return false
 end
 
-_uncompressed_ir(ci::Core.CodeInstance, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo
+_uncompressed_ir(codeinst::CodeInstance, s::String) =
+    ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Any, Any), codeinst.def.def::Method, codeinst, s)
 
 # compute (and cache) an inferred AST and return type
 function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8)
     start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    code = get(code_cache(interp), mi, nothing)
-    if code isa CodeInstance
-        # see if this code already exists in the cache
-        if source_mode in (SOURCE_MODE_FORCE_SOURCE, SOURCE_MODE_FORCE_SOURCE_UNCACHED) && use_const_api(code)
-            code = codeinstance_for_const_with_code(interp, code)
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-            return code
-        end
-        if ci_meets_requirement(code, source_mode, true)
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-            return code
+    let code = get(code_cache(interp), mi, nothing)
+        if code isa CodeInstance
+            # see if this code already exists in the cache
+            if source_mode == SOURCE_MODE_FORCE_SOURCE && use_const_api(code)
+                code = codeinstance_for_const_with_code(interp, code)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
+            if ci_meets_requirement(code, source_mode)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
         end
     end
     def = mi.def
     if isa(def, Method)
         if ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0 && !generating_output(#=incremental=#false)
             src = retrieve_code_info(mi, get_inference_world(interp))
+            src isa CodeInfo || return nothing
             return CodeInstance(mi, cache_owner(interp), Any, Any, nothing, src, Int32(0),
                 get_inference_world(interp), get_inference_world(interp),
-                UInt32(0), UInt32(0), nothing, UInt8(0))
+                UInt32(0), nothing, UInt8(0), src.debuginfo)
+        end
+    end
+    ci = engine_reserve(interp, mi)
+    # check cache again if it is still new after reserving in the engine
+    let code = get(code_cache(interp), mi, nothing)
+        if code isa CodeInstance
+            # see if this code already exists in the cache
+            if source_mode == SOURCE_MODE_FORCE_SOURCE && use_const_api(code)
+                engine_reject(interp, ci)
+                code = codeinstance_for_const_with_code(interp, code)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
+            if ci_meets_requirement(code, source_mode)
+                engine_reject(interp, ci)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
         end
     end
-    lock_mi_inference(interp, mi)
     result = InferenceResult(mi, typeinf_lattice(interp))
-    frame = InferenceState(result, #=cache_mode=#source_mode == SOURCE_MODE_FORCE_SOURCE_UNCACHED ? :volatile : :global, interp)
-    frame === nothing && return nothing
+    result.ci = ci
+    frame = InferenceState(result, #=cache_mode=#:global, interp)
+    if frame === nothing
+        engine_reject(interp, ci)
+        return nothing
+    end
     typeinf(interp, frame)
     ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-    if isdefined(result, :ci)
-        if ci_meets_requirement(result.ci, source_mode, true)
-            # Inference result was cacheable and is in global cache. Return it.
-            return result.ci
-        elseif use_const_api(result.ci)
-            code = codeinstance_for_const_with_code(interp, result.ci)
-            @assert ci_meets_requirement(code, source_mode, false)
-            return code
-        end
-    end
-    # Inference result is not cacheable or is was cacheable, but we do not want to
-    # store the source in the cache, but the caller wanted it anyway (e.g. for reflection).
-    # We construct a new CodeInstance for it that is not part of the cache hierarchy.
-    code = CodeInstance(interp, result, can_discard_trees=(
-        source_mode != SOURCE_MODE_FORCE_SOURCE && source_mode != SOURCE_MODE_FORCE_SOURCE_UNCACHED))
 
-    # If the caller cares about the code and this is constabi, still use our synthesis function
-    # anyway, because we will have not finished inferring the code inside the CodeInstance once
-    # we realized it was constabi, but we want reflection to pretend that we did.
-    if use_const_api(code) && source_mode in (SOURCE_MODE_FORCE_SOURCE, SOURCE_MODE_FORCE_SOURCE_UNCACHED)
-        return codeinstance_for_const_with_code(interp, code)
+    ci = result.ci # reload from result in case it changed
+    if source_mode == SOURCE_MODE_ABI && frame.cache_mode != CACHE_MODE_GLOBAL
+        # XXX: jl_type_infer somewhat ambiguously assumes this must be cached, while jl_ci_cache_lookup sort of ambiguously re-caches it
+        # XXX: this should be using the CI from the cache, if possible instead: haskey(cache, mi) && (ci = cache[mi])
+        @assert isdefined(ci, :inferred) "interpreter did not fulfill its requirements"
+        code_cache(interp)[mi] = ci
+    end
+    if source_mode == SOURCE_MODE_FORCE_SOURCE && use_const_api(ci)
+        # If the caller cares about the code and this is constabi, still use our synthesis function
+        # anyway, because we will have not finished inferring the code inside the CodeInstance once
+        # we realized it was constabi, but we want reflection to pretend that we did.
+        # XXX: the one user of this does not actually want this behavior, but it is required by the flag definition currently
+        ci = codeinstance_for_const_with_code(interp, ci)
+        @assert ci_meets_requirement(ci, source_mode)
+        return ci
+    end
+    if !ci_meets_requirement(ci, source_mode)
+        can_discard_trees = false
+        finish!(interp, frame; can_discard_trees) # redo finish! with the correct can_discard_trees parameter value
+        @assert ci_meets_requirement(ci, source_mode)
     end
-    return code
+    return ci
 end
 
 # compute (and cache) an inferred AST and return the inferred return type
@@ -1159,15 +1218,32 @@ end
 typeinf_type(interp::AbstractInterpreter, match::MethodMatch) =
     typeinf_type(interp, specialize_method(match))
 function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance)
+    # n.b.: this could be replaced with @something(typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED), return nothing).rettype
     start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    code = get(code_cache(interp), mi, nothing)
-    if code isa CodeInstance
-        # see if this rettype already exists in the cache
-        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-        return code.rettype
+    let code = get(code_cache(interp), mi, nothing)
+        if code isa CodeInstance
+            # see if this rettype already exists in the cache
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            return code.rettype
+        end
+    end
+    ci = engine_reserve(interp, mi)
+    let code = get(code_cache(interp), mi, nothing)
+        if code isa CodeInstance
+            engine_reject(interp, ci)
+            # see if this rettype already exists in the cache
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            return code.rettype
+        end
     end
     result = InferenceResult(mi, typeinf_lattice(interp))
-    typeinf(interp, result, :global)
+    result.ci = ci
+    frame = InferenceState(result, #=cache_mode=#:global, interp)
+    if frame === nothing
+        engine_reject(interp, ci)
+        return nothing
+    end
+    typeinf(interp, frame)
     ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     is_inferred(result) || return nothing
     return widenconst(ignorelimited(result.result))
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 5987c30be2b91..86fa8af21615f 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -6,17 +6,42 @@
 
 # N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used
 # inside the global code cache.
-#
-# # The type of a value might be constant
-# struct Const
-#     val
-# end
-#
-# struct PartialStruct
-#     typ
-#     fields::Vector{Any} # elements are other type lattice members
-# end
+
 import Core: Const, PartialStruct
+
+"""
+    struct Const
+        val
+    end
+
+The type representing a constant value.
+"""
+:(Const)
+
+"""
+    struct PartialStruct
+        typ
+        fields::Vector{Any} # elements are other type lattice members
+    end
+
+This extended lattice element is introduced when we have information about an object's
+fields beyond what can be obtained from the object type. E.g. it represents a tuple where
+some elements are known to be constants or a struct whose `Any`-typed field is initialized
+with `Int` values.
+
+- `typ` indicates the type of the object
+- `fields` holds the lattice elements corresponding to each field of the object
+
+If `typ` is a struct, `fields` represents the fields of the struct that are guaranteed to be
+initialized. For instance, if the length of `fields` of `PartialStruct` representing a
+struct with 4 fields is 3, the 4th field may not be initialized. If the length is 4, all
+fields are guaranteed to be initialized.
+
+If `typ` is a tuple, the last element of `fields` may be `Vararg`. In this case, it is
+guaranteed that the number of elements in the tuple is at least `length(fields)-1`, but the
+exact number of elements is unknown.
+"""
+:(PartialStruct)
 function PartialStruct(@nospecialize(typ), fields::Vector{Any})
     for i = 1:length(fields)
         assert_nested_slotwrapper(fields[i])
@@ -48,17 +73,27 @@ struct Conditional
     slot::Int
     thentype
     elsetype
-    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype))
+    # `isdefined` indicates this `Conditional` is from `@isdefined slot`, implying that
+    # the `undef` information of `slot` can be improved in the then branch.
+    # Since this is only beneficial for local inference, it is not translated into `InterConditional`.
+    isdefined::Bool
+    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype);
+                         isdefined::Bool=false)
         assert_nested_slotwrapper(thentype)
         assert_nested_slotwrapper(elsetype)
-        return new(slot, thentype, elsetype)
+        return new(slot, thentype, elsetype, isdefined)
     end
 end
-Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
-    Conditional(slot_id(var), thentype, elsetype)
+Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype); isdefined::Bool=false) =
+    Conditional(slot_id(var), thentype, elsetype; isdefined)
 
+import Core: InterConditional
 """
-    cnd::InterConditional
+    struct InterConditional
+        slot::Int
+        thentype
+        elsetype
+    end
 
 Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments.
 This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional`
@@ -66,14 +101,6 @@ while processing a call, then `Conditional` everywhere else. Thus `InterConditio
 `CompilerTypes`—these type's usages are disjoint—though we define the lattice for `InterConditional`.
 """
 :(InterConditional)
-import Core: InterConditional
-# struct InterConditional
-#     slot::Int
-#     thentype
-#     elsetype
-#     InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) =
-#         new(slot, thentype, elsetype)
-# end
 InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
     InterConditional(slot_id(var), thentype, elsetype)
 
@@ -120,8 +147,6 @@ end
 MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
     MustAlias(slot_id(var), vartyp, fldidx, fldtyp)
 
-_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts)
-
 """
     alias::InterMustAlias
 
@@ -159,8 +184,8 @@ end
 struct StateUpdate
     var::SlotNumber
     vtype::VarState
-    state::VarTable
     conditional::Bool
+    StateUpdate(var::SlotNumber, vtype::VarState, conditional::Bool=false) = new(var, vtype, conditional)
 end
 
 """
@@ -286,11 +311,17 @@ end
 
 # `Conditional` and `InterConditional` are valid in opposite contexts
 # (i.e. local inference and inter-procedural call), as such they will never be compared
-@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional}
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::Conditional, b::Conditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#true)
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::InterConditional, b::InterConditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#false)
+@nospecializeinfer function _issubconditional(𝕃::AbstractLattice, a::C, b::C, check_isdefined::Bool) where C<:AnyConditional
     if is_same_conditionals(a, b)
-        if ⊑(lattice, a.thentype, b.thentype)
-            if ⊑(lattice, a.elsetype, b.elsetype)
-                return true
+        if ⊑(𝕃, a.thentype, b.thentype)
+            if ⊑(𝕃, a.elsetype, b.elsetype)
+                if !check_isdefined || a.isdefined ≥ b.isdefined
+                    return true
+                end
             end
         end
     end
@@ -394,8 +425,8 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ
 # =============
 
 @nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b))
-    r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b))
-    r || return false
+    ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) || return false
+
     isa(b, LimitedAccuracy) || return true
 
     # We've found that ignorelimited(a) ⊑ ignorelimited(b).
@@ -448,8 +479,13 @@ end
 @nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b))
     if isa(a, PartialStruct)
         if isa(b, PartialStruct)
-            if !(length(a.fields) == length(b.fields) && a.typ <: b.typ)
-                return false
+            a.typ <: b.typ || return false
+            if length(a.fields) ≠ length(b.fields)
+                if !(isvarargtype(a.fields[end]) || isvarargtype(b.fields[end]))
+                    length(a.fields) ≥ length(b.fields) || return false
+                else
+                    return false
+                end
             end
             for i in 1:length(b.fields)
                 af = a.fields[i]
@@ -472,19 +508,25 @@ end
         return isa(b, Type) && a.typ <: b
     elseif isa(b, PartialStruct)
         if isa(a, Const)
-            nf = nfields(a.val)
-            nf == length(b.fields) || return false
             widea = widenconst(a)::DataType
             wideb = widenconst(b)
             wideb′ = unwrap_unionall(wideb)::DataType
             widea.name === wideb′.name || return false
-            # We can skip the subtype check if b is a Tuple, since in that
-            # case, the ⊑ of the elements is sufficient.
-            if wideb′.name !== Tuple.name && !(widea <: wideb)
-                return false
+            if wideb′.name === Tuple.name
+                # We can skip the subtype check if b is a Tuple, since in that
+                # case, the ⊑ of the elements is sufficient.
+                # But for tuple comparisons, we need their lengths to be the same for now.
+                # TODO improve accuracy for cases when `b` contains vararg element
+                nfields(a.val) == length(b.fields) || return false
+            else
+                widea <: wideb || return false
+                # for structs we need to check that `a` has more information than `b` that may be partially initialized
+                n_initialized(a) ≥ length(b.fields) || return false
             end
+            nf = nfields(a.val)
             for i in 1:nf
                 isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T
+                i > length(b.fields) && break # `a` has more information than `b` that is partially initialized struct
                 bfᵢ = b.fields[i]
                 if i == nf
                     bfᵢ = unwrapva(bfᵢ)
@@ -724,28 +766,6 @@ function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional:
     return nothing
 end
 
-function stupdate!(lattice::AbstractLattice, state::VarTable, changes::StateUpdate)
-    changed = false
-    changeid = slot_id(changes.var)
-    for i = 1:length(state)
-        if i == changeid
-            newtype = changes.vtype
-        else
-            newtype = changes.state[i]
-        end
-        invalidated = invalidate_slotwrapper(newtype, changeid, changes.conditional)
-        if invalidated !== nothing
-            newtype = invalidated
-        end
-        oldtype = state[i]
-        if schanged(lattice, newtype, oldtype)
-            state[i] = smerge(lattice, oldtype, newtype)
-            changed = true
-        end
-    end
-    return changed
-end
-
 function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable)
     changed = false
     for i = 1:length(state)
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index 318ac0b5c27e5..91a44d3b117ab 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -321,6 +321,11 @@ end
 # even after complicated recursion and other operations on it elsewhere
 const issimpleenoughtupleelem = issimpleenoughtype
 
+function n_initialized(t::Const)
+    nf = nfields(t.val)
+    return something(findfirst(i::Int->!isdefined(t.val,i), 1:nf), nf+1)-1
+end
+
 # A simplified type_more_complex query over the extended lattice
 # (assumes typeb ⊑ typea)
 @nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb))
@@ -328,6 +333,13 @@ const issimpleenoughtupleelem = issimpleenoughtype
     typea === typeb && return true
     if typea isa PartialStruct
         aty = widenconst(typea)
+        if typeb isa Const
+            @assert length(typea.fields) ≤ n_initialized(typeb) "typeb ⊑ typea is assumed"
+        elseif typeb isa PartialStruct
+            @assert length(typea.fields) ≤ length(typeb.fields) "typeb ⊑ typea is assumed"
+        else
+            return false
+        end
         for i = 1:length(typea.fields)
             ai = unwrapva(typea.fields[i])
             bi = fieldtype(aty, i)
@@ -572,34 +584,38 @@ end
 
 # N.B. This can also be called with both typea::Const and typeb::Const to
 # to recover PartialStruct from `Const`s with overlapping fields.
-@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
+@nospecializeinfer function tmerge_partial_struct(𝕃::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
     aty = widenconst(typea)
     bty = widenconst(typeb)
     if aty === bty
-        # must have egal here, since we do not create PartialStruct for non-concrete types
-        typea_nfields = nfields_tfunc(lattice, typea)
-        typeb_nfields = nfields_tfunc(lattice, typeb)
-        isa(typea_nfields, Const) || return nothing
-        isa(typeb_nfields, Const) || return nothing
-        type_nfields = typea_nfields.val::Int
-        type_nfields === typeb_nfields.val::Int || return nothing
-        type_nfields == 0 && return nothing
-        fields = Vector{Any}(undef, type_nfields)
-        anyrefine = false
-        for i = 1:type_nfields
-            ai = getfield_tfunc(lattice, typea, Const(i))
-            bi = getfield_tfunc(lattice, typeb, Const(i))
+        if typea isa PartialStruct
+            if typeb isa PartialStruct
+                nflds = min(length(typea.fields), length(typeb.fields))
+            else
+                nflds = min(length(typea.fields), n_initialized(typeb::Const))
+            end
+        elseif typeb isa PartialStruct
+            nflds = min(n_initialized(typea::Const), length(typeb.fields))
+        else
+            nflds = min(n_initialized(typea::Const), n_initialized(typeb::Const))
+        end
+        nflds == 0 && return nothing
+        fields = Vector{Any}(undef, nflds)
+        anyrefine = nflds > datatype_min_ninitialized(unwrap_unionall(aty))
+        for i = 1:nflds
+            ai = getfield_tfunc(𝕃, typea, Const(i))
+            bi = getfield_tfunc(𝕃, typeb, Const(i))
             # N.B.: We're assuming here that !isType(aty), because that case
             # only arises when typea === typeb, which should have been caught
             # before calling this.
             ft = fieldtype(aty, i)
-            if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft)
+            if is_lattice_equal(𝕃, ai, bi) || is_lattice_equal(𝕃, ai, ft)
                 # Since ai===bi, the given type has no restrictions on complexity.
                 # and can be used to refine ft
                 tyi = ai
-            elseif is_lattice_equal(lattice, bi, ft)
+            elseif is_lattice_equal(𝕃, bi, ft)
                 tyi = bi
-            elseif (tyi′ = tmerge_field(lattice, ai, bi); tyi′ !== nothing)
+            elseif (tyi′ = tmerge_field(𝕃, ai, bi); tyi′ !== nothing)
                 # allow external lattice implementation to provide a custom field-merge strategy
                 tyi = tyi′
             else
@@ -621,8 +637,8 @@ end
             end
             fields[i] = tyi
             if !anyrefine
-                anyrefine = has_nontrivial_extended_info(lattice, tyi) || # extended information
-                            ⋤(lattice, tyi, ft) # just a type-level information, but more precise than the declared type
+                anyrefine = has_nontrivial_extended_info(𝕃, tyi) || # extended information
+                            ⋤(𝕃, tyi, ft) # just a type-level information, but more precise than the declared type
             end
         end
         anyrefine && return PartialStruct(aty, fields)
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 64410d231adda..f315b7968fd9b 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -57,8 +57,6 @@ struct VarState
     VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
 end
 
-abstract type ForwardableArgtypes end
-
 struct AnalysisResults
     result
     next::AnalysisResults
@@ -70,16 +68,19 @@ end
 const NULL_ANALYSIS_RESULTS = AnalysisResults(nothing)
 
 """
-    InferenceResult(linfo::MethodInstance, [argtypes::ForwardableArgtypes, 𝕃::AbstractLattice])
+    result::InferenceResult
 
 A type that represents the result of running type inference on a chunk of code.
-
-See also [`matching_cache_argtypes`](@ref).
+There are two constructor available:
+- `InferenceResult(mi::MethodInstance, [𝕃::AbstractLattice])` for regular inference,
+  without extended lattice information included in `result.argtypes`.
+- `InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::BitVector)`
+  for constant inference, with extended lattice information included in `result.argtypes`.
 """
 mutable struct InferenceResult
     const linfo::MethodInstance
     const argtypes::Vector{Any}
-    const overridden_by_const::BitVector
+    const overridden_by_const::Union{Nothing,BitVector}
     result                   # extended lattice element if inferred, nothing otherwise
     exc_result               # like `result`, but for the thrown value
     src                      # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise
@@ -88,19 +89,16 @@ mutable struct InferenceResult
     effects::Effects         # if optimization is finished
     analysis_results::AnalysisResults # AnalysisResults with e.g. result::ArgEscapeCache if optimized, otherwise NULL_ANALYSIS_RESULTS
     is_src_volatile::Bool    # `src` has been cached globally as the compressed format already, allowing `src` to be used destructively
-    ci::CodeInstance         # CodeInstance if this result has been added to the cache
-    function InferenceResult(linfo::MethodInstance, cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-        # def = linfo.def
-        # nargs = def isa Method ? Int(def.nargs) : 0
-        # @assert length(cache_argtypes) == nargs
-        return new(linfo, cache_argtypes, overridden_by_const, nothing, nothing, nothing,
+    ci::CodeInstance         # CodeInstance if this result may be added to the cache
+    function InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::Union{Nothing,BitVector})
+        return new(mi, argtypes, overridden_by_const, nothing, nothing, nothing,
             WorldRange(), Effects(), Effects(), NULL_ANALYSIS_RESULTS, false)
     end
 end
-InferenceResult(linfo::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) =
-    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo)...)
-InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes, 𝕃::AbstractLattice=fallback_lattice) =
-    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo, argtypes)...)
+function InferenceResult(mi::MethodInstance, 𝕃::AbstractLattice=fallback_lattice)
+    argtypes = matching_cache_argtypes(𝕃, mi)
+    return InferenceResult(mi, argtypes, #=overridden_by_const=#nothing)
+end
 
 function stack_analysis_result!(inf_result::InferenceResult, @nospecialize(result))
     return inf_result.analysis_results = AnalysisResults(result, inf_result.analysis_results)
@@ -158,11 +156,6 @@ Parameters that control abstract interpretation-based type inference operation.
   information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
   more fine-grained control on this configuration with per-method annotation basis.
 ---
-- `inf_params.unoptimize_throw_blocks::Bool = true`\\
-  If `true`, skips inferring calls that are in a block that is known to `throw`.
-  It may improve the compiler latency without sacrificing the runtime performance
-  in common situations.
----
 - `inf_params.assume_bindings_static::Bool = false`\\
   If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
   inference time can be assumed to always not exist at runtime (and thus e.g. any access to
@@ -178,7 +171,6 @@ struct InferenceParams
     tuple_complexity_limit_depth::Int
     ipo_constant_propagation::Bool
     aggressive_constant_propagation::Bool
-    unoptimize_throw_blocks::Bool
     assume_bindings_static::Bool
     ignore_recursion_hardlimit::Bool
 
@@ -190,7 +182,6 @@ struct InferenceParams
         tuple_complexity_limit_depth::Int,
         ipo_constant_propagation::Bool,
         aggressive_constant_propagation::Bool,
-        unoptimize_throw_blocks::Bool,
         assume_bindings_static::Bool,
         ignore_recursion_hardlimit::Bool)
         return new(
@@ -201,21 +192,19 @@ struct InferenceParams
             tuple_complexity_limit_depth,
             ipo_constant_propagation,
             aggressive_constant_propagation,
-            unoptimize_throw_blocks,
             assume_bindings_static,
             ignore_recursion_hardlimit)
     end
 end
 function InferenceParams(
     params::InferenceParams = InferenceParams( # default constructor
-        #=max_methods::Int=# 3,
+        #=max_methods::Int=# BuildSettings.MAX_METHODS,
         #=max_union_splitting::Int=# 4,
         #=max_apply_union_enum::Int=# 8,
         #=max_tuple_splat::Int=# 32,
         #=tuple_complexity_limit_depth::Int=# 3,
         #=ipo_constant_propagation::Bool=# true,
         #=aggressive_constant_propagation::Bool=# false,
-        #=unoptimize_throw_blocks::Bool=# true,
         #=assume_bindings_static::Bool=# false,
         #=ignore_recursion_hardlimit::Bool=# false);
     max_methods::Int = params.max_methods,
@@ -225,7 +214,6 @@ function InferenceParams(
     tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
     ipo_constant_propagation::Bool = params.ipo_constant_propagation,
     aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
-    unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
     assume_bindings_static::Bool = params.assume_bindings_static,
     ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
     return InferenceParams(
@@ -236,7 +224,6 @@ function InferenceParams(
         tuple_complexity_limit_depth,
         ipo_constant_propagation,
         aggressive_constant_propagation,
-        unoptimize_throw_blocks,
         assume_bindings_static,
         ignore_recursion_hardlimit)
 end
@@ -261,10 +248,6 @@ Parameters that control optimizer operation.
   tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
   be added to `opt_params.inline_cost_threshold` when making inlining decision.
 ---
-- `opt_params.inline_error_path_cost::Int = 20`\\
-  Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
-  `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
----
 - `opt_params.max_tuple_splat::Int = 32`\\
   When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
   contains more than this many elements.
@@ -291,7 +274,6 @@ struct OptimizationParams
     inline_cost_threshold::Int
     inline_nonleaf_penalty::Int
     inline_tupleret_bonus::Int
-    inline_error_path_cost::Int
     max_tuple_splat::Int
     compilesig_invokes::Bool
     assume_fatal_throw::Bool
@@ -302,7 +284,6 @@ struct OptimizationParams
         inline_cost_threshold::Int,
         inline_nonleaf_penalty::Int,
         inline_tupleret_bonus::Int,
-        inline_error_path_cost::Int,
         max_tuple_splat::Int,
         compilesig_invokes::Bool,
         assume_fatal_throw::Bool,
@@ -312,7 +293,6 @@ struct OptimizationParams
             inline_cost_threshold,
             inline_nonleaf_penalty,
             inline_tupleret_bonus,
-            inline_error_path_cost,
             max_tuple_splat,
             compilesig_invokes,
             assume_fatal_throw,
@@ -325,7 +305,6 @@ function OptimizationParams(
         #=inline_cost_threshold::Int=# 100,
         #=inline_nonleaf_penalty::Int=# 1000,
         #=inline_tupleret_bonus::Int=# 250,
-        #=inline_error_path_cost::Int=# 20,
         #=max_tuple_splat::Int=# 32,
         #=compilesig_invokes::Bool=# true,
         #=assume_fatal_throw::Bool=# false,
@@ -334,7 +313,6 @@ function OptimizationParams(
     inline_cost_threshold::Int = params.inline_cost_threshold,
     inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
     inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
-    inline_error_path_cost::Int = params.inline_error_path_cost,
     max_tuple_splat::Int = params.max_tuple_splat,
     compilesig_invokes::Bool = params.compilesig_invokes,
     assume_fatal_throw::Bool = params.assume_fatal_throw,
@@ -344,7 +322,6 @@ function OptimizationParams(
         inline_cost_threshold,
         inline_nonleaf_penalty,
         inline_tupleret_bonus,
-        inline_error_path_cost,
         max_tuple_splat,
         compilesig_invokes,
         assume_fatal_throw,
@@ -404,36 +381,15 @@ get_inference_world(interp::NativeInterpreter) = interp.world
 get_inference_cache(interp::NativeInterpreter) = interp.inf_cache
 cache_owner(interp::NativeInterpreter) = nothing
 
-"""
-    already_inferred_quick_test(::AbstractInterpreter, ::MethodInstance)
-
-For the `NativeInterpreter`, we don't need to do an actual cache query to know if something
-was already inferred. If we reach this point, but the inference flag has been turned off,
-then it's in the cache. This is purely for a performance optimization.
-"""
-already_inferred_quick_test(interp::NativeInterpreter, mi::MethodInstance) = !mi.inInference
-already_inferred_quick_test(interp::AbstractInterpreter, mi::MethodInstance) = false
+engine_reserve(interp::AbstractInterpreter, mi::MethodInstance) = engine_reserve(mi, cache_owner(interp))
+engine_reserve(mi::MethodInstance, @nospecialize owner) = ccall(:jl_engine_reserve, Any, (Any, Any), mi, owner)::CodeInstance
+# engine_fulfill(::AbstractInterpreter, ci::CodeInstance, src::CodeInfo) = ccall(:jl_engine_fulfill, Cvoid, (Any, Any), ci, src) # currently the same as engine_reject, so just use that one
+engine_reject(::AbstractInterpreter, ci::CodeInstance) = ccall(:jl_engine_fulfill, Cvoid, (Any, Ptr{Cvoid}), ci, C_NULL)
 
-"""
-    lock_mi_inference(::AbstractInterpreter, mi::MethodInstance)
-
-Hint that `mi` is in inference to help accelerate bootstrapping.
-This is particularly used by `NativeInterpreter` and helps us limit the amount of wasted
-work we might do when inference is working on initially inferring itself by letting us
-detect when inference is already in progress and not running a second copy on it.
-This creates a data-race, but the entry point into this code from C (`jl_type_infer`)
-already includes detection and restriction on recursion, so it is hopefully mostly a
-benign problem, since it should really only happen during the first phase of bootstrapping
-that we encounter this flag.
-"""
-lock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
-lock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
 
-"""
-See `lock_mi_inference`.
-"""
-unlock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
-unlock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
+function already_inferred_quick_test end
+function lock_mi_inference end
+function unlock_mi_inference end
 
 """
     add_remark!(::AbstractInterpreter, sv::InferenceState, remark)
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index a4499e003cf2c..577452a873b5e 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -18,7 +18,7 @@ function hasuniquerep(@nospecialize t)
     iskindtype(typeof(t)) || return true # non-types are always compared by egal in the type system
     isconcretetype(t) && return true # these are also interned and pointer comparable
     if isa(t, DataType) && t.name !== Tuple.name && !isvarargtype(t) # invariant DataTypes
-        return _all(hasuniquerep, t.parameters)
+        return all(hasuniquerep, t.parameters)
     end
     return false
 end
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index c2c36920bd574..b3dfd73d53452 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -48,15 +48,6 @@ anymap(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i in 1:length(a) ]
 
 _topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module
 
-function istopfunction(@nospecialize(f), name::Symbol)
-    tn = typeof(f).name
-    if tn.mt.name === name
-        top = _topmod(tn.module)
-        return isdefined(top, name) && isconst(top, name) && f === getglobal(top, name)
-    end
-    return false
-end
-
 #######
 # AST #
 #######
@@ -83,7 +74,27 @@ const MAX_INLINE_CONST_SIZE = 256
 
 function count_const_size(@nospecialize(x), count_self::Bool = true)
     (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0
-    ismutable(x) && return MAX_INLINE_CONST_SIZE + 1
+    if ismutable(x)
+        # No definite size
+        (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) &&
+            return MAX_INLINE_CONST_SIZE + 1
+        if isa(x, Module)
+            # We allow modules, because we already assume they are externally
+            # rooted, so we count their contents as 0 size.
+            return sizeof(Ptr{Cvoid})
+        end
+        # We allow mutable types with no mutable fields (i.e. those mutable
+        # types used for identity only). The intent of this function is to
+        # prevent the rooting of large amounts of data that may have been
+        # speculatively computed. If the struct can get mutated later, we
+        # cannot assess how much data we might end up rooting. However, if
+        # the struct is mutable only for identity, the query still works.
+        for i = 1:nfields(x)
+            if !isconst(typeof(x), i)
+                return MAX_INLINE_CONST_SIZE + 1
+            end
+        end
+    end
     isbits(x) && return Core.sizeof(x)
     dt = typeof(x)
     sz = count_self ? sizeof(dt) : 0
@@ -118,38 +129,56 @@ use_const_api(li::CodeInstance) = invoke_api(li) == 2
 
 function get_staged(mi::MethodInstance, world::UInt)
     may_invoke_generator(mi) || return nothing
+    cache_ci = (mi.def::Method).generator isa Core.CachedGenerator ?
+        RefValue{CodeInstance}() : nothing
     try
-        # user code might throw errors – ignore them
-        ci = ccall(:jl_code_for_staged, Any, (Any, UInt), mi, world)::CodeInfo
-        return ci
-    catch
+        return call_get_staged(mi, world, cache_ci)
+    catch # user code might throw errors – ignore them
         return nothing
     end
 end
 
-function retrieve_code_info(linfo::MethodInstance, world::UInt)
-    def = linfo.def
+# enable caching of unoptimized generated code if the generator is `CachedGenerator`
+function call_get_staged(mi::MethodInstance, world::UInt, cache_ci::RefValue{CodeInstance})
+    token = @_gc_preserve_begin cache_ci
+    cache_ci_ptr = pointer_from_objref(cache_ci)
+    src = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{CodeInstance}), mi, world, cache_ci_ptr)
+    @_gc_preserve_end token
+    return src
+end
+function call_get_staged(mi::MethodInstance, world::UInt, ::Nothing)
+    return ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), mi, world, C_NULL)
+end
+
+function get_cached_uninferred(mi::MethodInstance, world::UInt)
+    ccall(:jl_cached_uninferred, Any, (Any, UInt), mi.cache, world)::CodeInstance
+end
+
+function retrieve_code_info(mi::MethodInstance, world::UInt)
+    def = mi.def
     if !isa(def, Method)
-        return linfo.uninferred
-    end
-    c = nothing
-    if isdefined(def, :generator)
-        # user code might throw errors – ignore them
-        c = get_staged(linfo, world)
+        ci = get_cached_uninferred(mi, world)
+        src = ci.inferred
+        # Inference may corrupt the src, which is fine, because this is a
+        # (short-lived) top-level thunk, but set it to NULL anyway, so we
+        # can catch it if somebody tries to read it again by accident.
+        # @atomic ci.inferred = C_NULL
+        return src
     end
+    c = hasgenerator(def) ? get_staged(mi, world) : nothing
     if c === nothing && isdefined(def, :source)
         src = def.source
         if src === nothing
             # can happen in images built with --strip-ir
             return nothing
         elseif isa(src, String)
-            c = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), def, C_NULL, src)
+            c = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), def, C_NULL, src)
         else
             c = copy(src::CodeInfo)
         end
     end
     if c isa CodeInfo
-        c.parent = linfo
+        c.parent = mi
         return c
     end
     return nothing
@@ -433,67 +462,6 @@ function find_ssavalue_uses!(uses::Vector{BitSet}, e::PhiNode, line::Int)
     end
 end
 
-function is_throw_call(e::Expr, code::Vector{Any})
-    if e.head === :call
-        f = e.args[1]
-        if isa(f, SSAValue)
-            f = code[f.id]
-        end
-        if isa(f, GlobalRef)
-            ff = abstract_eval_globalref_type(f)
-            if isa(ff, Const) && ff.val === Core.throw
-                return true
-            end
-        end
-    end
-    return false
-end
-
-function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Tuple{Int, Int}})
-    for stmt in find_throw_blocks(src.code, handler_at)
-        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
-    end
-    return nothing
-end
-
-function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Tuple{Int, Int}})
-    stmts = BitSet()
-    n = length(code)
-    for i in n:-1:1
-        s = code[i]
-        if isa(s, Expr)
-            if s.head === :gotoifnot
-                if i+1 in stmts && s.args[2]::Int in stmts
-                    push!(stmts, i)
-                end
-            elseif s.head === :return
-                # see `ReturnNode` handling
-            elseif is_throw_call(s, code)
-                if handler_at[i][1] == 0
-                    push!(stmts, i)
-                end
-            elseif i+1 in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, ReturnNode)
-            # NOTE: it potentially makes sense to treat unreachable nodes
-            # (where !isdefined(s, :val)) as `throw` points, but that can cause
-            # worse codegen around the call site (issue #37558)
-        elseif isa(s, GotoNode)
-            if s.label in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, GotoIfNot)
-            if i+1 in stmts && s.dest in stmts
-                push!(stmts, i)
-            end
-        elseif i+1 in stmts
-            push!(stmts, i)
-        end
-    end
-    return stmts
-end
-
 # using a function to ensure we can infer this
 @inline function slot_id(s)
     isa(s, SlotNumber) && return s.id
@@ -504,8 +472,6 @@ end
 # options #
 ###########
 
-is_root_module(m::Module) = false
-
 inlining_enabled() = (JLOptions().can_inline == 1)
 
 function coverage_enabled(m::Module)
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index fe3d002a43d6e..78db5ef5e4ed8 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -9,11 +9,10 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :(&) => 1:1,
     :(=) => 2:2,
     :method => 1:4,
-    :const => 1:1,
+    :const => 1:2,
     :new => 1:typemax(Int),
     :splatnew => 2:2,
     :the_exception => 0:0,
-    :enter => 1:2,
     :leave => 1:typemax(Int),
     :pop_exception => 1:1,
     :inbounds => 1:1,
@@ -23,9 +22,10 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :copyast => 1:1,
     :meta => 0:typemax(Int),
     :global => 1:1,
+    :globaldecl => 2:2,
     :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects), args..., roots...
     :cfunction => 5:5,
-    :isdefined => 1:1,
+    :isdefined => 1:2,
     :code_coverage_effect => 0:0,
     :loopinfo => 0:typemax(Int),
     :gc_preserve_begin => 0:typemax(Int),
@@ -34,10 +34,11 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :throw_undef_if_not => 2:2,
     :aliasscope => 0:0,
     :popaliasscope => 0:0,
-    :new_opaque_closure => 4:typemax(Int),
+    :new_opaque_closure => 5:typemax(Int),
     :import => 1:typemax(Int),
     :using => 1:typemax(Int),
     :export => 1:typemax(Int),
+    :public => 1:typemax(Int),
 )
 
 # @enum isn't defined yet, otherwise I'd use it for this
@@ -64,17 +65,17 @@ struct InvalidCodeError <: Exception
 end
 InvalidCodeError(kind::AbstractString) = InvalidCodeError(kind, nothing)
 
-function maybe_validate_code(linfo::MethodInstance, src::CodeInfo, kind::String)
+function maybe_validate_code(mi::MethodInstance, src::CodeInfo, kind::String)
     if is_asserts()
-        errors = validate_code(linfo, src)
+        errors = validate_code(mi, src)
         if !isempty(errors)
             for e in errors
-                if linfo.def isa Method
+                if mi.def isa Method
                     println(stderr, "WARNING: Encountered invalid ", kind, " code for method ",
-                            linfo.def, ": ", e)
+                            mi.def, ": ", e)
                 else
                     println(stderr, "WARNING: Encountered invalid ", kind, " code for top level expression in ",
-                            linfo.def, ": ", e)
+                            mi.def, ": ", e)
                 end
             end
             error("")
@@ -147,7 +148,7 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le
             elseif head === :call || head === :invoke || x.head === :invoke_modify ||
                 head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
-                head === :const || head === :enter || head === :leave || head === :pop_exception ||
+                head === :const || head === :leave || head === :pop_exception ||
                 head === :method || head === :global || head === :static_parameter ||
                 head === :new || head === :splatnew || head === :thunk || head === :loopinfo ||
                 head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline
@@ -256,7 +257,9 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast, :new_opaque_closure)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call,
+        :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast,
+        :new_opaque_closure)
         return true
     end
     return false
diff --git a/base/complex.jl b/base/complex.jl
index eb5dbb2fbe629..fbcb8def5a0a0 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -1037,24 +1037,22 @@ end
 function atanh(z::Complex{T}) where T
     z = float(z)
     Tf = float(T)
-    Ω = prevfloat(typemax(Tf))
-    θ = sqrt(Ω)/4
-    ρ = 1/θ
     x, y = reim(z)
     ax = abs(x)
     ay = abs(y)
+    θ = sqrt(floatmax(Tf))/4
     if ax > θ || ay > θ #Prevent overflow
         if isnan(y)
             if isinf(x)
                 return Complex(copysign(zero(x),x), y)
             else
-                return Complex(real(1/z), y)
+                return Complex(real(inv(z)), y)
             end
         end
         if isinf(y)
             return Complex(copysign(zero(x),x), copysign(oftype(y,pi)/2, y))
         end
-        return Complex(real(1/z), copysign(oftype(y,pi)/2, y))
+        return Complex(real(inv(z)), copysign(oftype(y,pi)/2, y))
     end
     β = copysign(one(Tf), x)
     z *= β
@@ -1064,16 +1062,15 @@ function atanh(z::Complex{T}) where T
             ξ = oftype(x, Inf)
             η = y
         else
-            ym = ay+ρ
-            ξ = log(sqrt(sqrt(4+y*y))/sqrt(ym))
-            η = copysign(oftype(y,pi)/2 + atan(ym/2), y)/2
+            ξ = log(sqrt(sqrt(muladd(y, y, 4)))/sqrt(ay))
+            η = copysign(oftype(y,pi)/2 + atan(ay/2), y)/2
         end
     else #Normal case
-        ysq = (ay+ρ)^2
+        ysq = ay^2
         if x == 0
             ξ = x
         else
-            ξ = log1p(4x/((1-x)^2 + ysq))/4
+            ξ = log1p(4x/(muladd(1-x, 1-x, ysq)))/4
         end
         η = angle(Complex((1-x)*(1+x)-ysq, 2y))/2
     end
diff --git a/base/condition.jl b/base/condition.jl
index 9f62593afaf77..fd771c9be346a 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -69,6 +69,8 @@ struct GenericCondition{L<:AbstractLock}
     GenericCondition(l::AbstractLock) = new{typeof(l)}(IntrusiveLinkedList{Task}(), l)
 end
 
+show(io::IO, c::GenericCondition) = print(io, GenericCondition, "(", c.lock, ")")
+
 assert_havelock(c::GenericCondition) = assert_havelock(c.lock)
 lock(c::GenericCondition) = lock(c.lock)
 unlock(c::GenericCondition) = unlock(c.lock)
@@ -103,17 +105,16 @@ end
 """
     wait([x])
 
-Block the current task until some event occurs, depending on the type of the argument:
+Block the current task until some event occurs.
 
 * [`Channel`](@ref): Wait for a value to be appended to the channel.
 * [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val`
-  parameter passed to `notify`. Waiting on a condition additionally allows passing
-  `first=true` which results in the waiter being put _first_ in line to wake up on `notify`
-  instead of the usual first-in-first-out behavior.
+  parameter passed to `notify`. See the `Condition`-specific docstring of `wait` for
+  the exact behavior.
 * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process
   can be used to determine success or failure.
-* [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a
-  `TaskFailedException` (which wraps the failed task) is thrown.
+* [`Task`](@ref): Wait for a `Task` to finish. See the `Task`-specific docstring of `wait` for
+  the exact behavior.
 * [`RawFD`](@ref): Wait for changes on a file descriptor (see the `FileWatching` package).
 
 If no argument is passed, the task blocks for an undefined period. A task can only be
@@ -122,6 +123,16 @@ restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref).
 Often `wait` is called within a `while` loop to ensure a waited-for condition is met before
 proceeding.
 """
+function wait end
+
+"""
+    wait(c::GenericCondition; first::Bool=false)
+
+Wait for [`notify`](@ref) on `c` and return the `val` parameter passed to `notify`.
+
+If the keyword `first` is set to `true`, the waiter will be put _first_
+in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO) behavior.
+"""
 function wait(c::GenericCondition; first::Bool=false)
     ct = current_task()
     _wait2(c, ct, first)
@@ -129,7 +140,7 @@ function wait(c::GenericCondition; first::Bool=false)
     try
         return wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     finally
         relockall(c.lock, token)
@@ -185,6 +196,8 @@ This object is NOT thread-safe. See [`Threads.Condition`](@ref) for a thread-saf
 """
 const Condition = GenericCondition{AlwaysLockedST}
 
+show(io::IO, ::Condition) = print(io, Condition, "()")
+
 lock(c::GenericCondition{AlwaysLockedST}) =
     throw(ArgumentError("`Condition` is not thread-safe. Please use `Threads.Condition` instead for multi-threaded code."))
 unlock(c::GenericCondition{AlwaysLockedST}) =
diff --git a/base/cpuid.jl b/base/cpuid.jl
index 48930d8064ba9..f653ba27b4bcd 100644
--- a/base/cpuid.jl
+++ b/base/cpuid.jl
@@ -21,7 +21,7 @@ Base.:<=(a::ISA, b::ISA) = a.features <= b.features
 Base.:<(a::ISA,  b::ISA) = a.features <  b.features
 Base.isless(a::ISA,  b::ISA) = a < b
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "features_h.jl"))  # include($BUILDROOT/base/features_h.jl)
+include(string(Base.BUILDROOT, "features_h.jl"))  # include($BUILDROOT/base/features_h.jl)
 
 # Keep in sync with `arch_march_isa_mapping`.
 const ISAs_by_family = Dict(
@@ -64,7 +64,11 @@ const ISAs_by_family = Dict(
     "powerpc64le" => [
         # We have no way to test powerpc64le features yet, so we're only going to declare the lowest ISA:
         "power8" => ISA(Set{UInt32}()),
-    ]
+    ],
+    "riscv64" => [
+        # We have no way to test riscv64 features yet, so we're only going to declare the lowest ISA:
+        "riscv64" => ISA(Set{UInt32}()),
+    ],
 )
 
 # Test a CPU feature exists on the currently-running host
diff --git a/base/deepcopy.jl b/base/deepcopy.jl
index bee1f7994a150..c4f9ae1a6cb10 100644
--- a/base/deepcopy.jl
+++ b/base/deepcopy.jl
@@ -37,7 +37,7 @@ deepcopy_internal(x::Module, stackdict::IdDict) = error("deepcopy of Modules not
 
 function deepcopy_internal(x::SimpleVector, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = Core.svec(Any[deepcopy_internal(x[i], stackdict) for i = 1:length(x)]...)
     stackdict[x] = y
@@ -46,7 +46,7 @@ end
 
 function deepcopy_internal(x::String, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = GC.@preserve x unsafe_string(pointer(x), sizeof(x))
     stackdict[x] = y
@@ -58,7 +58,7 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
     nf = nfields(x)
     if ismutable(x)
         if haskey(stackdict, x)
-            return stackdict[x]
+            return stackdict[x]::typeof(x)
         end
         y = ccall(:jl_new_struct_uninit, Any, (Any,), T)
         stackdict[x] = y
@@ -105,16 +105,16 @@ function _deepcopy_memory_t(@nospecialize(x::Memory), T, stackdict::IdDict)
     end
     dest = typeof(x)(undef, length(x))
     stackdict[x] = dest
-    xr = Core.memoryref(x)
-    dr = Core.memoryref(dest)
+    xr = memoryref(x)
+    dr = memoryref(dest)
     for i = 1:length(x)
-        xi = Core.memoryref(xr, i, false)
+        xi = Core.memoryrefnew(xr, i, false)
         if Core.memoryref_isassigned(xi, :not_atomic, false)
             xi = Core.memoryrefget(xi, :not_atomic, false)
             if !isbits(xi)
                 xi = deepcopy_internal(xi, stackdict)::typeof(xi)
             end
-            di = Core.memoryref(dr, i, false)
+            di = Core.memoryrefnew(dr, i, false)
             Core.memoryrefset!(di, xi, :not_atomic, false)
         end
     end
@@ -131,9 +131,9 @@ function deepcopy_internal(x::GenericMemoryRef, stackdict::IdDict)
         return stackdict[x]::typeof(x)
     end
     mem = getfield(x, :mem)
-    dest = GenericMemoryRef(deepcopy_internal(mem, stackdict)::typeof(mem))
+    dest = memoryref(deepcopy_internal(mem, stackdict)::typeof(mem))
     i = memoryrefoffset(x)
-    i == 1 || (dest = Core.memoryref(dest, i, true))
+    i == 1 || (dest = Core.memoryrefnew(dest, i, true))
     return dest
 end
 
@@ -157,7 +157,7 @@ end
 
 function deepcopy_internal(x::AbstractLock, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = typeof(x)()
     stackdict[x] = y
@@ -166,7 +166,7 @@ end
 
 function deepcopy_internal(x::GenericCondition, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = typeof(x)(deepcopy_internal(x.lock, stackdict))
     stackdict[x] = y
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 5cb054f426344..f88a53526aa37 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -1,5 +1,114 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# Internal changes mechanism.
+# Instructions for Julia Core Developers:
+# 1. When making a breaking change that is known to be depnedet upon by an
+#    important and closely coupled package, decide on a unique `change_name`
+#    for your PR and add it to the list below. In general, is is better to
+#    err on the side of caution and assign a `change_name` even if it is not
+#    clear that it is required. `change_name`s may also be assigned after the
+#    fact in a separate PR. (Note that this may cause packages to misbehave
+#    on versions in between the change and the assignment of the `change_name`,
+#    but this is often still better than the alternative of misbehaving on unknown
+#    versions).
+
+# Instructions for Release Managers:
+# 1. Upon tagging any release, clear the list of internal changes.
+# 2. Upon tagging an -alpha version
+#    a. On master, set __next_removal_version to v"1.(x+1)-alpha"
+#    b. On the release branch, set __next_removal_version to v"1.x" (no -alpha)
+# 3. Upong tagging a release candidate, clear the list of internal changes and
+#    set __next_removal_version to `nothing`.
+const __next_removal_version = v"1.12-alpha"
+const __internal_changes_list = (
+    :invertedlinetables,
+    :codeinforefactor,
+    :miuninferredrm,
+    :codeinfonargs,  # #54341
+    :ocnopartial,
+    # Add new change names above this line
+)
+
+if !isempty(__internal_changes_list)
+    if VERSION == __next_removal_version
+        error("You have tagged a new release without clearing the internal changes list.")
+    end
+elseif __next_removal_version === nothing
+    error("You have tagged a new release candidate without clearing the internal changes list.")
+end
+
+"""
+    __has_internal_change(version_or::VersionNumber, change_name::Symbol)
+
+Some Julia packages have known dependencies on Julia internals (e.g. for introspection of
+internal julia datastructures). To ease the co-development of such packages with julia,
+a `change_name` is assigned on a best-effort basis or when explicitly requested.
+This `change_name` can be used to probe whether or not the particular pre-release build of julia has
+a particular change. In particular this function tests change scheduled for `version_or`
+is present in our current julia build, either because our current version
+is greater than `version_or` or because we're running a pre-release build that
+includes the change.
+
+Using this mechanism is a superior alternative to commit-number based `VERSION`
+comparisons, which can be brittle during pre-release stages when there are multiple
+actively developed branches.
+
+The list of changes is cleared twice during the release process:
+1. With the release of the first alpha
+2. For the first release candidate
+
+No new `change_name`s will be added during release candidates or bugfix releases
+(so in particular on any released version, the list of changes will be empty and
+`__has_internal_change` will always be equivalent to a version comparison.
+
+# Example
+
+Julia version `v"1.12.0-DEV.173"` changed the internal representation of line number debug info.
+Several debugging packages have custom code to display this information and need to be changed
+accordingly. In previous practice, this would often be accomplished with something like the following
+```
+@static if VERSION > v"1.12.0-DEV.173"
+    # Code to handle new format
+else
+    # Code to handle old format
+end
+```
+
+However, because such checks cannot be introduced until a VERSION number is assigned
+(which also automatically pushes out the change to all nightly users), there was a builtin period
+of breakage. With `__has_internal_change`, this can instead be written as:
+
+```
+@static if __has_internal_change(v"1.12-alpha", :invertedlinenames)
+    # Code to handle new format
+else
+    # Code to handle old format
+end
+```
+
+To find out the correct verrsion to use as the first argument, you may use
+`Base.__next_removal_version`, which is set to the next version number in which
+the list of changes will be cleared.
+
+The primary advantage of this approach is that it allows a new version of the
+package to be tagged and released *in advance* of the break on the nightly
+build, thus ensuring continuity of package operation for nightly users.
+
+!!! warning
+
+    This functionality is intended to help package developers which make use of
+    internal julia functionality. Doing so is explicitly discouraged unless absolutely
+    required and comes with the explicit understanding that the package will break.
+    In particular, this is not a generic feature-testing mechanism, but only a
+    simple, courtesy coordination mechanism for changes that are known (or found) to
+    be breaking a package depending on julia internals.
+"""
+function __has_internal_change(version_or::VersionNumber, change_name::Symbol)
+    VERSION > version_or && return true
+    change_name in __internal_changes_list
+end
+export __has_internal_change
+
 # Deprecated functions and objects
 #
 # Please add new deprecations at the bottom of the file.
@@ -10,9 +119,7 @@
 # and of exporting the function.
 #
 # For more complex cases, move the body of the deprecated method in this file,
-# and call depwarn() directly from inside it. The symbol depwarn() expects is
-# the name of the function, which is used to ensure that the deprecation warning
-# is only printed the first time for each call place.
+# and call depwarn() directly from inside it.
 
 """
     @deprecate old new [export_old=true]
@@ -22,6 +129,8 @@ with the specified signature in the process.
 
 To prevent `old` from being exported, set `export_old` to `false`.
 
+See also [`Base.depwarn()`](@ref).
+
 !!! compat "Julia 1.5"
     As of Julia 1.5, functions defined by `@deprecate` do not print warning when `julia`
     is run without the `--depwarn=yes` flag set, as the default value of `--depwarn` option
@@ -118,6 +227,26 @@ macro deprecate(old, new, export_old=true)
     end
 end
 
+"""
+    Base.depwarn(msg::String, funcsym::Symbol; force=false)
+
+Print `msg` as a deprecation warning. The symbol `funcsym` should be the name
+of the calling function, which is used to ensure that the deprecation warning is
+only printed the first time for each call place. Set `force=true` to force the
+warning to always be shown, even if Julia was started with `--depwarn=no` (the
+default).
+
+See also [`@deprecate`](@ref).
+
+# Examples
+```julia
+function deprecated_func()
+    Base.depwarn("Don't use `deprecated_func()`!", :deprecated_func)
+
+    1 + 1
+end
+```
+"""
 @nospecializeinfer function depwarn(msg, funcsym; force::Bool=false)
     @nospecialize
     # N.B. With this use of `@invokelatest`, we're preventing the addition of backedges from
@@ -397,3 +526,9 @@ end
 @deprecate invpermute!!(a, p::AbstractVector{<:Integer}) invpermute!(a, p) false
 
 # END 1.11 deprecations
+
+# BEGIN 1.12 deprecations
+
+@deprecate stat(fd::Integer) stat(RawFD(fd))
+
+# END 1.12 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index c924e10ab4d6e..4a63ed364b64d 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -868,34 +868,25 @@ struct PersistentDict{K,V} <: AbstractDict{K,V}
     @noinline function KeyValue.set(::Type{PersistentDict{K, V}}, ::Nothing, key, val) where {K, V}
         new{K, V}(HAMT.HAMT{K, V}(key => val))
     end
-    @noinline @Base.assume_effects :effect_free function KeyValue.set(dict::PersistentDict{K, V}, key, val) where {K, V}
+    @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key, val) where {K, V} = @inline _keyvalueset(dict, key, val)
+    @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key::K, val::V) where {K, V} = @inline _keyvalueset(dict, key, val)
+    global function _keyvalueset(dict::PersistentDict{K, V}, key, val) where {K, V}
         trie = dict.trie
         h = HAMT.HashState(key)
-        found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=# true)
+        found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=#true)
         HAMT.insert!(found, present, trie, i, bi, hs, val)
         return new{K, V}(top)
     end
-    @noinline @Base.assume_effects :nothrow :effect_free function KeyValue.set(dict::PersistentDict{K, V}, key::K, val::V) where {K, V}
+    @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key) where {K, V} = @inline _keyvalueset(dict, key)
+    @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key::K) where {K, V} = @inline _keyvalueset(dict, key)
+    global function _keyvalueset(dict::PersistentDict{K, V}, key) where {K, V}
         trie = dict.trie
         h = HAMT.HashState(key)
-        found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=# true)
-        HAMT.insert!(found, present, trie, i, bi, hs, val)
-        return new{K, V}(top)
-    end
-    @noinline @Base.assume_effects :effect_free function KeyValue.set(dict::PersistentDict{K, V}, key) where {K, V}
-        trie = dict.trie
-        h = HAMT.HashState(key)
-        found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=# true)
-        if found && present
-            deleteat!(trie.data, i)
-            HAMT.unset!(trie, bi)
-        end
-        return new{K, V}(top)
-    end
-    @noinline @Base.assume_effects :nothrow :effect_free function KeyValue.set(dict::PersistentDict{K, V}, key::K) where {K, V}
-        trie = dict.trie
-        h = HAMT.HashState(key)
-        found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=# true)
+        found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=#true)
         if found && present
             deleteat!(trie.data, i)
             HAMT.unset!(trie, bi)
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index 0c83376ecfc76..1327a1f795d4f 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -19,8 +19,9 @@ module Docs
 Functions, methods and types can be documented by placing a string before the definition:
 
     \"\"\"
-    # The Foo Function
-    `foo(x)`: Foo the living hell out of `x`.
+        foo(x)
+
+    Return a fooified version of `x`.
     \"\"\"
     foo(x) = ...
 
@@ -446,6 +447,52 @@ more than one expression is marked then the same docstring is applied to each ex
     end
 
 `@__doc__` has no effect when a macro that uses it is not documented.
+
+!!! compat "Julia 1.12"
+
+    This section documents a very subtle corner case that is only relevant to
+    macros which themselves both define other macros and then attempt to use them
+    within the same expansion. Such macros were impossible to write prior to
+    Julia 1.12 and are still quite rare. If you are not writing such a macro,
+    you may ignore this note.
+
+    In versions prior to Julia 1.12, macroexpansion would recursively expand through
+    `Expr(:toplevel)` blocks. This behavior was changed in Julia 1.12 to allow
+    macros to recursively define other macros and use them in the same returned
+    expression. However, to preserve backwards compatibility with existing uses of
+    `@__doc__`, the doc system will still expand through `Expr(:toplevel)` blocks
+    when looking for `@__doc__` markers. As a result, macro-defining-macros will
+    have an observable behavior difference when annotated with a docstring:
+
+    ```julia
+    julia> macro macroception()
+        Expr(:toplevel, :(macro foo() 1 end), :(@foo))
+    end
+
+    julia> @macroception
+    1
+
+    julia> "Docstring" @macroception
+    ERROR: LoadError: UndefVarError: `@foo` not defined in `Main`
+    ```
+
+    The supported workaround is to manually expand the `@__doc__` macro in the
+    defining macro, which the docsystem will recognize and suppress the recursive
+    expansion:
+
+    ```julia
+    julia> macro macroception()
+        Expr(:toplevel,
+            macroexpand(__module__, :(@__doc__ macro foo() 1 end); recursive=false),
+            :(@foo))
+    end
+
+    julia> @macroception
+    1
+
+    julia> "Docstring" @macroception
+    1
+    ```
 """
 :(Core.@__doc__)
 
@@ -453,17 +500,23 @@ function __doc__!(source, mod, meta, def, define::Bool)
     @nospecialize source mod meta def
     # Two cases must be handled here to avoid redefining all definitions contained in `def`:
     if define
-        # `def` has not been defined yet (this is the common case, i.e. when not generating
-        # the Base image). We just need to convert each `@__doc__` marker to an `@doc`.
-        finddoc(def) do each
+        function replace_meta_doc(each)
             each.head = :macrocall
             each.args = Any[Symbol("@doc"), source, mod, nothing, meta, each.args[end], define]
         end
+
+        # `def` has not been defined yet (this is the common case, i.e. when not generating
+        # the Base image). We just need to convert each `@__doc__` marker to an `@doc`.
+        found = finddoc(replace_meta_doc, mod, def; expand_toplevel = false)
+
+        if !found
+            found = finddoc(replace_meta_doc, mod, def; expand_toplevel = true)
+        end
     else
         # `def` has already been defined during Base image gen so we just need to find and
         # document any subexpressions marked with `@__doc__`.
         docs  = []
-        found = finddoc(def) do each
+        found = finddoc(mod, def; expand_toplevel = true) do each
             push!(docs, :(@doc($source, $mod, $meta, $(each.args[end]), $define)))
         end
         # If any subexpressions have been documented then replace the entire expression with
@@ -472,25 +525,30 @@ function __doc__!(source, mod, meta, def, define::Bool)
             def.head = :toplevel
             def.args = docs
         end
-        found
     end
+    return found
 end
 # Walk expression tree `def` and call `λ` when any `@__doc__` markers are found. Returns
 # `true` to signify that at least one `@__doc__` has been found, and `false` otherwise.
-function finddoc(λ, def::Expr)
+function finddoc(λ, mod::Module, def::Expr; expand_toplevel::Bool=false)
     if isexpr(def, :block, 2) && isexpr(def.args[1], :meta, 1) && (def.args[1]::Expr).args[1] === :doc
         # Found the macroexpansion of an `@__doc__` expression.
         λ(def)
         true
     else
+        if expand_toplevel && isexpr(def, :toplevel)
+            for i = 1:length(def.args)
+                def.args[i] = macroexpand(mod, def.args[i])
+            end
+        end
         found = false
         for each in def.args
-            found |= finddoc(λ, each)
+            found |= finddoc(λ, mod, each; expand_toplevel)
         end
         found
     end
 end
-finddoc(λ, @nospecialize def) = false
+finddoc(λ, mod::Module, @nospecialize def; expand_toplevel::Bool=false) = false
 
 # Predicates and helpers for `docm` expression selection:
 
@@ -505,14 +563,58 @@ isquotedmacrocall(@nospecialize x) =
 isbasicdoc(@nospecialize x) = isexpr(x, :.) || isa(x, Union{QuoteNode, Symbol})
 is_signature(@nospecialize x) = isexpr(x, :call) || (isexpr(x, :(::), 2) && isexpr(x.args[1], :call)) || isexpr(x, :where)
 
+function _doc(binding::Binding, sig::Type = Union{})
+    if defined(binding)
+        result = getdoc(resolve(binding), sig)
+        result === nothing || return result
+    end
+    # Lookup first match for `binding` and `sig` in all modules of the docsystem.
+    for mod in modules
+        dict = meta(mod; autoinit=false)
+        isnothing(dict) && continue
+        if haskey(dict, binding)
+            multidoc = dict[binding]
+            for msig in multidoc.order
+                sig <: msig && return multidoc.docs[msig]
+            end
+        end
+    end
+    return nothing
+end
+
+# Some additional convenience `doc` methods that take objects rather than `Binding`s.
+_doc(obj::UnionAll) = _doc(Base.unwrap_unionall(obj))
+_doc(object, sig::Type = Union{}) = _doc(aliasof(object, typeof(object)), sig)
+_doc(object, sig...)              = _doc(object, Tuple{sig...})
+
+function simple_lookup_doc(ex)
+    if isa(ex, Expr) && ex.head !== :(.) && Base.isoperator(ex.head)
+        # handle syntactic operators, e.g. +=, ::, .=
+        ex = ex.head
+    end
+    if haskey(keywords, ex)
+        return keywords[ex]
+    elseif !isa(ex, Expr) && !isa(ex, Symbol)
+        return :($(_doc)($(typeof)($(esc(ex)))))
+    end
+    binding = esc(bindingexpr(namify(ex)))
+    if isexpr(ex, :call) || isexpr(ex, :macrocall) || isexpr(ex, :where)
+        sig = esc(signature(ex))
+        :($(_doc)($binding, $sig))
+    else
+        :($(_doc)($binding))
+    end
+end
+
 function docm(source::LineNumberNode, mod::Module, ex)
     @nospecialize ex
     if isexpr(ex, :->) && length(ex.args) > 1
         return docm(source, mod, ex.args...)
-    elseif isassigned(Base.REPL_MODULE_REF)
+    elseif (REPL = Base.REPL_MODULE_REF[]) !== Base
         # TODO: this is a shim to continue to allow `@doc` for looking up docstrings
-        REPL = Base.REPL_MODULE_REF[]
         return invokelatest(REPL.lookup_doc, ex)
+    else
+        return simple_lookup_doc(ex)
     end
     return nothing
 end
@@ -528,8 +630,37 @@ iscallexpr(ex) = false
 function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true)
     @nospecialize meta ex
     # Some documented expressions may be decorated with macro calls which obscure the actual
-    # expression. Expand the macro calls and remove extra blocks.
-    x = unblock(macroexpand(mod, ex))
+    # expression. Expand the macro calls.
+    x = macroexpand(mod, ex)
+    return _docm(source, mod, meta, x, define)
+end
+
+function _docm(source::LineNumberNode, mod::Module, meta, x, define::Bool = true)
+    if isexpr(x, :var"hygienic-scope")
+        x.args[1] = _docm(source, mod, meta, x.args[1])
+        return x
+    elseif isexpr(x, :escape)
+        x.args[1] = _docm(source, mod, meta, x.args[1])
+        return x
+    elseif isexpr(x, :block)
+        docarg = 0
+        for i = 1:length(x.args)
+            isa(x.args[i], LineNumberNode) && continue
+            if docarg == 0
+                docarg = i
+                continue
+            end
+            # More than one documentable expression in the block, treat it as a whole
+            # expression, which will fall through and look for (Expr(:meta, doc))
+            docarg = 0
+            break
+        end
+        if docarg != 0
+            x.args[docarg] = _docm(source, mod, meta, x.args[docarg], define)
+            return x
+        end
+    end
+
     # Don't try to redefine expressions. This is only needed for `Base` img gen since
     # otherwise calling `loaddocs` would redefine all documented functions and types.
     def = define ? x : nothing
@@ -594,7 +725,7 @@ function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true
     # All other expressions are undocumentable and should be handled on a case-by-case basis
     # with `@__doc__`. Unbound string literals are also undocumentable since they cannot be
     # retrieved from the module's metadata `IdDict` without a reference to the string.
-    docerror(ex)
+    docerror(x)
 
     return doc
 end
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index d4785df98ec21..e03d0db78f29f 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -64,9 +64,12 @@ kw"export"
     public
 
 `public` is used within modules to tell Julia which names are part of the
-public API of the module . For example: `public foo` indicates that the name
-`foo` is public, without making it available when [`using`](@ref)
-the module. See the [manual section about modules](@ref modules) for details.
+public API of the module. For example: `public foo` indicates that the name
+`foo` is public, without making it available when [`using`](@ref) the module.
+
+As [`export`](@ref) already indicates that a name is public, it is
+unnecessary and an error to declare a name both as `public` and as `export`ed.
+See the [manual section about modules](@ref modules) for details.
 
 !!! compat "Julia 1.11"
     The public keyword was added in Julia 1.11. Prior to this the notion
@@ -660,8 +663,11 @@ kw"{", kw"{}", kw"}"
 """
     []
 
-Square braces are used for [indexing](@ref man-array-indexing), [indexed assignment](@ref man-indexed-assignment),
-[array literals](@ref man-array-literals), and [array comprehensions](@ref man-comprehensions).
+Square brackets are used for [indexing](@ref man-array-indexing) ([`getindex`](@ref)),
+[indexed assignment](@ref man-indexed-assignment) ([`setindex!`](@ref)),
+[array literals](@ref man-array-literals) ([`Base.vect`](@ref)),
+[array concatenation](@ref man-array-concatenation) ([`vcat`](@ref), [`hcat`](@ref), [`hvcat`](@ref), [`hvncat`](@ref)),
+and [array comprehensions](@ref man-comprehensions) ([`collect`](@ref)).
 """
 kw"[", kw"[]", kw"]"
 
@@ -1046,13 +1052,42 @@ exception object to the given variable within the `catch` block.
 
 The power of the `try`/`catch` construct lies in the ability to unwind a deeply
 nested computation immediately to a much higher level in the stack of calling functions.
+
+A `try/catch` block can also have an `else` clause that executes only if no exception occurred:
+```julia
+try
+    a_dangerous_operation()
+catch
+    @warn "The operation failed."
+else
+    @info "The operation succeeded."
+end
+```
+
+A `try` or `try`/`catch` block can also have a [`finally`](@ref) clause that executes
+at the end, regardless of whether an exception occurred.  For example, this can be
+used to guarantee that an opened file is closed:
+```julia
+f = open("file")
+try
+    operate_on_file(f)
+catch
+    @warn "An error occurred!"
+finally
+    close(f)
+end
+```
+(`finally` can also be used without a `catch` block.)
+
+!!! compat "Julia 1.8"
+    Else clauses require at least Julia 1.8.
 """
 kw"try", kw"catch"
 
 """
     finally
 
-Run some code when a given block of code exits, regardless
+Run some code when a given `try` block of code exits, regardless
 of how it exits. For example, here is how we can guarantee that an opened file is
 closed:
 
@@ -1354,7 +1389,11 @@ Usually `begin` will not be necessary, since keywords such as [`function`](@ref)
 implicitly begin blocks of code. See also [`;`](@ref).
 
 `begin` may also be used when indexing to represent the first index of a
-collection or the first index of a dimension of an array.
+collection or the first index of a dimension of an array. For example,
+`a[begin]` is the first element of an array `a`.
+
+!!! compat "Julia 1.4"
+    Use of `begin` as an index requires Julia 1.4 or later.
 
 # Examples
 ```jldoctest
@@ -1415,8 +1454,20 @@ kw"struct"
     mutable struct
 
 `mutable struct` is similar to [`struct`](@ref), but additionally allows the
-fields of the type to be set after construction. See the manual section on
-[Composite Types](@ref) for more information.
+fields of the type to be set after construction.
+
+Individual fields of a mutable struct can be marked as `const` to make them immutable:
+
+```julia
+mutable struct Baz
+    a::Int
+    const b::Float64
+end
+```
+!!! compat "Julia 1.8"
+    The `const` keyword for fields of mutable structs requires at least Julia 1.8.
+
+See the manual section on [Composite Types](@ref) for more information.
 """
 kw"mutable struct"
 
@@ -1518,6 +1569,8 @@ Nothing
 The singleton instance of type [`Nothing`](@ref), used by convention when there is no value to return
 (as in a C `void` function) or when a variable or field holds no value.
 
+A return value of `nothing` is not displayed by the REPL and similar interactive environments.
+
 See also: [`isnothing`](@ref), [`something`](@ref), [`missing`](@ref).
 """
 nothing
@@ -1620,6 +1673,34 @@ julia> ex.msg
 """
 ErrorException
 
+"""
+    FieldError(type::DataType, field::Symbol)
+
+An operation tried to access invalid `field` of `type`.
+
+!!! compat "Julia 1.12"
+    Prior to Julia 1.12, invalid field access threw an [`ErrorException`](@ref)
+
+See [`getfield`](@ref)
+
+# Examples
+```jldoctest
+julia> struct AB
+          a::Float32
+          b::Float64
+       end
+
+julia> ab = AB(1, 3)
+AB(1.0f0, 3.0)
+
+julia> ab.c # field `c` doesn't exist
+ERROR: FieldError: type AB has no field c
+Stacktrace:
+[...]
+```
+"""
+FieldError
+
 """
     WrappedException(msg)
 
@@ -1757,16 +1838,20 @@ Stacktrace:
 DomainError
 
 """
-    Task(func)
+    Task(func[, reserved_stack::Int])
 
 Create a `Task` (i.e. coroutine) to execute the given function `func` (which
 must be callable with no arguments). The task exits when this function returns.
 The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d.
 
+The optional `reserved_stack` argument specifies the size of the stack available
+for this task, in bytes. The default, `0`, uses the system-dependent stack size default.
+
 !!! warning
     By default tasks will have the sticky bit set to true `t.sticky`. This models the
     historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
-    they are first scheduled on. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
     bit manually to `false`.
 
 # Examples
@@ -2490,12 +2575,17 @@ cases.
 See also [`setproperty!`](@ref Base.setproperty!) and [`getglobal`](@ref)
 
 # Examples
-```jldoctest
-julia> module M end;
+```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
+julia> module M; global a; end;
 
 julia> M.a  # same as `getglobal(M, :a)`
 ERROR: UndefVarError: `a` not defined in `M`
-Suggestion: check for spelling errors or missing imports.
+Suggestion: add an appropriate import or assignment. This global was declared but not assigned.
+Stacktrace:
+ [1] getproperty(x::Module, f::Symbol)
+   @ Base ./Base.jl:42
+ [2] top-level scope
+   @ none:1
 
 julia> setglobal!(M, :a, 1)
 1
@@ -2516,18 +2606,6 @@ Retrieve the declared type of the binding `name` from the module `module`.
 """
 Core.get_binding_type
 
-"""
-    Core.set_binding_type!(module::Module, name::Symbol, [type::Type])
-
-Set the declared type of the binding `name` in the module `module` to `type`. Error if the
-binding already has a type that is not equivalent to `type`. If the `type` argument is
-absent, set the binding type to `Any` if unset, but do not error.
-
-!!! compat "Julia 1.9"
-    This function requires Julia 1.9 or later.
-"""
-Core.set_binding_type!
-
 """
     swapglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic])
 
@@ -2663,23 +2741,23 @@ julia> Memory{Float64}(undef, 3)
 Memory{T}(::UndefInitializer, n)
 
 """
-    MemoryRef(memory)
+    memoryref(::GenericMemory)
 
-Construct a MemoryRef from a memory object. This does not fail, but the
-resulting memory may point out-of-bounds if the memory is empty.
+Construct a `GenericMemoryRef` from a memory object. This does not fail, but the
+resulting memory will point out-of-bounds if and only if the memory is empty.
 """
-MemoryRef(::Memory)
+memoryref(::GenericMemory)
 
 """
-    MemoryRef(::Memory, index::Integer)
-    MemoryRef(::MemoryRef, index::Integer)
+    memoryref(::GenericMemory, index::Integer)
+    memoryref(::GenericMemoryRef, index::Integer)
 
-Construct a MemoryRef from a memory object and an offset index (1-based) which
+Construct a `GenericMemoryRef` from a memory object and an offset index (1-based) which
 can also be negative. This always returns an inbounds object, and will throw an
 error if that is not possible (because the index would result in a shift
 out-of-bounds of the underlying memory).
 """
-MemoryRef(::Union{Memory,MemoryRef}, ::Integer)
+memoryref(::Union{GenericMemory,GenericMemoryRef}, ::Integer)
 
 """
     Vector{T}(undef, n)
@@ -3110,14 +3188,27 @@ Any
 """
     Union{}
 
-`Union{}`, the empty [`Union`](@ref) of types, is the type that has no values. That is, it has the defining
-property `isa(x, Union{}) == false` for any `x`. `Base.Bottom` is defined as its alias and the type of `Union{}`
-is `Core.TypeofBottom`.
+`Union{}`, the empty [`Union`](@ref) of types, is the *bottom* type of the type system. That is, for each
+`T::Type`, `Union{} <: T`. Also see the subtyping operator's documentation: [`<:`](@ref).
+
+As such, `Union{}` is also an *empty*/*uninhabited* type, meaning that it has no values. That is, for each `x`,
+`isa(x, Union{}) == false`.
+
+`Base.Bottom` is defined as its alias and the type of `Union{}` is `Core.TypeofBottom`.
 
 # Examples
 ```jldoctest
 julia> isa(nothing, Union{})
 false
+
+julia> Union{} <: Int
+true
+
+julia> typeof(Union{}) === Core.TypeofBottom
+true
+
+julia> isa(Union{}, Union)
+false
 ```
 """
 kw"Union{}", Base.Bottom
@@ -3711,6 +3802,20 @@ The current differences are:
 """
 Core.finalizer
 
+"""
+    ConcurrencyViolationError(msg) <: Exception
+
+An error thrown when a detectable violation of concurrent semantics has occurred.
+
+A non-exhaustive list of examples of when this is used include:
+
+ * Throwing when a deadlock has been detected (e.g. `wait(current_task())`)
+ * Known-unsafe behavior is attempted (e.g. `yield(current_task)`)
+ * A known non-threadsafe datastructure is attempted to be modified from multiple concurrent tasks
+ * A lock is being unlocked that wasn't locked by this task
+"""
+ConcurrencyViolationError
+
 Base.include(BaseDocs, "intrinsicsdocs.jl")
 
 end
diff --git a/base/docs/intrinsicsdocs.jl b/base/docs/intrinsicsdocs.jl
index ca06ad678bdbf..c9538ea74ab26 100644
--- a/base/docs/intrinsicsdocs.jl
+++ b/base/docs/intrinsicsdocs.jl
@@ -23,20 +23,20 @@ The `Core.Intrinsics` module holds the `Core.IntrinsicFunction` objects.
 Core.Intrinsics
 
 """
-    Core.memoryref(::GenericMemory)
-    Core.memoryref(::GenericMemoryRef, index::Int, [boundscheck::Bool])
+    Core.memoryrefnew(::GenericMemory)
+    Core.memoryrefnew(::GenericMemoryRef, index::Int, [boundscheck::Bool])
 
-Return a `GenericMemoryRef` for a `GenericMemory`. See [`MemoryRef`](@ref).
+Return a `GenericMemoryRef` for a `GenericMemory`. See [`memoryref`](@ref).
 
 !!! compat "Julia 1.11"
     This function requires Julia 1.11 or later.
 """
-Core.memoryref
+Core.memoryrefnew
 
 """
     Core..memoryrefoffset(::GenericMemoryRef)
 
-Return the offset index that was used to construct the `MemoryRef`. See [`Core.memoryref`](@ref).
+Return the offset index that was used to construct the `MemoryRef`. See [`memoryref`](@ref).
 
 !!! compat "Julia 1.11"
     This function requires Julia 1.11 or later.
diff --git a/base/error.jl b/base/error.jl
index 37ceb39253e38..ee533cee0b57d 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -27,6 +27,15 @@ throw
 
 ## native julia error handling ##
 
+# This is `Experimental.@max_methods 2 function error end`, which is not available at this point in bootstrap.
+# NOTE It is important to always be able to infer the return type of `error` as `Union{}`,
+# but there's a hitch when a package globally sets `@max_methods 1` and it causes inference
+# for `error(::Any)` to fail (JuliaLang/julia#54029).
+# This definition site `@max_methods 2` setting overrides any global `@max_methods 1` settings
+# on package side, guaranteeing that return type inference on `error` is successful always.
+function error end
+typeof(error).name.max_methods = UInt8(2)
+
 """
     error(message::AbstractString)
 
@@ -62,7 +71,7 @@ rethrow() = ccall(:jl_rethrow, Bottom, ())
 rethrow(@nospecialize(e)) = ccall(:jl_rethrow_other, Bottom, (Any,), e)
 
 struct InterpreterIP
-    code::Union{CodeInfo,Core.MethodInstance,Nothing}
+    code::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing}
     stmt::Csize_t
     mod::Union{Module,Nothing}
 end
@@ -87,7 +96,7 @@ function _reformat_bt(bt::Array{Ptr{Cvoid},1}, bt2::Array{Any,1})
         tag       = (entry_metadata >> 6) & 0xf
         header    =  entry_metadata >> 10
         if tag == 1 # JL_BT_INTERP_FRAME_TAG
-            code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Nothing}
+            code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing}
             mod = njlvalues == 2 ? bt2[j+1]::Union{Module,Nothing} : nothing
             push!(ret, InterpreterIP(code, header, mod))
         else
@@ -162,7 +171,7 @@ end
 ## keyword arg lowering generates calls to this ##
 function kwerr(kw, args::Vararg{Any,N}) where {N}
     @noinline
-    throw(MethodError(Core.kwcall, (kw, args...)))
+    throw(MethodError(Core.kwcall, (kw, args...), tls_world_age()))
 end
 
 ## system error handling ##
@@ -223,20 +232,20 @@ macro assert(ex, msgs...)
         msg = msg # pass-through
     elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol))
         # message is an expression needing evaluating
-        msg = :(Main.Base.string($(esc(msg))))
+        msg = :(Main.Base.invokelatest(Main.Base.string, $(esc(msg))))
     elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg)
         msg = Main.Base.string(msg)
     else
         # string() might not be defined during bootstrap
-        msg = quote
-            msg = $(Expr(:quote,msg))
-            isdefined(Main, :Base) ? Main.Base.string(msg) :
-                (Core.println(msg); "Error during bootstrap. See stdout.")
-        end
+        msg = :(Main.Base.invokelatest(_assert_tostring, $(Expr(:quote,msg))))
     end
     return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg)))
 end
 
+# this may be overridden in contexts where `string(::Expr)` doesn't work
+_assert_tostring(msg) = isdefined(Main, :Base) ? Main.Base.string(msg) :
+    (Core.println(msg); "Error during bootstrap. See stdout.")
+
 struct ExponentialBackOff
     n::Int
     first_delay::Float64
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 1e6a6faaae5df..d805cb64fb81e 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -43,6 +43,15 @@ function showerror(io::IO, ex::Meta.ParseError)
     end
 end
 
+function showerror(io::IO, ex::Core.TypeNameError)
+    print(io, "TypeNameError: ")
+    if isa(ex.a, Union)
+        print(io, "typename does not apply to unions whose components have different typenames")
+    else
+        print(io, "typename does not apply to this type")
+    end
+end
+
 function showerror(io::IO, ex::BoundsError)
     print(io, "BoundsError")
     if isdefined(ex, :a)
@@ -196,7 +205,7 @@ function showerror(io::IO, ex::CanonicalIndexError)
     print(io, "CanonicalIndexError: ", ex.func, " not defined for ", ex.type)
 end
 
-typesof(@nospecialize args...) = Tuple{Any[ Core.Typeof(args[i]) for i in 1:length(args) ]...}
+typesof(@nospecialize args...) = Tuple{Any[Core.Typeof(arg) for arg in args]...}
 
 function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b::DataType), color::Symbol)
     if a.name === b.name
@@ -273,7 +282,7 @@ function showerror(io::IO, ex::MethodError)
         arg_types_param = arg_types_param[3:end]
         san_arg_types_param = san_arg_types_param[3:end]
         keys = kwt.parameters[1]::Tuple
-        kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in 1:length(keys)]
+        kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in eachindex(keys)]
         arg_types = rewrap_unionall(Tuple{arg_types_param...}, arg_types)
     end
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
@@ -327,10 +336,11 @@ function showerror(io::IO, ex::MethodError)
         end
     end
     if ex.world == typemax(UInt) || hasmethod(f, arg_types, world=ex.world)
+        if !isempty(kwargs)
+            print(io, "\nThis method does not support all of the given keyword arguments (and may not support any).")
+        end
         if ex.world == typemax(UInt) || isempty(kwargs)
             print(io, "\nThis error has been manually thrown, explicitly, so the method may exist but be intentionally marked as unimplemented.")
-        else
-            print(io, "\nThis method may not support any keyword arguments.")
         end
     elseif hasmethod(f, arg_types) && !hasmethod(f, arg_types, world=ex.world)
         curworld = get_world_counter()
@@ -366,6 +376,12 @@ function showerror(io::IO, ex::MethodError)
     nothing
 end
 
+function showerror(io::IO, exc::FieldError)
+    @nospecialize
+    print(io, "FieldError: type $(exc.type |> nameof) has no field $(exc.field)")
+    Base.Experimental.show_error_hints(io, exc)
+end
+
 striptype(::Type{T}) where {T} = T
 striptype(::Any) = nothing
 
@@ -399,7 +415,7 @@ end
 
 #Show an error by directly calling jl_printf.
 #Useful in Base submodule __init__ functions where stderr isn't defined yet.
-function showerror_nostdio(err, msg::AbstractString)
+function showerror_nostdio(@nospecialize(err), msg::AbstractString)
     stderr_stream = ccall(:jl_stderr_stream, Ptr{Cvoid}, ())
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, msg)
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, ":\n")
@@ -687,7 +703,7 @@ function show_reduced_backtrace(io::IO, t::Vector)
 
     push!(repeated_cycle, (0,0,0)) # repeated_cycle is never empty
     frame_counter = 1
-    for i in 1:length(displayed_stackframes)
+    for i in eachindex(displayed_stackframes)
         (frame, n) = displayed_stackframes[i]
 
         print_stackframe(io, frame_counter, frame, n, ndigits_max, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS)
@@ -761,7 +777,7 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulec
 
     StackTraces.show_spec_linfo(IOContext(io, :backtrace=>true), frame)
     if n > 1
-        printstyled(io, " (repeats $n times)"; color=:light_black)
+        printstyled(io, " (repeats $n times)"; color=Base.warn_color(), bold=true)
     end
     println(io)
 
@@ -799,7 +815,7 @@ function show_backtrace(io::IO, t::Vector)
     end
 
     # t is a pre-processed backtrace (ref #12856)
-    if t isa Vector{Any}
+    if t isa Vector{Any} && (length(t) == 0 || t[1] isa Tuple{StackFrame,Int})
         filtered = t
     else
         filtered = process_backtrace(t)
@@ -864,7 +880,7 @@ end
 function _collapse_repeated_frames(trace)
     kept_frames = trues(length(trace))
     last_frame = nothing
-    for i in 1:length(trace)
+    for i in eachindex(trace)
         frame::StackFrame, _ = trace[i]
         if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line
             #=
@@ -909,7 +925,7 @@ function _collapse_repeated_frames(trace)
                 end
                 if length(last_params) > length(params)
                     issame = true
-                    for i = 1:length(params)
+                    for i = eachindex(params)
                         issame &= params[i] == last_params[i]
                     end
                     if issame
@@ -1051,7 +1067,7 @@ Experimental.register_error_hint(nonsetable_type_hint_handler, MethodError)
 # (probably attempting concatenation)
 function string_concatenation_hint_handler(io, ex, arg_types, kwargs)
     @nospecialize
-    if (ex.f === +) && all(i -> i <: AbstractString, arg_types)
+    if (ex.f === +) && !isempty(arg_types) && all(i -> i <: AbstractString, arg_types)
         print(io, "\nString concatenation is performed with ")
         printstyled(io, "*", color=:cyan)
         print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).")
@@ -1085,6 +1101,20 @@ end
 
 Experimental.register_error_hint(methods_on_iterable, MethodError)
 
+# Display a hint in case the user tries to access non-member fields of container type datastructures
+function fielderror_hint_handler(io, exc)
+    @nospecialize
+    field = exc.field
+    type = exc.type
+    if type <: AbstractDict
+        print(io, "\nDid you mean to access dict values using key: `:$field` ? Consider using indexing syntax ")
+        printstyled(io, "dict[:$(field)]", color=:cyan)
+        println(io)
+    end
+end
+
+Experimental.register_error_hint(fielderror_hint_handler, FieldError)
+
 # ExceptionStack implementation
 size(s::ExceptionStack) = size(s.stack)
 getindex(s::ExceptionStack, i::Int) = s.stack[i]
diff --git a/base/essentials.jl b/base/essentials.jl
index fdc66e94d1a00..32c44a9571f23 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, memoryref, memoryrefget, memoryrefset!
+using Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, memoryrefnew, memoryrefget, memoryrefset!
 
 const Callable = Union{Function,Type}
 
@@ -180,22 +180,11 @@ macro isdefined(s::Symbol)
     return Expr(:escape, Expr(:isdefined, s))
 end
 
-"""
-    nameof(m::Module) -> Symbol
-
-Get the name of a `Module` as a [`Symbol`](@ref).
-
-# Examples
-```jldoctest
-julia> nameof(Base.Broadcast)
-:Broadcast
-```
-"""
-nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
+_nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
 
 function _is_internal(__module__)
     if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler ||
-       nameof(__module__) === :Base
+       _nameof(__module__) === :Base
         return true
     end
     return false
@@ -212,7 +201,9 @@ macro _total_meta()
         #=:notaskstate=#true,
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
 macro _foldable_meta()
@@ -225,7 +216,9 @@ macro _foldable_meta()
         #=:notaskstate=#true,
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 # can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping)
 macro _terminates_locally_meta()
@@ -238,7 +231,9 @@ macro _terminates_locally_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally` (supposed to be used for bootstrapping)
 macro _terminates_globally_meta()
@@ -251,7 +246,9 @@ macro _terminates_globally_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally :notaskstate` (supposed to be used for bootstrapping)
 macro _terminates_globally_notaskstate_meta()
@@ -264,7 +261,9 @@ macro _terminates_globally_notaskstate_meta()
         #=:notaskstate=#true,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally :noub` (supposed to be used for bootstrapping)
 macro _terminates_globally_noub_meta()
@@ -277,7 +276,9 @@ macro _terminates_globally_noub_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping)
 macro _effect_free_terminates_locally_meta()
@@ -290,7 +291,9 @@ macro _effect_free_terminates_locally_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow :noub` (supposed to be used for bootstrapping)
 macro _nothrow_noub_meta()
@@ -303,7 +306,9 @@ macro _nothrow_noub_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
 macro _nothrow_meta()
@@ -316,7 +321,9 @@ macro _nothrow_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
 macro _noub_meta()
@@ -329,7 +336,8 @@ macro _noub_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false))
 end
 # can be used in place of `@assume_effects :notaskstate` (supposed to be used for bootstrapping)
 macro _notaskstate_meta()
@@ -342,7 +350,9 @@ macro _notaskstate_meta()
         #=:notaskstate=#true,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#false))
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :noub_if_noinbounds` (supposed to be used for bootstrapping)
 macro _noub_if_noinbounds_meta()
@@ -355,7 +365,9 @@ macro _noub_if_noinbounds_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
-        #=:noub_if_noinbounds=#true))
+        #=:noub_if_noinbounds=#true,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 
 # another version of inlining that propagates an inbounds context
@@ -372,10 +384,23 @@ default_access_order(a::GenericMemoryRef{:not_atomic}) = :not_atomic
 default_access_order(a::GenericMemoryRef{:atomic}) = :monotonic
 
 getindex(A::GenericMemory, i::Int) = (@_noub_if_noinbounds_meta;
-    memoryrefget(memoryref(memoryref(A), i, @_boundscheck), default_access_order(A), false))
+    memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false))
 getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck)
 
-function iterate end
+"""
+    nameof(m::Module) -> Symbol
+
+Get the name of a `Module` as a [`Symbol`](@ref).
+
+# Examples
+```jldoctest
+julia> nameof(Base.Broadcast)
+:Broadcast
+```
+"""
+nameof(m::Module) = (@_total_meta; ccall(:jl_module_name, Ref{Symbol}, (Any,), m))
+
+typeof(function iterate end).name.constprop_heuristic = Core.ITERATE_HEURISTIC
 
 """
     convert(T, x)
@@ -397,8 +422,9 @@ Stacktrace:
 [...]
 ```
 
-If `T` is a [`AbstractFloat`](@ref) type,
-then it will return the closest value to `x` representable by `T`.
+If `T` is a [`AbstractFloat`](@ref) type, then it will return the
+closest value to `x` representable by `T`. Inf is treated as one
+ulp greater than `floatmax(T)` for purposes of determining nearest.
 
 ```jldoctest
 julia> x = 1/3
@@ -560,15 +586,7 @@ function unconstrain_vararg_length(va::Core.TypeofVararg)
     return Vararg{unwrapva(va)}
 end
 
-typename(a) = error("typename does not apply to this type")
-typename(a::DataType) = a.name
-function typename(a::Union)
-    ta = typename(a.a)
-    tb = typename(a.b)
-    ta === tb || error("typename does not apply to unions whose components have different typenames")
-    return tb
-end
-typename(union::UnionAll) = typename(union.body)
+import Core: typename
 
 _tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
@@ -849,11 +867,11 @@ end
 
     Using `@inbounds` may return incorrect results/crashes/corruption
     for out-of-bounds indices. The user is responsible for checking it manually.
-    Only use `@inbounds` when it is certain from the information locally available
-    that all accesses are in bounds. In particular, using `1:length(A)` instead of
-    `eachindex(A)` in a function like the one above is _not_ safely inbounds because
-    the first index of `A` may not be `1` for all user defined types that subtype
-    `AbstractArray`.
+    Only use `@inbounds` when you are certain that all accesses are in bounds (as
+    undefined behavior, e.g. crashes, might occur if this assertion is violated). For
+    example, using `1:length(A)` instead of `eachindex(A)` in a function like
+    the one above is _not_ safely inbounds because the first index of `A` may not
+    be `1` for all user defined types that subtype `AbstractArray`.
 """
 macro inbounds(blk)
     return Expr(:block,
@@ -889,16 +907,16 @@ end
 function getindex(A::Array, i::Int)
     @_noub_if_noinbounds_meta
     @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,))
-    memoryrefget(memoryref(getfield(A, :ref), i, false), :not_atomic, false)
+    memoryrefget(memoryrefnew(getfield(A, :ref), i, false), :not_atomic, false)
 end
 # simple Array{Any} operations needed for bootstrap
 function setindex!(A::Array{Any}, @nospecialize(x), i::Int)
     @_noub_if_noinbounds_meta
     @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,))
-    memoryrefset!(memoryref(getfield(A, :ref), i, false), x, :not_atomic, false)
+    memoryrefset!(memoryrefnew(getfield(A, :ref), i, false), x, :not_atomic, false)
     return A
 end
-setindex!(A::Memory{Any}, @nospecialize(x), i::Int) = (memoryrefset!(memoryref(memoryref(A), i, @_boundscheck), x, :not_atomic, @_boundscheck); A)
+setindex!(A::Memory{Any}, @nospecialize(x), i::Int) = (memoryrefset!(memoryrefnew(memoryrefnew(A), i, @_boundscheck), x, :not_atomic, @_boundscheck); A)
 setindex!(A::MemoryRef{T}, x) where {T} = (memoryrefset!(A, convert(T, x), :not_atomic, @_boundscheck); A)
 setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomic, @_boundscheck); A)
 
@@ -1066,6 +1084,12 @@ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; k
     return Core._call_in_world(world, Core.kwcall, kwargs, f, args...)
 end
 
+"""
+    inferencebarrier(x)
+
+A shorthand for `compilerbarrier(:type, x)` causes the type of this statement to be inferred as `Any`.
+See [`Base.compilerbarrier`](@ref) for more info.
+"""
 inferencebarrier(@nospecialize(x)) = compilerbarrier(:type, x)
 
 """
@@ -1224,3 +1248,16 @@ that is whether it has an `iterate` method or not.
 function isiterable(T)::Bool
     return hasmethod(iterate, Tuple{T})
 end
+
+# Special constprop heuristics for various binary opes
+typename(typeof(function + end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function - end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function * end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function == end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function != end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function <= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function >= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function < end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function > end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function << end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function >> end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
diff --git a/base/experimental.jl b/base/experimental.jl
index 8dbdcacd65376..58c7258120f3f 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -318,7 +318,7 @@ function show_error_hints(io, ex, args...)
     isnothing(hinters) && return
     for handler in hinters
         try
-            Base.invokelatest(handler, io, ex, args...)
+            @invokelatest handler(io, ex, args...)
         catch err
             tn = typeof(handler).name
             @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error"
@@ -330,17 +330,99 @@ end
 include("opaque_closure.jl")
 
 """
-    Experimental.@overlay mt [function def]
+    Base.Experimental.@overlay mt def
 
 Define a method and add it to the method table `mt` instead of to the global method table.
 This can be used to implement a method override mechanism. Regular compilation will not
 consider these methods, and you should customize the compilation flow to look in these
 method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)).
+
+!!! note
+    Please be aware that when defining overlay methods using `@overlay`, it is not necessary
+    to have an original method that corresponds exactly in terms of how the method dispatches.
+    This means that the method overlay mechanism enabled by `@overlay` is not implemented by
+    replacing the methods themselves, but through an additional and prioritized method
+    lookup during the method dispatch.
+
+    Considering this, it is important to understand that in compilations using an overlay
+    method table like the following, the method dispatched by `callx(x)` is not the regular
+    method `callx(::Float64)`, but the overlay method `callx(x::Real)`:
+    ```julia
+    callx(::Real) = :real
+    @overlay SOME_OVERLAY_MT callx(::Real) = :overlay_real
+    callx(::Float64) = :float64
+
+    # some overlay callsite
+    let x::Float64
+        callx(x) #> :overlay_real
+    end
+    ```
 """
 macro overlay(mt, def)
-    def = macroexpand(__module__, def) # to expand @inline, @generated, etc
-    is_function_def(def) || error("@overlay requires a function definition")
-    return esc(overlay_def!(mt, def))
+    inner = Base.unwrap_macrocalls(def)
+    is_function_def(inner) || error("@overlay requires a function definition")
+    overlay_def!(mt, inner)
+    return esc(def)
+end
+
+"""
+    Base.Experimental.@consistent_overlay mt def
+
+This macro operates almost identically to [`Base.Experimental.@overlay`](@ref), defining a
+new overlay method. The key difference with this macro is that it informs the compiler that
+the invocation of the overlay method it defines is `:consistent` with a regular,
+non-overlayed method call.
+
+More formally, when evaluating a generic function call ``f(x)`` at a specific world age
+``i``, if a regular method call ``fᵢ(x)`` is redirected to an overlay method call ``fᵢ′(x)``
+defined by this macro, ``fᵢ(x)`` and ``fᵢ′(x)`` are considered `:consistent` if the following
+conditions are met:
+- If ``fᵢ(x)`` returns a value ``y``, then ``fᵢ′(x)`` also returns some value ``yᵢ``, and ``y ≡ yᵢ`` holds.
+- If ``fᵢ(x)`` throws an exception, then ``fᵢ′(x)`` also throws some exception.
+
+For a detailed definition of `:consistent`-cy, consult the corresponding section in
+[`Base.@assume_effects`](@ref).
+
+!!! note
+    Note that the requirements for `:consistent`-cy include not only that the return values
+    are egal, but also that the manner of termination is the same. However, it's important
+    to aware that when they throw exceptions, the exceptions themselves don't necessarily
+    have to be egal. In other words, if ``fᵢ(x)`` throws an exception, ``fᵢ′(x)`` is
+    required to also throw one, but the exact exceptions may differ.
+
+!!! note
+    Please note that the `:consistent`-cy requirement applies not to method itself but to
+    _method invocation_. This means that for the use of `@consistent_overlay`, it is
+    necessary for method invocations with the native regular compilation and those with
+    a compilation with overlay method table to be `:consistent`.
+
+    For example, it is important to understand that, `@consistent_overlay` can be used like
+    the following:
+    ```julia
+    callsin(x::Real) = x < 0 ? error(x) : sin(x)
+    @consistent_overlay SOME_OVERLAY_MT callsin(x::Float64) =
+        x < 0 ? error_somehow(x) : sin(x)
+    ```
+    However, be aware that this `@consistent_overlay` will immediately become invalid if a
+    new method for `callsin` is defined subsequently, such as:
+    ```julia
+    callsin(x::Float64) = cos(x)
+    ```
+
+    This specifically implies that the use of `@consistent_overlay` should be restricted as
+    much as possible to cases where a regular method with a concrete signature is replaced
+    by an overlay method with the same concrete signature.
+
+    This constraint is closely related to the note in [`Base.Experimental.@overlay`](@ref);
+    you are advised to consult that as well.
+"""
+macro consistent_overlay(mt, def)
+    inner = Base.unwrap_macrocalls(def)
+    is_function_def(inner) || error("@consistent_overlay requires a function definition")
+    overlay_def!(mt, inner)
+    override = Core.Compiler.EffectsOverride(; consistent_overlay=true)
+    Base.pushmeta!(def::Expr, Base.form_purity_expr(override))
+    return esc(def)
 end
 
 function overlay_def!(mt, @nospecialize ex)
@@ -367,11 +449,11 @@ let new_mt(name::Symbol, mod::Module) = begin
 end
 
 """
-    Experimental.@MethodTable(name)
+    Base.Experimental.@MethodTable name
 
 Create a new MethodTable in the current module, bound to `name`. This method table can be
-used with the [`Experimental.@overlay`](@ref) macro to define methods for a function without
-adding them to the global method table.
+used with the [`Base.Experimental.@overlay`](@ref) macro to define methods for a function
+without adding them to the global method table.
 """
 :@MethodTable
 
diff --git a/base/exports.jl b/base/exports.jl
index cb4acce27c66d..daba9a010a9e6 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -58,6 +58,7 @@ export
     IOBuffer,
     IOStream,
     LinRange,
+    Lockable,
     Irrational,
     LazyString,
     Matrix,
@@ -406,6 +407,7 @@ export
     indexin,
     argmax,
     argmin,
+    insertdims,
     invperm,
     invpermute!,
     isassigned,
@@ -460,7 +462,6 @@ export
     vcat,
     vec,
     view,
-    wrap,
     zeros,
 
 # search, find, match and related functions
@@ -550,6 +551,7 @@ export
     mapfoldl,
     mapfoldr,
     mapreduce,
+    memoryref,
     merge!,
     mergewith!,
     merge,
@@ -594,6 +596,7 @@ export
     codepoint,
     codeunit,
     codeunits,
+    ctruncate,
     digits,
     digits!,
     eachsplit,
@@ -618,6 +621,7 @@ export
     join,
     lpad,
     lstrip,
+    ltruncate,
     ncodeunits,
     ndigits,
     nextind,
@@ -630,6 +634,7 @@ export
     rpad,
     rsplit,
     rstrip,
+    rtruncate,
     split,
     string,
     strip,
@@ -653,11 +658,6 @@ export
     sprint,
     summary,
 
-# ScopedValue
-    with,
-    @with,
-    ScopedValue,
-
 # logging
     @debug,
     @info,
@@ -711,6 +711,8 @@ export
     yield,
     yieldto,
     wait,
+    waitany,
+    waitall,
     timedwait,
     asyncmap,
     asyncmap!,
@@ -936,6 +938,7 @@ export
     isblockdev,
     ischardev,
     isdir,
+    isexecutable,
     isfifo,
     isfile,
     islink,
diff --git a/base/expr.jl b/base/expr.jl
index 94abe5b2eab67..c4f64b89de8b6 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -33,9 +33,6 @@ macro gensym(names...)
     return blk
 end
 
-## line numbers ##
-convert(::Type{LineNumberNode}, lin::Core.LineInfoNode) = LineNumberNode(Int(lin.line), lin.file)
-
 ## expressions ##
 
 isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
@@ -68,7 +65,7 @@ function copy_exprs(@nospecialize(x))
     end
     return x
 end
-copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in 1:length(x)]
+copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in eachindex(x)]
 
 @eval exprarray(head::Symbol, arg::Array{Any,1}) = $(Expr(:new, :Expr, :head, :arg))
 
@@ -81,8 +78,6 @@ function copy(c::CodeInfo)
     if cnew.slottypes !== nothing
         cnew.slottypes = copy(cnew.slottypes::Vector{Any})
     end
-    cnew.codelocs  = copy(cnew.codelocs)
-    cnew.linetable = copy(cnew.linetable::Union{Vector{Any},Vector{Core.LineInfoNode}})
     cnew.ssaflags  = copy(cnew.ssaflags)
     cnew.edges     = cnew.edges === nothing ? nothing : copy(cnew.edges::Vector)
     ssavaluetypes  = cnew.ssavaluetypes
@@ -484,7 +479,7 @@ CodeInfo(
 !!! compat "Julia 1.10"
     The usage within a function body requires at least Julia 1.10.
 
-!!! compact "Julia 1.11"
+!!! compat "Julia 1.11"
     The code block annotation requires at least Julia 1.11.
 
 !!! warning
@@ -510,6 +505,7 @@ The following `setting`s are supported.
 - `:inaccessiblememonly`
 - `:noub`
 - `:noub_if_noinbounds`
+- `:nortcall`
 - `:foldable`
 - `:removable`
 - `:total`
@@ -530,7 +526,7 @@ The `:consistent` setting asserts that for egal (`===`) inputs:
 
 !!! note
     The `:consistent`-cy assertion is made world-age wise. More formally, write
-    ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then we require:
+    ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then this setting requires:
     ```math
     ∀ i, x, y: x ≡ y → fᵢ(x) ≡ fᵢ(y)
     ```
@@ -678,6 +674,20 @@ The `:noub` setting asserts that the method will not execute any undefined behav
 any other effect assertions (such as `:consistent` or `:effect_free`) as well, but we do
 not model this, and they assume the absence of undefined behavior.
 
+---
+## `:nortcall`
+
+The `:nortcall` setting asserts that the method does not call `Core.Compiler.return_type`,
+and that any other methods this method might call also do not call `Core.Compiler.return_type`.
+
+!!! note
+    To be precise, this assertion can be used when a call to `Core.Compiler.return_type` is
+    not made at runtime; that is, when the result of `Core.Compiler.return_type` is known
+    exactly at compile time and the call is eliminated by the optimizer. However, since
+    whether the result of `Core.Compiler.return_type` is folded at compile time depends
+    heavily on the compiler's implementation, it is generally risky to assert this if
+    the method in question uses `Core.Compiler.return_type` in any form.
+
 ---
 ## `:foldable`
 
@@ -688,6 +698,7 @@ currently equivalent to the following `setting`s:
 - `:effect_free`
 - `:terminates_globally`
 - `:noub`
+- `:nortcall`
 
 !!! note
     This list in particular does not include `:nothrow`. The compiler will still
@@ -721,6 +732,7 @@ the following other `setting`s:
 - `:notaskstate`
 - `:inaccessiblememonly`
 - `:noub`
+- `:nortcall`
 
 !!! warning
     `:total` is a very strong assertion and will likely gain additional semantics
@@ -742,7 +754,7 @@ macro assume_effects(args...)
     lastex = args[end]
     override = compute_assumed_settings(args[begin:end-1])
     if is_function_def(unwrap_macrocalls(lastex))
-        return esc(pushmeta!(lastex, form_purity_expr(override)))
+        return esc(pushmeta!(lastex::Expr, form_purity_expr(override)))
     elseif isexpr(lastex, :macrocall) && lastex.args[1] === Symbol("@ccall")
         lastex.args[1] = GlobalRef(Base, Symbol("@ccall_effects"))
         insert!(lastex.args, 3, Core.Compiler.encode_effects_override(override))
@@ -767,7 +779,7 @@ function compute_assumed_settings(settings)
     for setting in settings
         override = compute_assumed_setting(override, setting)
         override === nothing &&
-            throw(ArgumentError("@assume_effects $setting not supported"))
+            throw(ArgumentError("`@assume_effects $setting` not supported"))
     end
     return override
 end
@@ -799,26 +811,27 @@ function compute_assumed_setting(override::EffectsOverride, @nospecialize(settin
     elseif setting === :noub_if_noinbounds
         return EffectsOverride(override; noub_if_noinbounds = val)
     elseif setting === :foldable
-        consistent = effect_free = terminates_globally = noub = val
-        return EffectsOverride(override; consistent, effect_free, terminates_globally, noub)
+        consistent = effect_free = terminates_globally = noub = nortcall = val
+        return EffectsOverride(override; consistent, effect_free, terminates_globally, noub, nortcall)
     elseif setting === :removable
         effect_free = nothrow = terminates_globally = val
         return EffectsOverride(override; effect_free, nothrow, terminates_globally)
     elseif setting === :total
         consistent = effect_free = nothrow = terminates_globally = notaskstate =
-            inaccessiblememonly = noub = val
+            inaccessiblememonly = noub = nortcall = val
         return EffectsOverride(override;
             consistent, effect_free, nothrow, terminates_globally, notaskstate,
-            inaccessiblememonly, noub)
+            inaccessiblememonly, noub, nortcall)
     end
     return nothing
 end
 
 function form_purity_expr(override::EffectsOverride)
-    return Expr(:purity,
-        override.consistent, override.effect_free, override.nothrow,
-        override.terminates_globally, override.terminates_locally, override.notaskstate,
-        override.inaccessiblememonly, override.noub, override.noub_if_noinbounds)
+    ex = Expr(:purity)
+    for i = 1:Core.Compiler.NUM_EFFECTS_OVERRIDES
+        push!(ex.args, getfield(override, i))
+    end
+    return ex
 end
 
 """
@@ -854,6 +867,9 @@ while it can not infer the concrete return type of it.
 Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`,
 indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on
 specialized code generation.
+
+!!! compat "Julia 1.10"
+    Using `Base.@nospecializeinfer` requires Julia version 1.10.
 """
 macro nospecializeinfer(ex)
     esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex)
@@ -1016,14 +1032,10 @@ function remove_linenums!(@nospecialize ex)
         for subex in ex.args
             subex isa Expr && remove_linenums!(subex)
         end
-        return ex
     elseif ex isa CodeInfo
-        ex.codelocs .= 0
-        length(ex.linetable::Vector) > 1 && resize!(ex.linetable::Vector, 1)
-        return ex
-    else
-        return ex
+        ex.debuginfo = Core.DebugInfo(ex.debuginfo.def) # TODO: filter partially, but keep edges
     end
+    return ex
 end
 
 replace_linenums!(ex, ln::LineNumberNode) = ex
@@ -1109,13 +1121,22 @@ If no `order` is specified it defaults to :sequentially_consistent.
     @atomic a.b.x += addend
     @atomic :release a.b.x = new
     @atomic :acquire_release a.b.x += addend
+    @atomic m[idx] = new
+    @atomic m[idx] += addend
+    @atomic :release m[idx] = new
+    @atomic :acquire_release m[idx] += addend
 
 Perform the store operation expressed on the right atomically and return the
 new value.
 
-With `=`, this operation translates to a `setproperty!(a.b, :x, new)` call.
-With any operator also, this operation translates to a `modifyproperty!(a.b,
-:x, +, addend)[2]` call.
+With assignment (`=`), this operation translates to a `setproperty!(a.b, :x, new)`
+or, in case of reference, to a `setindex_atomic!(m, order, new, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
+
+With any modifying operator this operation translates to a
+`modifyproperty!(a.b, :x, op, addend)[2]` or, in case of reference, to a
+`modifyindex_atomic!(m, order, op, addend, idx...)[2]` call,
+with `order` defaulting to `:sequentially_consistent`.
 
     @atomic a.b.x max arg2
     @atomic a.b.x + arg2
@@ -1123,12 +1144,20 @@ With any operator also, this operation translates to a `modifyproperty!(a.b,
     @atomic :acquire_release max(a.b.x, arg2)
     @atomic :acquire_release a.b.x + arg2
     @atomic :acquire_release a.b.x max arg2
+    @atomic m[idx] max arg2
+    @atomic m[idx] + arg2
+    @atomic max(m[idx], arg2)
+    @atomic :acquire_release max(m[idx], arg2)
+    @atomic :acquire_release m[idx] + arg2
+    @atomic :acquire_release m[idx] max arg2
 
 Perform the binary operation expressed on the right atomically. Store the
-result into the field in the first argument and return the values `(old, new)`.
-
-This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
+result into the field or the reference in the first argument, and return the values
+`(old, new)`.
 
+This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` or,
+in case of reference to a `modifyindex_atomic!(m, order, func, arg2, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1161,8 +1190,36 @@ julia> @atomic a.x max 5 # again change field x of a to the max value, with sequ
 10 => 10
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 2 # set mem[1] to value 2 with sequential consistency
+2
+
+julia> @atomic :monotonic mem[1] # fetch the first value of mem, with monotonic consistency
+2
+
+julia> @atomic mem[1] += 1 # increment the first value of mem, with sequential consistency
+3
+
+julia> @atomic mem[1] + 1 # increment the first value of mem, with sequential consistency
+3 => 4
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+4
+
+julia> @atomic max(mem[1], 10) # change the first value of mem to the max value, with sequential consistency
+4 => 10
+
+julia> @atomic mem[1] max 5 # again change the first value of mem to the max value, with sequential consistency
+10 => 10
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomic(ex)
     if !isa(ex, Symbol) && !is_expr(ex, :(::))
@@ -1189,11 +1246,17 @@ function make_atomic(order, ex)
             return :(getproperty($l, $r, $order))
         elseif isexpr(ex, :call, 3)
             return make_atomic(order, ex.args[2], ex.args[1], ex.args[3])
+        elseif isexpr(ex, :ref)
+            x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end])
+            return :(getindex_atomic($x, $order, $(idcs...)))
         elseif ex.head === :(=)
             l, r = ex.args[1], esc(ex.args[2])
             if is_expr(l, :., 2)
                 ll, lr = esc(l.args[1]), esc(l.args[2])
                 return :(setproperty!($ll, $lr, $r, $order))
+            elseif is_expr(l, :ref)
+                x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+                return :(setindex_atomic!($x, $order, $r, $(idcs...)))
             end
         end
         if length(ex.args) == 2
@@ -1216,19 +1279,29 @@ function make_atomic(order, ex)
 end
 function make_atomic(order, a1, op, a2)
     @nospecialize
-    is_expr(a1, :., 2) || error("@atomic modify expression missing field access")
-    a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
-    return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+    if is_expr(a1, :., 2)
+        a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
+        return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+    elseif is_expr(a1, :ref)
+        x, idcs, op, a2 = esc(a1.args[1]), map(esc, a1.args[2:end]), esc(op), esc(a2)
+        return :(modifyindex_atomic!($x, $order, $op, $a2, $(idcs...)))
+    end
+    error("@atomic modify expression missing field access or indexing")
 end
 
 
 """
     @atomicswap a.b.x = new
     @atomicswap :sequentially_consistent a.b.x = new
+    @atomicswap m[idx] = new
+    @atomicswap :sequentially_consistent m[idx] = new
 
-Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
+Stores `new` into `a.b.x` (`m[idx]` in case of reference) and returns the old
+value of `a.b.x` (the old value stored at `m[idx]`, respectively).
 
-This operation translates to a `swapproperty!(a.b, :x, new)` call.
+This operation translates to a `swapproperty!(a.b, :x, new)` or,
+in case of reference, `swapindex_atomic!(mem, order, new, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1246,8 +1319,23 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 1;
+
+julia> @atomicswap mem[1] = 4 # replace the first value of `mem` with 4, with sequential consistency
+1
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+4
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomicswap(order, ex)
     order isa QuoteNode || (order = esc(order))
@@ -1260,9 +1348,14 @@ function make_atomicswap(order, ex)
     @nospecialize
     is_expr(ex, :(=), 2) || error("@atomicswap expression missing assignment")
     l, val = ex.args[1], esc(ex.args[2])
-    is_expr(l, :., 2) || error("@atomicswap expression missing field access")
-    ll, lr = esc(l.args[1]), esc(l.args[2])
-    return :(swapproperty!($ll, $lr, $val, $order))
+    if is_expr(l, :., 2)
+        ll, lr = esc(l.args[1]), esc(l.args[2])
+        return :(swapproperty!($ll, $lr, $val, $order))
+    elseif is_expr(l, :ref)
+        x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+        return :(swapindex_atomic!($x, $order, $val, $(idcs...)))
+    end
+    error("@atomicswap expression missing field access or indexing")
 end
 
 
@@ -1270,12 +1363,18 @@ end
     @atomicreplace a.b.x expected => desired
     @atomicreplace :sequentially_consistent a.b.x expected => desired
     @atomicreplace :sequentially_consistent :monotonic a.b.x expected => desired
+    @atomicreplace m[idx] expected => desired
+    @atomicreplace :sequentially_consistent m[idx] expected => desired
+    @atomicreplace :sequentially_consistent :monotonic m[idx] expected => desired
 
 Perform the conditional replacement expressed by the pair atomically, returning
 the values `(old, success::Bool)`. Where `success` indicates whether the
 replacement was completed.
 
-This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
+This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` or,
+in case of reference, to a
+`replaceindex_atomic!(mem, success_order, fail_order, expected, desired, idx)` call,
+with both orders defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1292,7 +1391,7 @@ julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 2
 
-julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+julia> @atomicreplace a.x 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency
 (old = 2, success = false)
 
 julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency
@@ -1304,8 +1403,34 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency
 0
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 1;
+
+julia> @atomicreplace mem[1] 1 => 2 # replace the first value of mem with 2 if it was 1, with sequential consistency
+(old = 1, success = true)
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+2
+
+julia> @atomicreplace mem[1] 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 2, success = false)
+
+julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency
+
+julia> @atomicreplace mem[1] xchg
+(old = 2, success = true)
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+0
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomicreplace(success_order, fail_order, ex, old_new)
     fail_order isa QuoteNode || (fail_order = esc(fail_order))
@@ -1321,27 +1446,42 @@ macro atomicreplace(ex, old_new)
 end
 function make_atomicreplace(success_order, fail_order, ex, old_new)
     @nospecialize
-    is_expr(ex, :., 2) || error("@atomicreplace expression missing field access")
-    ll, lr = esc(ex.args[1]), esc(ex.args[2])
-    if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
-        exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
-        return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
-    else
-        old_new = esc(old_new)
-        return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+    if is_expr(ex, :., 2)
+        ll, lr = esc(ex.args[1]), esc(ex.args[2])
+        if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+            exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+            return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
+        else
+            old_new = esc(old_new)
+            return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+        end
+    elseif is_expr(ex, :ref)
+        x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end])
+        if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+            exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+            return :(replaceindex_atomic!($x, $success_order, $fail_order, $exp, $rep, $(idcs...)))
+        else
+            old_new = esc(old_new)
+            return :(replaceindex_atomic!($x, $success_order, $fail_order, $old_new::Pair..., $(idcs...)))
+        end
     end
+    error("@atomicreplace expression missing field access or indexing")
 end
 
 """
     @atomiconce a.b.x = value
     @atomiconce :sequentially_consistent a.b.x = value
     @atomiconce :sequentially_consistent :monotonic a.b.x = value
+    @atomiconce m[idx] = value
+    @atomiconce :sequentially_consistent m[idx] = value
+    @atomiconce :sequentially_consistent :monotonic m[idx] = value
 
 Perform the conditional assignment of value atomically if it was previously
-unset, returning the value `success::Bool`. Where `success` indicates whether
-the assignment was completed.
+unset. Returned value `success::Bool` indicates whether the assignment was completed.
 
-This operation translates to a `setpropertyonce!(a.b, :x, value)` call.
+This operation translates to a `setpropertyonce!(a.b, :x, value)` or,
+in case of reference, to a `setindexonce_atomic!(m, success_order, fail_order, value, idx)` call,
+with both orders defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1361,12 +1501,39 @@ true
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 1
 
-julia> @atomiconce a.x = 1 # set field x of a to 1, if unset, with sequential consistency
+julia> @atomiconce :monotonic a.x = 2 # set field x of a to 1, if unset, with monotonic consistence
+false
+```
+
+```jldoctest
+julia> mem = AtomicMemory{Vector{Int}}(undef, 1);
+
+julia> isassigned(mem, 1)
 false
+
+julia> @atomiconce mem[1] = [1] # set the first value of mem to [1], if unset, with sequential consistency
+true
+
+julia> isassigned(mem, 1)
+true
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+1-element Vector{Int64}:
+ 1
+
+julia> @atomiconce :monotonic mem[1] = [2] # set the first value of mem to [2], if unset, with monotonic
+false
+
+julia> @atomic mem[1]
+1-element Vector{Int64}:
+ 1
 ```
 
 !!! compat "Julia 1.11"
-    This functionality requires at least Julia 1.11.
+    Atomic fields functionality requires at least Julia 1.11.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomiconce(success_order, fail_order, ex)
     fail_order isa QuoteNode || (fail_order = esc(fail_order))
@@ -1384,7 +1551,12 @@ function make_atomiconce(success_order, fail_order, ex)
     @nospecialize
     is_expr(ex, :(=), 2) || error("@atomiconce expression missing assignment")
     l, val = ex.args[1], esc(ex.args[2])
-    is_expr(l, :., 2) || error("@atomiconce expression missing field access")
-    ll, lr = esc(l.args[1]), esc(l.args[2])
-    return :(setpropertyonce!($ll, $lr, $val, $success_order, $fail_order))
+    if is_expr(l, :., 2)
+        ll, lr = esc(l.args[1]), esc(l.args[2])
+        return :(setpropertyonce!($ll, $lr, $val, $success_order, $fail_order))
+    elseif is_expr(l, :ref)
+        x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+        return :(setindexonce_atomic!($x, $success_order, $fail_order, $val, $(idcs...)))
+    end
+    error("@atomiconce expression missing field access or indexing")
 end
diff --git a/base/fastmath.jl b/base/fastmath.jl
index a34a4ca66e06b..b82d613f1fc76 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -30,6 +30,7 @@ export @fastmath
 import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
     add_float_fast, sub_float_fast, mul_float_fast, div_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
+import Base: afoldl
 
 const fast_op =
     Dict(# basic arithmetic
@@ -101,9 +102,12 @@ const rewrite_op =
 function make_fastmath(expr::Expr)
     if expr.head === :quote
         return expr
-    elseif expr.head === :call && expr.args[1] === :^ && expr.args[3] isa Integer
-        # mimic Julia's literal_pow lowering of literal integer powers
-        return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(expr.args[2]), Val{expr.args[3]}())
+    elseif expr.head === :call && expr.args[1] === :^
+        ea = expr.args
+        if length(ea) >= 3 && isa(ea[3], Int)
+            # mimic Julia's literal_pow lowering of literal integer powers
+            return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(ea[2]), Val(ea[3]))
+        end
     end
     op = get(rewrite_op, expr.head, :nothing)
     if op !== :nothing
@@ -165,11 +169,6 @@ sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y)
 mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y)
 div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y)
 
-add_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} =
-    add_fast(add_fast(x, y), zs...)
-mul_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} =
-    mul_fast(mul_fast(x, y), zs...)
-
 @fastmath begin
     cmp_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(x==y, 0, ifelse(x<y, -1, +1))
     log_fast(b::T, x::T) where {T<:FloatTypes} = log_fast(x)/log_fast(b)
@@ -242,9 +241,6 @@ ComplexTypes = Union{ComplexF32, ComplexF64}
     max_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, y, x)
     min_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, x, y)
     minmax_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, (x,y), (y,x))
-
-    max_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = max_fast(max_fast(x, y), z...)
-    min_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = min_fast(min_fast(x, y), z...)
 end
 
 # fall-back implementations and type promotion
@@ -257,7 +253,7 @@ for op in (:abs, :abs2, :conj, :inv, :sign)
     end
 end
 
-for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax)
+for op in (:-, :/, :(==), :!=, :<, :<=, :cmp, :rem, :minmax)
     op_fast = fast_op[op]
     @eval begin
         # fall-back implementation for non-numeric types
@@ -270,6 +266,31 @@ for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax)
     end
 end
 
+for op in (:+, :*, :min, :max)
+    op_fast = fast_op[op]
+    @eval begin
+        $op_fast(x) = $op(x)
+        # fall-back implementation for non-numeric types
+        $op_fast(x, y) = $op(x, y)
+        # type promotion
+        $op_fast(x::Number, y::Number) =
+            $op_fast(promote(x,y)...)
+        # fall-back implementation that applies after promotion
+        $op_fast(x::T,y::T) where {T<:Number} = $op(x,y)
+        # note: these definitions must not cause a dispatch loop when +(a,b) is
+        # not defined, and must only try to call 2-argument definitions, so
+        # that defining +(a,b) is sufficient for full functionality.
+        ($op_fast)(a, b, c, xs...) = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...))
+        # a further concern is that it's easy for a type like (Int,Int...)
+        # to match many definitions, so we need to keep the number of
+        # definitions down to avoid losing type information.
+        # type promotion
+        $op_fast(a::Number, b::Number, c::Number, xs::Number...) =
+            $op_fast(promote(a,b,c,xs...)...)
+        # fall-back implementation that applies after promotion
+        $op_fast(a::T, b::T, c::T, xs::T...) where {T<:Number} = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...))
+    end
+end
 
 # Math functions
 exp2_fast(x::Union{Float32,Float64})  = Base.Math.exp2_fast(x)
@@ -278,8 +299,12 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32.i32", llvmcall, Float32, (Float32, Int32), x, y)
-pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
+function pow_fast(x::Float64, y::Integer)
+    z = y % Int32
+    z == y ? pow_fast(x, z) : x^y
+end
+pow_fast(x::Float32, y::Integer) = x^y
+pow_fast(x::Float64, y::Int32) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
@@ -364,6 +389,10 @@ for f in (:^, :atan, :hypot, :log)
         # fall-back implementation that applies after promotion
         $f_fast(x::T, y::T) where {T<:Number} = $f(x, y)
     end
+    # Issue 53886 - avoid promotion of Int128 etc to be consistent with non-fastmath
+    if f === :^
+        @eval $f_fast(x::Number, y::Integer) = $f(x, y)
+    end
 end
 
 # Reductions
diff --git a/base/file.jl b/base/file.jl
index b7adb8e1a3fbd..567783c4b1e5b 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -230,15 +230,19 @@ julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directo
 julia> isdir("intermediate_dir/actually_a_directory.txt")
 true
 
+julia> mkpath("my/test/dir/") # returns the original `path`
+"my/test/dir/"
 ```
 """
 function mkpath(path::AbstractString; mode::Integer = 0o777)
-    isdirpath(path) && (path = dirname(path))
-    dir = dirname(path)
-    (path == dir || isdir(path)) && return path
-    mkpath(dir, mode = checkmode(mode))
+    parent = dirname(path)
+    # stop recursion for `""`, `"/"`, or existing dir
+    (path == parent || isdir(path)) && return path
+    mkpath(parent, mode = checkmode(mode))
     try
-        mkdir(path, mode = mode)
+        # The `isdir` check could be omitted, then `mkdir` will throw an error in cases like `x/`.
+        # Although the error will not be rethrown, we avoid it in advance for performance reasons.
+        isdir(path) || mkdir(path, mode = mode)
     catch err
         # If there is a problem with making the directory, but the directory
         # does in fact exist, then ignore the error. Else re-throw it.
@@ -246,7 +250,7 @@ function mkpath(path::AbstractString; mode::Integer = 0o777)
             rethrow()
         end
     end
-    path
+    return path
 end
 
 # Files that were requested to be deleted but can't be by the current process
@@ -381,7 +385,7 @@ of the file or directory `src` refers to.
 Return `dst`.
 
 !!! note
-    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    The `cp` function is different from the `cp` Unix command. The `cp` function always operates on
     the assumption that `dst` is a file, while the command does different things depending
     on whether `dst` is a directory or a file.
     Using `force=true` when `dst` is a directory will result in loss of all the contents present
@@ -434,13 +438,73 @@ julia> mv("hello.txt", "goodbye.txt", force=true)
 julia> rm("goodbye.txt");
 
 ```
+
+!!! note
+    The `mv` function is different from the `mv` Unix command. The `mv` function by
+    default will error if `dst` exists, while the command will delete
+    an existing `dst` file by default.
+    Also the `mv` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function mv(src::AbstractString, dst::AbstractString; force::Bool=false)
-    checkfor_mv_cp_cptree(src, dst, "moving"; force=force)
-    rename(src, dst)
+    if force
+        _mv_replace(src, dst)
+    else
+        _mv_noreplace(src, dst)
+    end
+end
+
+function _mv_replace(src::AbstractString, dst::AbstractString)
+    # This check is copied from checkfor_mv_cp_cptree
+    if ispath(dst) && Base.samefile(src, dst)
+        abs_src = islink(src) ? abspath(readlink(src)) : abspath(src)
+        abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst)
+        throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ",
+                                   "This is not supported.\n  ",
+                                   "`src` refers to: $(abs_src)\n  ",
+                                   "`dst` refers to: $(abs_dst)\n")))
+    end
+    # First try to do a regular rename, because this might avoid a situation
+    # where dst is deleted or truncated.
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on rename error try to delete dst if it exists and isn't the same as src
+        checkfor_mv_cp_cptree(src, dst, "moving"; force=true)
+        try
+            rename(src, dst)
+        catch err
+            err isa IOError || rethrow()
+            # on second error, default to force cp && rm
+            cp(src, dst; force=true, follow_symlinks=false)
+            rm(src; recursive=true)
+        end
+    end
+    dst
+end
+
+function _mv_noreplace(src::AbstractString, dst::AbstractString)
+    # Error if dst exists.
+    # This check currently has TOCTTOU issues.
+    checkfor_mv_cp_cptree(src, dst, "moving"; force=false)
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on error, default to cp && rm
+        cp(src, dst; force=false, follow_symlinks=false)
+        rm(src; recursive=true)
+    end
     dst
 end
 
+
 """
     touch(path::AbstractString)
     touch(fd::File)
@@ -821,7 +885,7 @@ end
     mktempdir(f::Function, parent=tempdir(); prefix=$(repr(temp_prefix)))
 
 Apply the function `f` to the result of [`mktempdir(parent; prefix)`](@ref) and remove the
-temporary directory all of its contents upon completion.
+temporary directory and all of its contents upon completion.
 
 See also: [`mktemp`](@ref), [`mkdir`](@ref).
 
@@ -1039,24 +1103,30 @@ end
     walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw)
 
 Return an iterator that walks the directory tree of a directory.
-The iterator returns a tuple containing `(rootpath, dirs, files)`.
+
+The iterator returns a tuple containing `(path, dirs, files)`.
+Each iteration `path` will change to the next directory in the tree;
+then `dirs` and `files` will be vectors containing the directories and files
+in the current `path` directory.
 The directory tree can be traversed top-down or bottom-up.
 If `walkdir` or `stat` encounters a `IOError` it will rethrow the error by default.
 A custom error handling function can be provided through `onerror` keyword argument.
 `onerror` is called with a `IOError` as argument.
+The returned iterator is stateful so when accessed repeatedly each access will
+resume where the last left off, like [`Iterators.Stateful`](@ref).
 
 See also: [`readdir`](@ref).
 
 # Examples
 ```julia
-for (root, dirs, files) in walkdir(".")
-    println("Directories in \$root")
+for (path, dirs, files) in walkdir(".")
+    println("Directories in \$path")
     for dir in dirs
-        println(joinpath(root, dir)) # path to directories
+        println(joinpath(path, dir)) # path to directories
     end
-    println("Files in \$root")
+    println("Files in \$path")
     for file in files
-        println(joinpath(root, file)) # path to files
+        println(joinpath(path, file)) # path to files
     end
 end
 ```
@@ -1066,18 +1136,18 @@ julia> mkpath("my/test/dir");
 
 julia> itr = walkdir("my");
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my", ["test"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test", ["dir"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test/dir", String[], String[])
 ```
 """
-function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
-    function _walkdir(chnl, root)
+function walkdir(path; topdown=true, follow_symlinks=false, onerror=throw)
+    function _walkdir(chnl, path)
         tryf(f, p) = try
                 f(p)
             catch err
@@ -1089,7 +1159,7 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
                 end
                 return
             end
-        entries = tryf(_readdirx, root)
+        entries = tryf(_readdirx, path)
         entries === nothing && return
         dirs = Vector{String}()
         files = Vector{String}()
@@ -1103,17 +1173,17 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
         end
 
         if topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         for dir in dirs
-            _walkdir(chnl, joinpath(root, dir))
+            _walkdir(chnl, joinpath(path, dir))
         end
         if !topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         nothing
     end
-    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, root))
+    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, path))
 end
 
 function unlink(p::AbstractString)
@@ -1122,15 +1192,38 @@ function unlink(p::AbstractString)
     nothing
 end
 
-# For move command
-function rename(src::AbstractString, dst::AbstractString; force::Bool=false)
-    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), src, dst)
-    # on error, default to cp && rm
+"""
+    Base.rename(oldpath::AbstractString, newpath::AbstractString)
+
+Change the name of a file or directory from `oldpath` to `newpath`.
+If `newpath` is an existing file or empty directory it may be replaced.
+Equivalent to [rename(2)](https://man7.org/linux/man-pages/man2/rename.2.html) on Unix.
+If a path contains a "\\0" throw an `ArgumentError`.
+On other failures throw an `IOError`.
+Return `newpath`.
+
+This is a lower level filesystem operation used to implement [`mv`](@ref).
+
+OS-specific restrictions may apply when `oldpath` and `newpath` are in different directories.
+
+Currently there are a few differences in behavior on Windows which may be resolved in a future release.
+Specifically, currently on Windows:
+1. `rename` will fail if `oldpath` or `newpath` are opened files.
+2. `rename` will fail if `newpath` is an existing directory.
+3. `rename` may work if `newpath` is a file and `oldpath` is a directory.
+4. `rename` may remove `oldpath` if it is a hardlink to `newpath`.
+
+See also: [`mv`](@ref).
+
+!!! compat "Julia 1.12"
+    This method was made public in Julia 1.12.
+"""
+function rename(oldpath::AbstractString, newpath::AbstractString)
+    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), oldpath, newpath)
     if err < 0
-        cp(src, dst; force=force, follow_symlinks=false)
-        rm(src; recursive=true)
+        uv_error("rename($(repr(oldpath)), $(repr(newpath)))", err)
     end
-    nothing
+    newpath
 end
 
 function sendfile(src::AbstractString, dst::AbstractString)
diff --git a/base/filesystem.jl b/base/filesystem.jl
index ee6fde982caab..bc1f4942877e8 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -140,7 +140,8 @@ import .Base:
     IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
     bytesavailable, position, read, read!, readavailable, seek, seekend, show,
     skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
-    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
+    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize,
+    isexecutable, isreadable, iswritable, MutableDenseArrayType
 
 import .Base.RefValue
 
@@ -158,7 +159,7 @@ uv_fs_req_cleanup(req) = ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
 include("path.jl")
 include("stat.jl")
 include("file.jl")
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "file_constants.jl"))  # include($BUILDROOT/base/file_constants.jl)
+include(string(Base.BUILDROOT, "file_constants.jl"))  # include($BUILDROOT/base/file_constants.jl)
 
 ## Operations with File (fd) objects ##
 
@@ -308,7 +309,7 @@ bytesavailable(f::File) = max(0, filesize(f) - position(f)) # position can be >
 
 eof(f::File) = bytesavailable(f) == 0
 
-function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
+function readbytes!(f::File, b::MutableDenseArrayType{UInt8}, nb=length(b))
     nr = min(nb, bytesavailable(f))
     if length(b) < nr
         resize!(b, nr)
@@ -365,5 +366,85 @@ function touch(f::File)
     f
 end
 
+"""
+    isexecutable(path::String)
+
+Return `true` if the given `path` has executable permissions.
+
+!!! note
+    This permission may change before the user executes `path`,
+    so it is recommended to execute the file and handle the error if that fails,
+    rather than calling `isexecutable` first.
+
+!!! note
+    Prior to Julia 1.6, this did not correctly interrogate filesystem
+    ACLs on Windows, therefore it would return `true` for any
+    file.  From Julia 1.6 on, it correctly determines whether the
+    file is marked as executable or not.
+
+See also [`ispath`](@ref), [`isreadable`](@ref), [`iswritable`](@ref).
+"""
+function isexecutable(path::String)
+    # We use `access()` and `X_OK` to determine if a given path is
+    # executable by the current user.  `X_OK` comes from `unistd.h`.
+    X_OK = 0x01
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, X_OK) == 0
+end
+isexecutable(path::AbstractString) = isexecutable(String(path))
+
+"""
+    isreadable(path::String)
+
+Return `true` if the access permissions for the given `path` permitted reading by the current user.
+
+!!! note
+    This permission may change before the user calls `open`,
+    so it is recommended to just call `open` alone and handle the error if that fails,
+    rather than calling `isreadable` first.
+
+!!! note
+    Currently this function does not correctly interrogate filesystem
+    ACLs on Windows, therefore it can return wrong results.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+
+See also [`ispath`](@ref), [`isexecutable`](@ref), [`iswritable`](@ref).
+"""
+function isreadable(path::String)
+    # We use `access()` and `R_OK` to determine if a given path is
+    # readable by the current user.  `R_OK` comes from `unistd.h`.
+    R_OK = 0x04
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, R_OK) == 0
+end
+isreadable(path::AbstractString) = isreadable(String(path))
+
+"""
+    iswritable(path::String)
+
+Return `true` if the access permissions for the given `path` permitted writing by the current user.
+
+!!! note
+    This permission may change before the user calls `open`,
+    so it is recommended to just call `open` alone and handle the error if that fails,
+    rather than calling `iswritable` first.
+
+!!! note
+    Currently this function does not correctly interrogate filesystem
+    ACLs on Windows, therefore it can return wrong results.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+
+See also [`ispath`](@ref), [`isexecutable`](@ref), [`isreadable`](@ref).
+"""
+function iswritable(path::String)
+    # We use `access()` and `W_OK` to determine if a given path is
+    # writeable by the current user.  `W_OK` comes from `unistd.h`.
+    W_OK = 0x02
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, W_OK) == 0
+end
+iswritable(path::AbstractString) = iswritable(String(path))
+
 
 end
diff --git a/base/float.jl b/base/float.jl
index 64dcb8b807550..ff628f0ac7126 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -464,6 +464,19 @@ round(x::IEEEFloat, ::RoundingMode{:Down})    = floor_llvm(x)
 round(x::IEEEFloat, ::RoundingMode{:Up})      = ceil_llvm(x)
 round(x::IEEEFloat, ::RoundingMode{:Nearest}) = rint_llvm(x)
 
+rounds_up(x, ::RoundingMode{:Down}) = false
+rounds_up(x, ::RoundingMode{:Up}) = true
+rounds_up(x, ::RoundingMode{:ToZero}) = signbit(x)
+rounds_up(x, ::RoundingMode{:FromZero}) = !signbit(x)
+function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat}
+    x_t = convert(T, x_integer)
+    if rounds_up(x, r)
+        x_t < x ? nextfloat(x_t) : x_t
+    else
+        x_t > x ? prevfloat(x_t) : x_t
+    end
+end
+
 ## floating point promotions ##
 promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
 promote_rule(::Type{Float64}, ::Type{Float16}) = Float64
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index a2a0f60bcf399..67e7899b4107c 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -42,7 +42,7 @@ it is the minimum of `maxintfloat(T)` and [`typemax(S)`](@ref).
 maxintfloat(::Type{S}, ::Type{T}) where {S<:AbstractFloat, T<:Integer} = min(maxintfloat(S), S(typemax(T)))
 maxintfloat() = maxintfloat(Float64)
 
-isinteger(x::AbstractFloat) = (x - trunc(x) == 0)
+isinteger(x::AbstractFloat) = iszero(x - trunc(x)) # note: x == trunc(x) would be incorrect for x=Inf
 
 # See rounding.jl for docstring.
 
diff --git a/base/gcutils.jl b/base/gcutils.jl
index 3b8c4cf4ede10..84a184537ffc0 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -38,7 +38,7 @@ WeakRef
 # Used by `Base.finalizer` to validate mutability of an object being finalized.
 function _check_mutable(@nospecialize(o)) @noinline
     if !ismutable(o)
-        error("objects of type ", typeof(o), " cannot be finalized")
+        error("objects of type ", typeof(o), " cannot be finalized because they are not mutable")
     end
 end
 
@@ -109,6 +109,8 @@ Module with garbage collection utilities.
 """
 module GC
 
+public gc, enable, @preserve, safepoint, enable_logging, logging_enabled
+
 # mirrored from julia.h
 const GC_AUTO = 0
 const GC_FULL = 1
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
index b5f519a0f854d..6537839320206 100644
--- a/base/genericmemory.jl
+++ b/base/genericmemory.jl
@@ -3,9 +3,21 @@
 ## genericmemory.jl: Managed Memory
 
 """
-    GenericMemory{kind::Symbol, T, addrspace=Core.CPU} <: AbstractVector{T}
+    GenericMemory{kind::Symbol, T, addrspace=Core.CPU} <: DenseVector{T}
 
-One-dimensional dense array with elements of type `T`.
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+
+`kind` can currently be either `:not_atomic` or `:atomic`. For details on what `:atomic` implies, see [`AtomicMemory`](@ref)
+
+`addrspace` can currently only be set to Core.CPU. It is designed to  to permit extension by other systems
+such as GPUs, which might define values such as:
+```
+module CUDA
+const Generic = bitcast(Core.AddrSpace{CUDA}, 0)
+const Global = bitcast(Core.AddrSpace{CUDA}, 1)
+end
+```
+The exact semantics of these other addrspaces is defined by the specific backend, but will error if the user is attempting to access these on the CPU.
 
 !!! compat "Julia 1.11"
     This type requires Julia 1.11 or later.
@@ -15,7 +27,7 @@ GenericMemory
 """
     Memory{T} == GenericMemory{:not_atomic, T, Core.CPU}
 
-One-dimensional dense array with elements of type `T`.
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
 
 !!! compat "Julia 1.11"
     This type requires Julia 1.11 or later.
@@ -25,11 +37,24 @@ Memory
 """
     AtomicMemory{T} == GenericMemory{:atomic, T, Core.CPU}
 
-One-dimensional dense array with elements of type `T`, where each element is
-independently atomic when accessed, and cannot be set non-atomically.
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+Fetching of any of its individual elements is performed atomically
+(with `:monotonic` ordering by default).
+
+!!! warning
+    The access to `AtomicMemory` must be done by either using the [`@atomic`](@ref)
+    macro or the lower level interface functions: `Base.getindex_atomic`,
+    `Base.setindex_atomic!`, `Base.setindexonce_atomic!`,
+    `Base.swapindex_atomic!`, `Base.modifyindex_atomic!`, and `Base.replaceindex_atomic!`.
+
+For details, see [Atomic Operations](@ref man-atomic-operations) as well as macros
+[`@atomic`](@ref), [`@atomiconce`](@ref), [`@atomicswap`](@ref), and [`@atomicreplace`](@ref).
 
 !!! compat "Julia 1.11"
     This type requires Julia 1.11 or later.
+
+!!! compat "Julia 1.12"
+    Lower level interface functions or `@atomic` macro requires Julia 1.12 or later.
 """
 AtomicMemory
 
@@ -46,14 +71,16 @@ size(a::GenericMemory) = (length(a),)
 
 IndexStyle(::Type{<:GenericMemory}) = IndexLinear()
 
+parent(ref::GenericMemoryRef) = ref.mem
+
 pointer(mem::GenericMemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array
 
-_unsetindex!(A::Memory, i::Int) =  (@_propagate_inbounds_meta; _unsetindex!(GenericMemoryRef(A, i)); A)
+_unsetindex!(A::Memory, i::Int) =  (@_propagate_inbounds_meta; _unsetindex!(memoryref(A, i)); A)
 function _unsetindex!(A::MemoryRef{T}) where T
     @_terminates_locally_meta
     @_propagate_inbounds_meta
     @inline
-    @boundscheck GenericMemoryRef(A, 1)
+    @boundscheck memoryref(A, 1)
     mem = A.mem
     MemT = typeof(mem)
     arrayelem = datatype_arrayelem(MemT)
@@ -82,7 +109,7 @@ sizeof(a::GenericMemory) = Core.sizeof(a)
 function isassigned(a::GenericMemory, i::Int)
     @inline
     @boundscheck (i - 1)%UInt < length(a)%UInt || return false
-    return @inbounds memoryref_isassigned(GenericMemoryRef(a, i), default_access_order(a), false)
+    return @inbounds memoryref_isassigned(memoryref(a, i), default_access_order(a), false)
 end
 
 isassigned(a::GenericMemoryRef) = memoryref_isassigned(a, default_access_order(a), @_boundscheck)
@@ -91,21 +118,21 @@ isassigned(a::GenericMemoryRef) = memoryref_isassigned(a, default_access_order(a
 function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T}
     @_terminates_globally_notaskstate_meta
     n == 0 && return dest
-    @boundscheck GenericMemoryRef(dest, n), GenericMemoryRef(src, n)
+    @boundscheck memoryref(dest, n), memoryref(src, n)
     ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n))
     return dest
 end
 
 function unsafe_copyto!(dest::GenericMemoryRef, src::GenericMemoryRef, n)
     n == 0 && return dest
-    @boundscheck GenericMemoryRef(dest, n), GenericMemoryRef(src, n)
+    @boundscheck memoryref(dest, n), memoryref(src, n)
     unsafe_copyto!(dest.mem, memoryrefoffset(dest), src.mem, memoryrefoffset(src), n)
     return dest
 end
 
 function unsafe_copyto!(dest::Memory{T}, doffs, src::Memory{T}, soffs, n) where{T}
     n == 0 && return dest
-    unsafe_copyto!(GenericMemoryRef(dest, doffs), GenericMemoryRef(src, soffs), n)
+    unsafe_copyto!(memoryref(dest, doffs), memoryref(src, soffs), n)
     return dest
 end
 
@@ -138,6 +165,7 @@ end
 
 copy(a::T) where {T<:Memory} = ccall(:jl_genericmemory_copy, Ref{T}, (Any,), a)
 
+copyto!(dest::Memory, src::Memory) = copyto!(dest, 1, src, 1, length(src))
 function copyto!(dest::Memory, doffs::Integer, src::Memory, soffs::Integer, n::Integer)
     n < 0 && _throw_argerror("Number of elements to copy must be non-negative.")
     unsafe_copyto!(dest, doffs, src, soffs, n)
@@ -209,10 +237,11 @@ getindex(A::Memory, c::Colon) = copy(A)
 
 function setindex!(A::Memory{T}, x, i1::Int) where {T}
     val = x isa T ? x : convert(T,x)::T
-    ref = memoryref(memoryref(A), i1, @_boundscheck)
+    ref = memoryrefnew(memoryref(A), i1, @_boundscheck)
     memoryrefset!(ref, val, :not_atomic, @_boundscheck)
     return A
 end
+
 function setindex!(A::Memory{T}, x, i1::Int, i2::Int, I::Int...) where {T}
     @inline
     @boundscheck (i2 == 1 && all(==(1), I)) || throw_boundserror(A, (i1, i2, I...))
@@ -288,3 +317,74 @@ function indcopy(sz::Dims, I::GenericMemory)
     src = eltype(I)[I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
     dst, src
 end
+
+# get, set(once), modify, swap and replace at index, atomically
+function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)
+    memref = memoryref(mem, i)
+    return memoryrefget(memref, order, @_boundscheck)
+end
+
+function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return memoryrefset!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        order,
+        @_boundscheck
+    )
+end
+
+function setindexonce_atomic!(
+    mem::GenericMemory,
+    success_order::Symbol,
+    fail_order::Symbol,
+    val,
+    i::Int,
+)
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefsetonce!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        success_order,
+        fail_order,
+        @_boundscheck
+    )
+end
+
+function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)
+    memref = memoryref(mem, i)
+    return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck)
+end
+
+function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefswap!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        order,
+        @_boundscheck
+    )
+end
+
+function replaceindex_atomic!(
+    mem::GenericMemory,
+    success_order::Symbol,
+    fail_order::Symbol,
+    expected,
+    desired,
+    i::Int,
+)
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefreplace!(
+        memref,
+        expected,
+        desired isa T ? desired : convert(T, desired)::T,
+        success_order,
+        fail_order,
+        @_boundscheck,
+    )
+end
diff --git a/base/gmp.jl b/base/gmp.jl
index beeec879ed487..1eaa20d6baecf 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -10,7 +10,8 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
              trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
-             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit
+             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit,
+             clamp
 
 if Clong == Int32
     const ClongMax = Union{Int8, Int16, Int32}
@@ -29,10 +30,13 @@ else
     const libgmp = "libgmp.so.10"
 end
 
-version() = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar}))))
+_version() = unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar})))
+version() = VersionNumber(_version())
+major_version() = _version()[1]
 bits_per_limb() = Int(unsafe_load(cglobal((:__gmp_bits_per_limb, libgmp), Cint)))
 
 const VERSION = version()
+const MAJOR_VERSION = major_version()
 const BITS_PER_LIMB = bits_per_limb()
 
 # GMP's mp_limb_t is by default a typedef of `unsigned long`, but can also be configured to be either
@@ -101,7 +105,7 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 
 function __init__()
     try
-        if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
+        if major_version() != MAJOR_VERSION || bits_per_limb() != BITS_PER_LIMB
             msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
                      does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
                      Please rebuild Julia."""
@@ -358,6 +362,8 @@ end
 
 rem(x::Integer, ::Type{BigInt}) = BigInt(x)
 
+clamp(x, ::Type{BigInt}) = convert(BigInt, x)
+
 isodd(x::BigInt) = MPZ.tstbit(x, 0)
 iseven(x::BigInt) = !isodd(x)
 
@@ -658,11 +664,6 @@ end
 powermod(x::Integer, p::Integer, m::BigInt) = powermod(big(x), big(p), m)
 
 function gcdx(a::BigInt, b::BigInt)
-    if iszero(b) # shortcut this to ensure consistent results with gcdx(a,b)
-        return a < 0 ? (-a,-ONE,b) : (a,one(BigInt),b)
-        # we don't return the globals ONE and ZERO in case the user wants to
-        # mutate the result
-    end
     g, s, t = MPZ.gcdext(a, b)
     if t == 0
         # work around a difference in some versions of GMP
@@ -754,7 +755,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1)
     iszero(n) && pad < 1 && return ""
     nd1 = ndigits(n, base=base)
     nd  = max(nd1, pad)
-    sv  = Base.StringVector(nd + isneg(n))
+    sv  = Base.StringMemory(nd + isneg(n))
     GC.@preserve sv MPZ.get_str!(pointer(sv) + nd - nd1, base, n)
     @inbounds for i = (1:nd-nd1) .+ isneg(n)
         sv[i] = '0' % UInt8
@@ -834,7 +835,12 @@ Base.add_with_overflow(a::BigInt, b::BigInt) = a + b, false
 Base.sub_with_overflow(a::BigInt, b::BigInt) = a - b, false
 Base.mul_with_overflow(a::BigInt, b::BigInt) = a * b, false
 
-Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)
+# checked_pow doesn't follow the same promotion rules as the others, above.
+Base.checked_pow(x::BigInt, p::Integer) = x^p
+Base.checked_pow(x::Integer, p::BigInt) = x^p
+Base.checked_pow(x::BigInt, p::BigInt) = x^p
+
+Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)::BigInt
 
 ## streamlined hashing for BigInt, by avoiding allocation from shifts ##
 
diff --git a/base/idset.jl b/base/idset.jl
index 23f9ca009af0c..c46d49968ff73 100644
--- a/base/idset.jl
+++ b/base/idset.jl
@@ -5,14 +5,13 @@
     IdSet()
 
 IdSet{T}() constructs a set (see [`Set`](@ref)) using
-`===` as equality with values of type `V`.
+`===` as equality with values of type `T`.
 
-In the example below, the values are all `isequal` so they get overwritten.
-The `IdSet` compares by `===` so preserves the 3 different keys.
-
-Examples
-≡≡≡≡≡≡≡≡
+In the example below, the values are all `isequal` so they get overwritten in the ordinary `Set`.
+The `IdSet` compares by `===` and so preserves the 3 different values.
 
+# Examples
+```jldoctest; filter = r"\\n\\s*(1|1\\.0|true)"
 julia> Set(Any[true, 1, 1.0])
 Set{Any} with 1 element:
   1.0
@@ -22,6 +21,7 @@ IdSet{Any} with 3 elements:
   1.0
   1
   true
+```
 """
 mutable struct IdSet{K} <: AbstractSet{K}
     list::Memory{Any}
diff --git a/base/indices.jl b/base/indices.jl
index 0bd46f9787dab..455bb0f7656a1 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -310,9 +310,9 @@ to_index(I::AbstractArray{Bool}) = LogicalIndex(I)
 to_index(I::AbstractArray) = I
 to_index(I::AbstractArray{Union{}}) = I
 to_index(I::AbstractArray{<:Union{AbstractArray, Colon}}) =
-    throw(ArgumentError("invalid index: $(limitrepr(I)) of type $(typeof(I))"))
+    throw(ArgumentError(LazyString("invalid index: ", limitrepr(I), " of type ", typeof(I))))
 to_index(::Colon) = throw(ArgumentError("colons must be converted by to_indices(...)"))
-to_index(i) = throw(ArgumentError("invalid index: $(limitrepr(i)) of type $(typeof(i))"))
+to_index(i) = throw(ArgumentError(LazyString("invalid index: ", limitrepr(i), " of type ", typeof(i))))
 
 # The general to_indices is mostly defined in multidimensional.jl, but this
 # definition is required for bootstrap:
@@ -423,15 +423,57 @@ first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
 length(S::IdentityUnitRange) = length(S.indices)
-getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices)
+getindex(S::IdentityUnitRange, i::Integer) = (@inline; @boundscheck checkbounds(S, i); convert(eltype(S), i))
+getindex(S::IdentityUnitRange, i::Bool) = throw(ArgumentError("invalid index: $i of type Bool"))
+function getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return convert(AbstractUnitRange{eltype(S)}, i)
+end
+function getindex(S::IdentityUnitRange, i::AbstractUnitRange{Bool})
+    @inline
+    @boundscheck checkbounds(S, i)
+    range(first(i) ? first(S) : last(S), length = last(i))
+end
+function getindex(S::IdentityUnitRange, i::StepRange{<:Integer})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return convert(AbstractRange{eltype(S)}, i)
+end
+function getindex(S::IdentityUnitRange, i::StepRange{Bool})
+    @inline
+    @boundscheck checkbounds(S, i)
+    range(first(i) ? first(S) : last(S), length = last(i), step = Int(step(i)))
+end
+# Indexing with offset ranges should preserve the axes of the indices
+# however, this is only really possible in general with OffsetArrays.
+# In some cases, though, we may obtain correct results using Base ranges
+# the following methods are added to allow OffsetArrays to dispatch on the first argument without ambiguities
+function getindex(S::IdentityUnitRange{<:AbstractUnitRange{<:Integer}},
+                    i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return i
+end
+function getindex(S::Slice{<:AbstractUnitRange{<:Integer}},
+                    i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return i
+end
 show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")")
 iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...)
 
 # For OneTo, the values and indices of the values are identical, so this may be defined in Base.
 # In general such an indexing operation would produce offset ranges
-getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I)
+# This should also ideally return an AbstractUnitRange{eltype(S)}, but currently
+# we're restricted to eltype(::IdentityUnitRange) == Int by definition
+function getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, I)
+    return I
+end
 
 """
     LinearIndices(A::AbstractArray)
@@ -523,6 +565,7 @@ function getindex(iter::LinearIndices, i::AbstractRange{<:Integer})
     @boundscheck checkbounds(iter, i)
     @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i]
 end
+copy(iter::LinearIndices) = iter
 # More efficient iteration — predominantly for non-vector LinearIndices
 # but one-dimensional LinearIndices must be special-cased to support OffsetArrays
 iterate(iter::LinearIndices{1}, s...) = iterate(axes1(iter.indices[1]), s...)
diff --git a/base/initdefs.jl b/base/initdefs.jl
index 56c2c0c587272..707c96a2444d6 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -9,7 +9,7 @@ A string containing the script name passed to Julia from the command line. Note
 script name remains unchanged from within included files. Alternatively see
 [`@__FILE__`](@ref).
 """
-global PROGRAM_FILE = ""
+global PROGRAM_FILE::String = ""
 
 """
     ARGS
@@ -75,11 +75,11 @@ Here is an overview of some of the subdirectories that may exist in a depot:
 
 * `artifacts`: Contains content that packages use for which Pkg manages the installation of.
 * `clones`: Contains full clones of package repos. Maintained by `Pkg.jl` and used as a cache.
-* `config`: Contains julia-level configuration such as a `startup.jl`
+* `config`: Contains julia-level configuration such as a `startup.jl`.
 * `compiled`: Contains precompiled `*.ji` files for packages. Maintained by Julia.
 * `dev`: Default directory for `Pkg.develop`. Maintained by `Pkg.jl` and the user.
 * `environments`: Default package environments. For instance the global environment for a specific julia version. Maintained by `Pkg.jl`.
-* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and `Julia`.
+* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and Julia.
 * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`.
 * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`.
 * `scratchspaces`: Contains content that a package itself installs via the [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) package. `Pkg.gc()` will delete content that is known to be unused.
@@ -245,8 +245,14 @@ function init_load_path()
     if haskey(ENV, "JULIA_LOAD_PATH")
         paths = parse_load_path(ENV["JULIA_LOAD_PATH"])
     else
-        paths = filter!(env -> env !== nothing,
-            String[env == "@." ? current_project() : env for env in DEFAULT_LOAD_PATH])
+        paths = String[]
+        for env in DEFAULT_LOAD_PATH
+            if env == "@."
+                env = current_project()
+                env === nothing && continue
+            end
+            push!(paths, env)
+        end
     end
     append!(empty!(LOAD_PATH), paths)
 end
@@ -263,6 +269,7 @@ function init_active_project()
 end
 
 ## load path expansion: turn LOAD_PATH entries into concrete paths ##
+cmd_suppresses_program(cmd) = cmd in ('e', 'E')
 
 function load_path_expand(env::AbstractString)::Union{String, Nothing}
     # named environment?
@@ -271,19 +278,25 @@ function load_path_expand(env::AbstractString)::Union{String, Nothing}
         # if you put a `@.` in LOAD_PATH manually, it's expanded late
         env == "@" && return active_project(false)
         env == "@." && return current_project()
+        env == "@temp" && return mktempdir()
         env == "@stdlib" && return Sys.STDLIB
-        if startswith(env, "@scriptdir")
+        if startswith(env, "@script")
             if @isdefined(PROGRAM_FILE)
                 dir = dirname(PROGRAM_FILE)
             else
-                cmds = unsafe_load_commands(opts.commands)
-                if any((cmd, arg)->cmd_suppresses_program(cmd), cmds)
+                cmds = unsafe_load_commands(JLOptions().commands)
+                if any(cmd::Pair{Char, String}->cmd_suppresses_program(first(cmd)), cmds)
                     # Usage error. The user did not pass a script.
                     return nothing
                 end
                 dir = dirname(ARGS[1])
             end
-            return abspath(replace(env, "@scriptdir" => dir))
+            if env == "@script"  # complete match, not startswith, so search upwards
+                return current_project(dir)
+            else
+                # starts with, so assume relative path is after
+                return abspath(replace(env, "@script" => dir))
+            end
         end
         env = replace(env, '#' => VERSION.major, count=1)
         env = replace(env, '#' => VERSION.minor, count=1)
@@ -425,6 +438,11 @@ function atexit(f::Function)
 end
 
 function _atexit(exitcode::Cint)
+    # this current task shouldn't be scheduled anywhere, but if it was (because
+    # this exit came from a signal for example), then try to clear that state
+    # to minimize scheduler issues later
+    ct = current_task()
+    q = ct.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
     # Don't hold the lock around the iteration, just in case any other thread executing in
     # parallel tries to register a new atexit hook while this is running. We don't want to
     # block that thread from proceeding, and we can allow it to register its hook which we
@@ -480,7 +498,7 @@ end
 
 ## hook for disabling threaded libraries ##
 
-library_threading_enabled = true
+library_threading_enabled::Bool = true
 const disable_library_threading_hooks = []
 
 function at_disable_library_threading(f)
diff --git a/base/int.jl b/base/int.jl
index 61576d4360835..a25b17e2cc958 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -843,166 +843,14 @@ widemul(x::Bool,y::Number) = x * y
 widemul(x::Number,y::Bool) = x * y
 
 
-## wide multiplication, Int128 multiply and divide ##
-
-if Core.sizeof(Int) == 4
-    function widemul(u::Int64, v::Int64)
-        local u0::UInt64, v0::UInt64, w0::UInt64
-        local u1::Int64, v1::Int64, w1::UInt64, w2::Int64, t::UInt64
-
-        u0 = u & 0xffffffff; u1 = u >> 32
-        v0 = v & 0xffffffff; v1 = v >> 32
-        w0 = u0 * v0
-        t = reinterpret(UInt64, u1) * v0 + (w0 >>> 32)
-        w2 = reinterpret(Int64, t) >> 32
-        w1 = u0 * reinterpret(UInt64, v1) + (t & 0xffffffff)
-        hi = u1 * v1 + w2 + (reinterpret(Int64, w1) >> 32)
-        lo = w0 & 0xffffffff + (w1 << 32)
-        return Int128(hi) << 64 + Int128(lo)
-    end
-
-    function widemul(u::UInt64, v::UInt64)
-        local u0::UInt64, v0::UInt64, w0::UInt64
-        local u1::UInt64, v1::UInt64, w1::UInt64, w2::UInt64, t::UInt64
-
-        u0 = u & 0xffffffff; u1 = u >>> 32
-        v0 = v & 0xffffffff; v1 = v >>> 32
-        w0 = u0 * v0
-        t = u1 * v0 + (w0 >>> 32)
-        w2 = t >>> 32
-        w1 = u0 * v1 + (t & 0xffffffff)
-        hi = u1 * v1 + w2 + (w1 >>> 32)
-        lo = w0 & 0xffffffff + (w1 << 32)
-        return UInt128(hi) << 64 + UInt128(lo)
-    end
-
-    function *(u::Int128, v::Int128)
-        u0 = u % UInt64; u1 = Int64(u >> 64)
-        v0 = v % UInt64; v1 = Int64(v >> 64)
-        lolo = widemul(u0, v0)
-        lohi = widemul(reinterpret(Int64, u0), v1)
-        hilo = widemul(u1, reinterpret(Int64, v0))
-        t = reinterpret(UInt128, hilo) + (lolo >>> 64)
-        w1 = reinterpret(UInt128, lohi) + (t & 0xffffffffffffffff)
-        return Int128(lolo & 0xffffffffffffffff) + reinterpret(Int128, w1) << 64
-    end
-
-    function *(u::UInt128, v::UInt128)
-        u0 = u % UInt64; u1 = UInt64(u>>>64)
-        v0 = v % UInt64; v1 = UInt64(v>>>64)
-        lolo = widemul(u0, v0)
-        lohi = widemul(u0, v1)
-        hilo = widemul(u1, v0)
-        t = hilo + (lolo >>> 64)
-        w1 = lohi + (t & 0xffffffffffffffff)
-        return (lolo & 0xffffffffffffffff) + UInt128(w1) << 64
-    end
-
-    function _setbit(x::UInt128, i)
-        # faster version of `return x | (UInt128(1) << i)`
-        j = i >> 5
-        y = UInt128(one(UInt32) << (i & 0x1f))
-        if j == 0
-            return x | y
-        elseif j == 1
-            return x | (y << 32)
-        elseif j == 2
-            return x | (y << 64)
-        elseif j == 3
-            return x | (y << 96)
-        end
-        return x
-    end
+# Int128 multiply and divide
+*(x::T, y::T) where {T<:Union{Int128,UInt128}}  = mul_int(x, y)
 
-    function divrem(x::UInt128, y::UInt128)
-        iszero(y) && throw(DivideError())
-        if (x >> 64) % UInt64 == 0
-            if (y >> 64) % UInt64 == 0
-                # fast path: upper 64 bits are zero, so we can fallback to UInt64 division
-                q64, x64 = divrem(x % UInt64, y % UInt64)
-                return UInt128(q64), UInt128(x64)
-            else
-                # this implies y>x, so
-                return zero(UInt128), x
-            end
-        end
-        n = leading_zeros(y) - leading_zeros(x)
-        q = zero(UInt128)
-        ys = y << n
-        while n >= 0
-            # ys == y * 2^n
-            if ys <= x
-                x -= ys
-                q = _setbit(q, n)
-                if (x >> 64) % UInt64 == 0
-                    # exit early, similar to above fast path
-                    if (y >> 64) % UInt64 == 0
-                        q64, x64 = divrem(x % UInt64, y % UInt64)
-                        q |= q64
-                        x = UInt128(x64)
-                    end
-                    return q, x
-                end
-            end
-            ys >>>= 1
-            n -= 1
-        end
-        return q, x
-    end
+div(x::Int128,  y::Int128)  = checked_sdiv_int(x, y)
+div(x::UInt128, y::UInt128) = checked_udiv_int(x, y)
 
-    function div(x::Int128, y::Int128)
-        (x == typemin(Int128)) & (y == -1) && throw(DivideError())
-        return Int128(div(BigInt(x), BigInt(y)))::Int128
-    end
-    div(x::UInt128, y::UInt128) = divrem(x, y)[1]
-
-    function rem(x::Int128, y::Int128)
-        return Int128(rem(BigInt(x), BigInt(y)))::Int128
-    end
-
-    function rem(x::UInt128, y::UInt128)
-        iszero(y) && throw(DivideError())
-        if (x >> 64) % UInt64 == 0
-            if (y >> 64) % UInt64 == 0
-                # fast path: upper 64 bits are zero, so we can fallback to UInt64 division
-                return UInt128(rem(x % UInt64, y % UInt64))
-            else
-                # this implies y>x, so
-                return x
-            end
-        end
-        n = leading_zeros(y) - leading_zeros(x)
-        ys = y << n
-        while n >= 0
-            # ys == y * 2^n
-            if ys <= x
-                x -= ys
-                if (x >> 64) % UInt64 == 0
-                    # exit early, similar to above fast path
-                    if (y >> 64) % UInt64 == 0
-                        x = UInt128(rem(x % UInt64, y % UInt64))
-                    end
-                    return x
-                end
-            end
-            ys >>>= 1
-            n -= 1
-        end
-        return x
-    end
-
-    function mod(x::Int128, y::Int128)
-        return Int128(mod(BigInt(x), BigInt(y)))::Int128
-    end
-else
-    *(x::T, y::T) where {T<:Union{Int128,UInt128}}  = mul_int(x, y)
-
-    div(x::Int128,  y::Int128)  = checked_sdiv_int(x, y)
-    div(x::UInt128, y::UInt128) = checked_udiv_int(x, y)
-
-    rem(x::Int128,  y::Int128)  = checked_srem_int(x, y)
-    rem(x::UInt128, y::UInt128) = checked_urem_int(x, y)
-end
+rem(x::Int128,  y::Int128)  = checked_srem_int(x, y)
+rem(x::UInt128, y::UInt128) = checked_urem_int(x, y)
 
 # issue #15489: since integer ops are unchecked, they shouldn't check promotion
 for op in (:+, :-, :*, :&, :|, :xor)
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 916cd28a32c6f..8d46fcffa3ad5 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -198,6 +198,7 @@ julia> gcdx(240, 46)
 """
 Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer)
     T = promote_type(typeof(a), typeof(b))
+    a == b == 0 && return (zero(T), zero(T), zero(T))
     # a0, b0 = a, b
     s0, s1 = oneunit(T), zero(T)
     t0, t1 = s1, s0
@@ -262,14 +263,16 @@ end
     invmod(n::T) where {T <: Base.BitInteger}
 
 Compute the modular inverse of `n` in the integer ring of type `T`, i.e. modulo
-`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words these
+`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words, these
 methods satisfy the following identities:
 ```
 n * invmod(n) == 1
 (n * invmod(n, T)) % T == 1
 (n % T) * invmod(n, T) == 1
 ```
-Note that `*` here is modular multiplication in the integer ring, `T`.
+Note that `*` here is modular multiplication in the integer ring, `T`.  This will
+throw an error if `n` is even, because then it is not relatively prime with `2^N`
+and thus has no such inverse.
 
 Specifying the modulus implied by an integer type as an explicit value is often
 inconvenient since the modulus is by definition too big to be represented by the
@@ -302,12 +305,12 @@ to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x)
 @noinline throw_domerr_powbysq(::Integer, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer x to a negative power ", p, ".",
     "\nMake x or ", p, " a float by adding a zero decimal ",
-    "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ")",
+    "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ") ",
     "or write 1/x^", -p, ", float(x)^", p, ", x^float(", p, ") or (x//1)^", p, ".")))
 @noinline throw_domerr_powbysq(::AbstractMatrix, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer matrix x to a negative power ", p, ".",
     "\nMake x a float matrix by adding a zero decimal ",
-    "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ")",
+    "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ") ",
     "or write float(x)^", p, " or Rational.(x)^", p, ".")))
 # The * keyword supports `*=checked_mul` for `checked_pow`
 @assume_effects :terminates_locally function power_by_squaring(x_, p::Integer; mul=*)
@@ -742,7 +745,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba
 function bin(x::Unsigned, pad::Int, neg::Bool)
     m = top_set_bit(x)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
     #     @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1)
     # end
@@ -769,7 +772,7 @@ end
 function oct(x::Unsigned, pad::Int, neg::Bool)
     m = div(top_set_bit(x) + 2, 3)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     while i > neg
         @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7)
@@ -844,7 +847,7 @@ end
 
 function dec(x::Unsigned, pad::Int, neg::Bool)
     n = neg + ndigits(x, pad=pad)
-    a = StringVector(n)
+    a = StringMemory(n)
     append_c_digits_fast(n, x, a, 1)
     neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
@@ -853,7 +856,7 @@ end
 function hex(x::Unsigned, pad::Int, neg::Bool)
     m = 2 * sizeof(x) - (leading_zeros(x) >> 2)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     while i >= 2
         b = (x % UInt8)::UInt8
@@ -880,7 +883,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
     b = (base % Int)::Int
     digits = abs(b) <= 36 ? base36digits : base62digits
     n = neg + ndigits(x, base=b, pad=pad)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     @inbounds while i > neg
         if b > 0
@@ -940,7 +943,8 @@ string(b::Bool) = b ? "true" : "false"
 """
     bitstring(n)
 
-A string giving the literal bit representation of a primitive type.
+A string giving the literal bit representation of a primitive type
+(in bigendian order, i.e. most-significant bit first).
 
 See also [`count_ones`](@ref), [`count_zeros`](@ref), [`digits`](@ref).
 
@@ -954,9 +958,9 @@ julia> bitstring(2.2)
 ```
 """
 function bitstring(x::T) where {T}
-    isprimitivetype(T) || throw(ArgumentError("$T not a primitive type"))
+    isprimitivetype(T) || throw(ArgumentError(LazyString(T, " not a primitive type")))
     sz = sizeof(T) * 8
-    str = StringVector(sz)
+    str = StringMemory(sz)
     i = sz
     @inbounds while i >= 4
         b = UInt32(sizeof(T) == 1 ? bitcast(UInt8, x) : trunc_int(UInt8, x))
@@ -976,7 +980,7 @@ end
 
 Return an array with element type `T` (default `Int`) of the digits of `n` in the given
 base, optionally padded with zeros to a specified size. More significant digits are at
-higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`.
+higher indices, such that `n == sum(digits[k]*base^(k-1) for k in 1:length(digits))`.
 
 See also [`ndigits`](@ref), [`digits!`](@ref),
 and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref).
@@ -1054,7 +1058,7 @@ julia> digits!([2, 2, 2, 2, 2, 2], 10, base = 2)
 function digits!(a::AbstractVector{T}, n::Integer; base::Integer = 10) where T<:Integer
     2 <= abs(base) || throw(DomainError(base, "base must be ≥ 2 or ≤ -2"))
     hastypemax(T) && abs(base) - 1 > typemax(T) &&
-        throw(ArgumentError("type $T too small for base $base"))
+        throw(ArgumentError(LazyString("type ", T, " too small for base ", base)))
     isempty(a) && return a
 
     if base > 0
@@ -1203,6 +1207,7 @@ Base.@assume_effects :terminates_locally function binomial(n::T, k::T) where T<:
     end
     copysign(x, sgn)
 end
+binomial(n::Integer, k::Integer) = binomial(promote(n, k)...)
 
 """
     binomial(x::Number, k::Integer)
@@ -1234,3 +1239,102 @@ function binomial(x::Number, k::Integer)
     # and instead divide each term by i, to avoid spurious overflow.
     return prod(i -> (x-(i-1))/i, OneTo(k), init=oneunit(x)/one(k))
 end
+
+"""
+    clamp(x, lo, hi)
+
+Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
+are promoted to a common type.
+
+See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` as the first argument requires at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
+3-element Vector{BigFloat}:
+ 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
+ 2.0
+ 9.0
+
+julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
+3-element Vector{Int64}:
+  6
+  6
+ 10
+```
+"""
+function clamp(x::X, lo::L, hi::H) where {X,L,H}
+    T = promote_type(X, L, H)
+    return (x > hi) ? convert(T, hi) : (x < lo) ? convert(T, lo) : convert(T, x)
+end
+
+"""
+    clamp(x, T)::T
+
+Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
+
+See also [`trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> clamp(200, Int8)
+127
+
+julia> clamp(-200, Int8)
+-128
+
+julia> trunc(Int, 4pi^2)
+39
+```
+"""
+function clamp(x, ::Type{T}) where {T<:Integer}
+    # delegating to clamp(x, typemin(T), typemax(T)) would promote types
+    # this way, we avoid unnecessary conversions
+    # think of, e.g., clamp(big(2) ^ 200, Int16)
+    lo = typemin(T)
+    hi = typemax(T)
+    return (x > hi) ? hi : (x < lo) ? lo : convert(T, x)
+end
+
+
+"""
+    clamp!(array::AbstractArray, lo, hi)
+
+Restrict values in `array` to the specified range, in-place.
+See also [`clamp`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` entries in `array` require at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> row = collect(-4:4)';
+
+julia> clamp!(row, 0, Inf)
+1×9 adjoint(::Vector{Int64}) with eltype Int64:
+ 0  0  0  0  0  1  2  3  4
+
+julia> clamp.((-4:4)', 0, Inf)
+1×9 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
+```
+"""
+function clamp!(x::AbstractArray, lo, hi)
+    @inbounds for i in eachindex(x)
+        x[i] = clamp(x[i], lo, hi)
+    end
+    x
+end
+
+"""
+    clamp(x::Integer, r::AbstractUnitRange)
+
+Clamp `x` to lie within range `r`.
+
+!!! compat "Julia 1.6"
+     This method requires at least Julia 1.6.
+"""
+clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
diff --git a/base/io.jl b/base/io.jl
index 7d93bc3e6d9c8..83a215d6359fc 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -131,6 +131,8 @@ data has already been buffered. The result is a `Vector{UInt8}`.
 """
 function readavailable end
 
+function isexecutable end
+
 """
     isreadable(io) -> Bool
 
@@ -245,7 +247,7 @@ The endianness of the written value depends on the endianness of the host system
 Convert to/from a fixed endianness when writing/reading (e.g. using  [`htol`](@ref) and
 [`ltoh`](@ref)) to get results that are consistent across platforms.
 
-You can write multiple values with the same `write` call. i.e. the following are equivalent:
+You can write multiple values with the same `write` call, i.e. the following are equivalent:
 
     write(io, x, y...)
     write(io, x) + write(io, y...)
@@ -541,8 +543,8 @@ julia> rm("my_file.txt")
 ```
 """
 readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String)
-readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...))
-readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)))
+readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))
+readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)))
 readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep)
 
 
@@ -615,7 +617,7 @@ Logan
 readline(filename::AbstractString; keep::Bool=false) =
     open(io -> readline(io; keep), filename)
 readline(s::IO=stdin; keep::Bool=false) =
-    String(_unsafe_take!(copyline(IOBuffer(sizehint=70), s; keep)))
+    String(_unsafe_take!(copyline(IOBuffer(sizehint=16), s; keep)))
 
 """
     copyline(out::IO, io::IO=stdin; keep::Bool=false)
@@ -802,7 +804,7 @@ unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p)
 function write(s::IO, x::Ref{T}) where {T}
     x isa Ptr && error("write cannot copy from a Ptr")
     if isbitstype(T)
-        unsafe_write(s, x, Core.sizeof(T))
+        Int(unsafe_write(s, x, Core.sizeof(T)))
     else
         write(s, x[])
     end
@@ -1109,7 +1111,7 @@ function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false)
 end
 
 function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
-    out = (T === UInt8 ? resize!(StringVector(70), 0) : Vector{T}())
+    out = (T === UInt8 ? resize!(StringVector(16), 0) : Vector{T}())
     readuntil_vector!(io, target, keep, out)
     return out
 end
@@ -1424,7 +1426,7 @@ previously marked position. Throw an error if the stream is not marked.
 See also [`mark`](@ref), [`unmark`](@ref), [`ismarked`](@ref).
 """
 function reset(io::T) where T<:IO
-    ismarked(io) || throw(ArgumentError("$T not marked"))
+    ismarked(io) || throw(ArgumentError(LazyString(T, " not marked")))
     m = io.mark
     seek(io, m)
     io.mark = -1 # must be after seek, or seek may fail
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index dadb13e1f1e6a..c0c2731eec08b 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -219,7 +219,7 @@ function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T
     if offs+nel-1 > length(a) || offs < 1 || nel < 0
         throw(BoundsError())
     end
-    if isbitstype(T) && isa(a,Array)
+    if isa(a, MutableDenseArrayType{UInt8})
         nb = UInt(nel * sizeof(T))
         GC.@preserve a unsafe_read(from, pointer(a, offs), nb)
     else
@@ -260,22 +260,32 @@ bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1
 position(io::GenericIOBuffer) = io.ptr - io.offset - 1
 
 function skip(io::GenericIOBuffer, n::Integer)
-    seekto = io.ptr + n
-    n < 0 && return seek(io, seekto-1) # Does error checking
-    io.ptr = min(seekto, io.size+1)
-    return io
+    skip(io, clamp(n, Int))
+end
+function skip(io::GenericIOBuffer, n::Int)
+    if signbit(n)
+        seekto = clamp(widen(position(io)) + widen(n), Int)
+        seek(io, seekto) # Does error checking
+    else
+        n_max = io.size + 1 - io.ptr
+        io.ptr += min(n, n_max)
+        io
+    end
 end
 
 function seek(io::GenericIOBuffer, n::Integer)
+    seek(io, clamp(n, Int))
+end
+function seek(io::GenericIOBuffer, n::Int)
     if !io.seekable
         ismarked(io) || throw(ArgumentError("seek failed, IOBuffer is not seekable and is not marked"))
         n == io.mark || throw(ArgumentError("seek failed, IOBuffer is not seekable and n != mark"))
     end
     # TODO: REPL.jl relies on the fact that this does not throw (by seeking past the beginning or end
     #       of an GenericIOBuffer), so that would need to be fixed in order to throw an error here
-    #(n < 0 || n > io.size) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries."))
-    #io.ptr = n+1
-    io.ptr = min(max(0, n)+io.offset, io.size)+1
+    #(n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries."))
+    #io.ptr = n + io.offset + 1
+    io.ptr = clamp(n, 0, io.size - io.offset) + io.offset + 1
     return io
 end
 
@@ -456,16 +466,16 @@ function take!(io::IOBuffer)
         if nbytes == 0 || io.reinit
             data = StringVector(0)
         elseif io.writable
-            data = wrap(Array, MemoryRef(io.data, io.offset + 1), nbytes)
+            data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes)
         else
-            data = copyto!(StringVector(io.size), 1, io.data, io.offset + 1, nbytes)
+            data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)
         end
     else
         nbytes = bytesavailable(io)
         if nbytes == 0
             data = StringVector(0)
         elseif io.writable
-            data = wrap(Array, MemoryRef(io.data, io.ptr), nbytes)
+            data = wrap(Array, memoryref(io.data, io.ptr), nbytes)
         else
             data = read!(io, data)
         end
@@ -493,8 +503,8 @@ Array allocation), as well as omits some checks.
 """
 _unsafe_take!(io::IOBuffer) =
     wrap(Array, io.size == io.offset ?
-        MemoryRef(Memory{UInt8}()) :
-        MemoryRef(io.data, io.offset + 1),
+        memoryref(Memory{UInt8}()) :
+        memoryref(io.data, io.offset + 1),
         io.size - io.offset)
 
 function write(to::IO, from::GenericIOBuffer)
@@ -542,8 +552,8 @@ end
     return sizeof(UInt8)
 end
 
-readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb))
-function readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb::Int)
+readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb))
+function readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb::Int)
     nr = min(nb, bytesavailable(io))
     if length(b) < nr
         resize!(b, nr)
diff --git a/base/iostream.jl b/base/iostream.jl
index 5d972945e00e0..762f881cfbecb 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -47,12 +47,18 @@ macro _lock_ios(s, expr)
 end
 
 """
-    fd(stream)
+    fd(stream) -> RawFD
 
 Return the file descriptor backing the stream or file. Note that this function only applies
 to synchronous `File`'s and `IOStream`'s not to any of the asynchronous streams.
+
+`RawFD` objects can be passed directly to other languages via the `ccall` interface.
+
+!!! compat "Julia 1.12"
+    Prior to 1.12, this function returned an `Int` instead of a `RawFD`. You may use
+    `RawFD(fd(x))` to produce a `RawFD` in all Julia versions.
 """
-fd(s::IOStream) = Int(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
+fd(s::IOStream) = RawFD(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
 
 stat(s::IOStream) = stat(fd(s))
 
@@ -292,12 +298,15 @@ function open(fname::String; lock = true,
     if !lock
         s._dolock = false
     end
-    systemerror("opening file $(repr(fname))",
-                ccall(:ios_file, Ptr{Cvoid},
-                      (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint),
-                      s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL)
+    if ccall(:ios_file, Ptr{Cvoid},
+             (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint),
+             s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL
+        systemerror("opening file $(repr(fname))")
+    end
     if flags.append
-        systemerror("seeking to end of file $fname", ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0)
+        if ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0
+            systemerror("seeking to end of file $fname")
+        end
     end
     return s
 end
@@ -446,8 +455,8 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool)
     @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep)
 end
 readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) =
-    delim ≤ '\x7f' ? readuntil_string(s, delim % UInt8, keep) :
-    String(unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)))
+    isascii(delim) ? readuntil_string(s, delim % UInt8, keep) :
+    String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)))
 
 function readline(s::IOStream; keep::Bool=false)
     @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2)
@@ -483,9 +492,7 @@ function copyuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false)
     return out
 end
 
-function readbytes_all!(s::IOStream,
-                        b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                        nb::Integer)
+function readbytes_all!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer)
     olb = lb = length(b)
     nr = 0
     let l = s._dolock, slock = s.lock
@@ -513,9 +520,7 @@ function readbytes_all!(s::IOStream,
     return nr
 end
 
-function readbytes_some!(s::IOStream,
-                         b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                         nb::Integer)
+function readbytes_some!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer)
     olb = length(b)
     if nb > olb
         resize!(b, nb)
@@ -544,10 +549,7 @@ requested bytes, until an error or end-of-file occurs. If `all` is `false`, at m
 `read` call is performed, and the amount of data returned is device-dependent. Note that not
 all stream types support the `all` option.
 """
-function readbytes!(s::IOStream,
-                    b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                    nb=length(b);
-                    all::Bool=true)
+function readbytes!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb=length(b); all::Bool=true)
     return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb)
 end
 
diff --git a/base/iterators.jl b/base/iterators.jl
index a0b3a6cd4672d..8bd30991319b6 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -13,9 +13,9 @@ using .Base:
     SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
     @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, IdDict,
     AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom,
-    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing,
+    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, =>, missing,
     any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex,
-    tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape
+    tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape, LazyString
 using Core: @doc
 
 if Base !== Core.Compiler
@@ -36,6 +36,7 @@ import .Base:
     popfirst!, isdone, peek, intersect
 
 export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap
+public accumulate, filter, map, peel, reverse, Stateful
 
 if Base !== Core.Compiler
 export partition
@@ -61,9 +62,6 @@ julia> collect(Iterators.map(x -> x^2, 1:3))
 """
 map(f, arg, args...) = Base.Generator(f, arg, args...)
 
-tail_if_any(::Tuple{}) = ()
-tail_if_any(x::Tuple) = tail(x)
-
 _min_length(a, b, ::IsInfinite, ::IsInfinite) = min(length(a),length(b)) # inherit behaviour, error
 _min_length(a, b, A, ::IsInfinite) = length(a)
 _min_length(a, b, ::IsInfinite, B) = length(b)
@@ -1099,7 +1097,7 @@ _prod_size(t::Tuple) = (_prod_size1(t[1], IteratorSize(t[1]))..., _prod_size(tai
 _prod_size1(a, ::HasShape)  = size(a)
 _prod_size1(a, ::HasLength) = (length(a),)
 _prod_size1(a, A) =
-    throw(ArgumentError("Cannot compute size for object of type $(typeof(a))"))
+    throw(ArgumentError(LazyString("Cannot compute size for object of type ", typeof(a))))
 
 axes(P::ProductIterator) = _prod_indices(P.iterators)
 _prod_indices(::Tuple{}) = ()
@@ -1107,7 +1105,7 @@ _prod_indices(t::Tuple) = (_prod_axes1(t[1], IteratorSize(t[1]))..., _prod_indic
 _prod_axes1(a, ::HasShape)  = axes(a)
 _prod_axes1(a, ::HasLength) = (OneTo(length(a)),)
 _prod_axes1(a, A) =
-    throw(ArgumentError("Cannot compute indices for object of type $(typeof(a))"))
+    throw(ArgumentError(LazyString("Cannot compute indices for object of type ", typeof(a))))
 
 ndims(p::ProductIterator) = length(axes(p))
 length(P::ProductIterator) = reduce(checked_mul, size(P); init=1)
@@ -1575,4 +1573,39 @@ only(x::NamedTuple) = throw(
     ArgumentError("NamedTuple contains $(length(x)) elements, must contain exactly 1 element")
 )
 
+"""
+    IterableStatePairs(x)
+
+This internal type is returned by [`pairs`](@ref), when the key is the same as
+the state of `iterate`. This allows the iterator to determine the key => value
+pairs by only calling iterate on the values.
+
+"""
+struct IterableStatePairs{T}
+    x::T
+end
+
+IteratorSize(::Type{<:IterableStatePairs{T}}) where T = IteratorSize(T)
+length(x::IterableStatePairs) = length(x.x)
+Base.eltype(::Type{IterableStatePairs{T}}) where T = Pair{<:Any, eltype(T)}
+
+function iterate(x::IterableStatePairs, state=first(keys(x.x)))
+    it = iterate(x.x, state)
+    it === nothing && return nothing
+    (state => first(it), last(it))
+end
+
+reverse(x::IterableStatePairs) = IterableStatePairs(Iterators.reverse(x.x))
+reverse(x::IterableStatePairs{<:Iterators.Reverse}) = IterableStatePairs(x.x.itr)
+
+function iterate(x::IterableStatePairs{<:Iterators.Reverse}, state=last(keys(x.x.itr)))
+    it = iterate(x.x, state)
+    it === nothing && return nothing
+    (state => first(it), last(it))
+end
+
+# According to the docs of iterate(::AbstractString), the iteration state must
+# be the same as the keys, so this is a valid optimization (see #51631)
+pairs(s::AbstractString) = IterableStatePairs(s)
+
 end
diff --git a/base/libc.jl b/base/libc.jl
index c4a93eae6f3a3..21f9554f7e6db 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -17,7 +17,7 @@ if Sys.iswindows()
     export GetLastError, FormatMessage
 end
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "errno_h.jl"))  # include($BUILDROOT/base/errno_h.jl)
+include(string(Base.BUILDROOT, "errno_h.jl"))  # include($BUILDROOT/base/errno_h.jl)
 
 ## RawFD ##
 
diff --git a/base/libuv.jl b/base/libuv.jl
index 66dfcfb3414ad..143201598fde0 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -2,7 +2,7 @@
 
 # Core definitions for interacting with the libuv library from Julia
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "uv_constants.jl"))  # include($BUILDROOT/base/uv_constants.jl)
+include(string(Base.BUILDROOT, "uv_constants.jl"))  # include($BUILDROOT/base/uv_constants.jl)
 
 # convert UV handle data to julia object, checking for null
 function uv_sizeof_handle(handle)
@@ -134,17 +134,17 @@ function uv_asynccb end
 function uv_timercb end
 
 function reinit_stdio()
-    global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))
-    global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))
-    global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))
+    global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO
+    global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO
+    global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO
     opts = JLOptions()
-    if opts.color != 0
-        have_color = (opts.color == 1)
+    color = colored_text(opts)
+    if !isnothing(color)
         if !isa(stdout, TTY)
-            global stdout = IOContext(stdout, :color => have_color)
+            global stdout = IOContext(stdout, :color => color::Bool)
         end
         if !isa(stderr, TTY)
-            global stderr = IOContext(stderr, :color => have_color)
+            global stderr = IOContext(stderr, :color => color::Bool)
         end
     end
     nothing
diff --git a/base/linking.jl b/base/linking.jl
index 2d68ea730c0fb..953d80c82cc42 100644
--- a/base/linking.jl
+++ b/base/linking.jl
@@ -79,7 +79,7 @@ end
 const VERBOSE = Ref{Bool}(false)
 
 function __init__()
-    VERBOSE[] = Base.get_bool_env("JULIA_VERBOSE_LINKING", false)
+    VERBOSE[] = something(Base.get_bool_env("JULIA_VERBOSE_LINKING", false), false)
 
     __init_lld_path()
     __init_dsymutil_path()
@@ -110,7 +110,7 @@ function ld()
         # LLD supports mingw style linking
         flavor = "gnu"
         m = Sys.ARCH == :x86_64 ? "i386pep" : "i386pe"
-        default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition`
+        default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition --disable-auto-import --disable-runtime-pseudo-reloc`
     elseif Sys.isapple()
         flavor = "darwin"
         arch = Sys.ARCH == :aarch64 ? :arm64 : Sys.ARCH
diff --git a/base/loading.jl b/base/loading.jl
index e055eb3003c9e..8d180845f942f 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -264,11 +264,14 @@ const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
 LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict())
 
 
-struct TOMLCache
-    p::TOML.Parser
+struct TOMLCache{Dates}
+    p::TOML.Parser{Dates}
     d::Dict{String, CachedTOMLDict}
 end
-const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
+TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}())
+TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d))
+
+const TOML_CACHE = TOMLCache(TOML.Parser{nothing}())
 
 parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
@@ -426,7 +429,6 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)
                 @goto done
             end
             if path !== nothing
-                path = entry_path(path, pkg.name)
                 env′ = env
                 @goto done
             end
@@ -438,12 +440,15 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)
         # e.g. if they have been explicitly added to the project/manifest
         mbypath = manifest_uuid_path(Sys.STDLIB, pkg)
         if mbypath isa String
-            path = entry_path(mbypath, pkg.name)
+            path = mbypath
             env′ = Sys.STDLIB
             @goto done
         end
     end
     @label done
+    if path !== nothing && !isfile_casesensitive(path)
+        path = nothing
+    end
     if cache !== nothing
         cache.located[(pkg, stopenv)] = path === nothing ? nothing : (path, something(env′))
     end
@@ -503,6 +508,8 @@ package root.
 To get the root directory of the package that implements the current module
 the form `pkgdir(@__MODULE__)` can be used.
 
+If an extension module is given, the root of the parent package is returned.
+
 ```julia-repl
 julia> pkgdir(Foo)
 "/path/to/Foo.jl"
@@ -520,7 +527,19 @@ function pkgdir(m::Module, paths::String...)
     rootmodule = moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
-    return joinpath(dirname(dirname(path)), paths...)
+    original = path
+    path, base = splitdir(dirname(path))
+    if base == "src"
+        # package source in `../src/Foo.jl`
+    elseif base == "ext"
+        # extension source in `../ext/FooExt.jl`
+    elseif basename(path) == "ext"
+        # extension source in `../ext/FooExt/FooExt.jl`
+        path = dirname(path)
+    else
+        error("Unexpected path structure for module source: $original")
+    end
+    return joinpath(path, paths...)
 end
 
 function get_pkgversion_from_path(path)
@@ -611,6 +630,23 @@ function env_project_file(env::String)::Union{Bool,String}
     end
 end
 
+function base_project(project_file)
+    base_dir = abspath(joinpath(dirname(project_file), ".."))
+    base_project_file = env_project_file(base_dir)
+    base_project_file isa String || return nothing
+    d = parsed_toml(base_project_file)
+    workspace = get(d, "workspace", nothing)::Union{Dict{String, Any}, Nothing}
+    if workspace === nothing
+        return nothing
+    end
+    projects = get(workspace, "projects", nothing)::Union{Vector{String}, Nothing, String}
+    projects === nothing && return nothing
+    if projects isa Vector && basename(dirname(project_file)) in projects
+        return base_project_file
+    end
+    return nothing
+end
+
 function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
     project_file = env_project_file(env)
     if project_file isa String
@@ -622,21 +658,27 @@ function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
     return nothing
 end
 
+function package_get(project_file, where::PkgId, name::String)
+    proj = project_file_name_uuid(project_file, where.name)
+    if proj == where
+        # if `where` matches the project, use [deps] section as manifest, and stop searching
+        pkg_uuid = explicit_project_deps_get(project_file, name)
+        return PkgId(pkg_uuid, name)
+    end
+    return nothing
+end
+
 function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId}
     uuid = where.uuid
     @assert uuid !== nothing
     project_file = env_project_file(env)
     if project_file isa String
-        # first check if `where` names the Project itself
-        proj = project_file_name_uuid(project_file, where.name)
-        if proj == where
-            # if `where` matches the project, use [deps] section as manifest, and stop searching
-            pkg_uuid = explicit_project_deps_get(project_file, name)
-            return PkgId(pkg_uuid, name)
-        end
+        pkg = package_get(project_file, where, name)
+        pkg === nothing || return pkg
         d = parsed_toml(project_file)
         exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
         if exts !== nothing
+            proj = project_file_name_uuid(project_file, where.name)
             # Check if `where` is an extension of the project
             if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name)
                 # Extensions can load weak deps...
@@ -667,9 +709,9 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi
         proj = project_file_name_uuid(project_file, pkg.name)
         if proj == pkg
             # if `pkg` matches the project, return the project itself
-            return project_file_path(project_file)
+            return project_file_path(project_file, pkg.name)
         end
-        mby_ext = project_file_ext_path(project_file, pkg.name)
+        mby_ext = project_file_ext_path(project_file, pkg)
         mby_ext === nothing || return mby_ext
         # look for manifest file and `where` stanza
         return explicit_manifest_uuid_path(project_file, pkg)
@@ -684,7 +726,7 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi
             if parent_project_file !== nothing
                 parentproj = project_file_name_uuid(parent_project_file, parentid.name)
                 if parentproj == parentid
-                    mby_ext = project_file_ext_path(parent_project_file, pkg.name)
+                    mby_ext = project_file_ext_path(parent_project_file, pkg)
                     mby_ext === nothing || return mby_ext
                 end
             end
@@ -700,13 +742,13 @@ function find_ext_path(project_path::String, extname::String)
     return joinpath(project_path, "ext", extname * ".jl")
 end
 
-function project_file_ext_path(project_file::String, name::String)
+function project_file_ext_path(project_file::String, ext::PkgId)
     d = parsed_toml(project_file)
-    p = project_file_path(project_file)
+    p = dirname(project_file)
     exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
     if exts !== nothing
-        if name in keys(exts)
-            return find_ext_path(p, name)
+        if ext.name in keys(exts) && ext.uuid == uuid5(UUID(d["uuid"]::String), ext.name)
+            return find_ext_path(p, ext.name)
         end
     end
     return nothing
@@ -721,9 +763,22 @@ function project_file_name_uuid(project_file::String, name::String)::PkgId
     return PkgId(uuid, name)
 end
 
-function project_file_path(project_file::String)
+function project_file_path(project_file::String, name::String)
     d = parsed_toml(project_file)
-    joinpath(dirname(project_file), get(d, "path", "")::String)
+    entryfile = get(d, "path", nothing)::Union{String, Nothing}
+    # "path" entry in project file is soft deprecated
+    if entryfile === nothing
+        entryfile = get(d, "entryfile", nothing)::Union{String, Nothing}
+    end
+    return entry_path(dirname(project_file), name, entryfile)
+end
+
+function workspace_manifest(project_file)
+    base = base_project(project_file)
+    if base !== nothing
+        return project_file_manifest_path(base)
+    end
+    return nothing
 end
 
 # find project file's corresponding manifest file
@@ -736,6 +791,10 @@ function project_file_manifest_path(project_file::String)::Union{Nothing,String}
     end
     dir = abspath(dirname(project_file))
     d = parsed_toml(project_file)
+    base_manifest = workspace_manifest(project_file)
+    if base_manifest !== nothing
+        return base_manifest
+    end
     explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing}
     manifest_path = nothing
     if explicit_manifest !== nothing
@@ -776,14 +835,15 @@ end
 # given a project directory (implicit env from LOAD_PATH) and a name,
 # find an entry point for `name`, and see if it has an associated project file
 function entry_point_and_project_file(dir::String, name::String)::Union{Tuple{Nothing,Nothing},Tuple{String,Nothing},Tuple{String,String}}
-    path = normpath(joinpath(dir, "$name.jl"))
-    isfile_casesensitive(path) && return path, nothing
     dir_name = joinpath(dir, name)
     path, project_file = entry_point_and_project_file_inside(dir_name, name)
     path === nothing || return path, project_file
     dir_jl = dir_name * ".jl"
     path, project_file = entry_point_and_project_file_inside(dir_jl, name)
     path === nothing || return path, project_file
+    # check for less likely case with a bare file and no src directory last to minimize stat calls
+    path = normpath(joinpath(dir, "$name.jl"))
+    isfile_casesensitive(path) && return path, nothing
     return nothing, nothing
 end
 
@@ -792,9 +852,7 @@ function implicit_env_project_file_extension(dir::String, ext::PkgId)
     for pkg in readdir(dir; join=true)
         project_file = env_project_file(pkg)
         project_file isa String || continue
-        proj = project_file_name_uuid(project_file, "")
-        uuid5(proj.uuid, ext.name) == ext.uuid || continue
-        path = project_file_ext_path(project_file, ext.name)
+        path = project_file_ext_path(project_file, ext)
         if path !== nothing
             return path, project_file
         end
@@ -802,12 +860,11 @@ function implicit_env_project_file_extension(dir::String, ext::PkgId)
     return nothing, nothing
 end
 
-# given a path and a name, return the entry point
-function entry_path(path::String, name::String)::Union{Nothing,String}
+# given a path, name, and possibly an entryfile, return the entry point
+function entry_path(path::String, name::String, entryfile::Union{Nothing,String})::String
     isfile_casesensitive(path) && return normpath(path)
-    path = normpath(joinpath(path, "src", "$name.jl"))
-    isfile_casesensitive(path) && return path
-    return nothing # source not found
+    entrypoint = entryfile === nothing ? joinpath("src", "$name.jl") : entryfile
+    return normpath(joinpath(path, entrypoint))
 end
 
 ## explicit project & manifest API ##
@@ -981,15 +1038,16 @@ end
 
 function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::Dict{String,Any})
     path = get(entry, "path", nothing)::Union{Nothing, String}
+    entryfile = get(entry, "entryfile", nothing)::Union{Nothing, String}
     if path !== nothing
-        path = normpath(abspath(dirname(manifest_file), path))
+        path = entry_path(normpath(abspath(dirname(manifest_file), path)), pkg.name, entryfile)
         return path
     end
     hash = get(entry, "git-tree-sha1", nothing)::Union{Nothing, String}
     if hash === nothing
         mbypath = manifest_uuid_path(Sys.STDLIB, pkg)
-        if mbypath isa String
-            return entry_path(mbypath, pkg.name)
+        if mbypath isa String && isfile(mbypath)
+            return mbypath
         end
         return nothing
     end
@@ -999,7 +1057,7 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::
     for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4))
         for depot in DEPOT_PATH
             path = joinpath(depot, "packages", pkg.name, slug)
-            ispath(path) && return abspath(path)
+            ispath(path) && return entry_path(abspath(path), pkg.name, entryfile)
         end
     end
     # no depot contains the package, return missing to stop looking
@@ -1081,13 +1139,8 @@ function cache_file_entry(pkg::PkgId)
         uuid === nothing ? pkg.name : package_slug(uuid)
 end
 
-# for use during running the REPL precompilation subprocess script, given we don't
-# want it to pick up caches that already exist for other optimization levels
-const ignore_compiled_cache = PkgId[]
-
 function find_all_in_cache_path(pkg::PkgId, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH)
     paths = String[]
-    pkg in ignore_compiled_cache && return paths
     entrypath, entryfile = cache_file_entry(pkg)
     for path in DEPOT_PATH
         path = joinpath(path, entrypath)
@@ -1150,7 +1203,7 @@ const TIMING_IMPORTS = Threads.Atomic{Int}(0)
 # these return either the array of modules loaded from the path / content given
 # or an Exception that describes why it couldn't be loaded
 # and it reconnects the Base.Docs.META
-function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing)
+function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing; register::Bool=true)
     if isnothing(ignore_native)
         if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0
             ignore_native = false
@@ -1174,12 +1227,12 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
         t_comp_before = cumulative_compile_time_ns()
     end
 
-    for i in 1:length(depmods)
+    for i in eachindex(depmods)
         dep = depmods[i]
         dep isa Module && continue
         _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
-        @assert root_module_exists(depkey)
-        dep = root_module(depkey)
+        dep = loaded_precompiles[depkey => depbuild_id]
+        @assert PkgId(dep) == depkey && module_build_id(dep) === depbuild_id
         depmods[i] = dep
     end
 
@@ -1199,23 +1252,11 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
     for M in restored
         M = M::Module
         if parentmodule(M) === M && PkgId(M) == pkg
+            register && register_root_module(M)
             if timing_imports
-                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
+                elapsed_time = time_ns() - t_before
                 comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
-                print(lpad(elapsed, 9), " ms  ")
-                parentid = get(EXT_PRIMED, pkg, nothing)
-                if parentid !== nothing
-                    print(parentid.name, " → ")
-                end
-                print(pkg.name)
-                if comp_time > 0
-                    printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color())
-                end
-                if recomp_time > 0
-                    perc = Float64(100 * recomp_time / comp_time)
-                    printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
-                end
-                println()
+                print_time_imports_report(M, elapsed_time, comp_time, recomp_time)
             end
             return M
         end
@@ -1227,6 +1268,73 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
     end
 end
 
+# printing functions for @time_imports
+# note that the time inputs are UInt64 on all platforms. Give default values here so that we don't have
+# confusing UInt64 types in generate_precompile.jl
+function print_time_imports_report(
+        mod::Module,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    print(lpad(round(elapsed_time / 1e6, digits=1), 9), " ms  ")
+    ext_parent = extension_parent_name(mod)
+    if ext_parent !== nothing
+        print(ext_parent::String, " → ")
+    end
+    print(string(mod))
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * comp_time / (elapsed_time)), 2)
+        printstyled(" $perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        perc_show = perc < 1 ? "<1" : Ryu.writefixed(perc, 0)
+        printstyled(" ($perc_show% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+function print_time_imports_report_init(
+        mod::Module, i::Int=1,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    connector = i > 1 ? "├" : "┌"
+    printstyled("               $connector ", color = :light_black)
+    print("$(round(elapsed_time / 1e6, digits=1)) ms $mod.__init__() ")
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * (comp_time) / elapsed_time), 2)
+        printstyled("$perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+
+# if M is an extension, return the string name of the parent. Otherwise return nothing
+function extension_parent_name(M::Module)
+    rootmodule = moduleroot(M)
+    src_path = pathof(rootmodule)
+    src_path === nothing && return nothing
+    pkgdir_parts = splitpath(src_path)
+    ext_pos = findlast(==("ext"), pkgdir_parts)
+    if ext_pos !== nothing && ext_pos >= length(pkgdir_parts) - 2
+        parent_package_root = joinpath(pkgdir_parts[1:ext_pos-1]...)
+        parent_package_project_file = locate_project_file(parent_package_root)
+        if parent_package_project_file isa String
+            d = parsed_toml(parent_package_project_file)
+            name = get(d, "name", nothing)
+            if name !== nothing
+                return name
+            end
+        end
+    end
+    return nothing
+end
+
 function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
     # This function is also used by PkgCacheInspector.jl
     restored = sv[1]::Vector{Any}
@@ -1236,7 +1344,8 @@ function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
             push!(Base.Docs.modules, M)
         end
         if parentmodule(M) === M
-            register_root_module(M)
+            push!(loaded_modules_order, M)
+            loaded_precompiles[pkg => module_build_id(M)] = M
         end
     end
 
@@ -1262,31 +1371,18 @@ function run_module_init(mod::Module, i::Int=1)
     # `i` informs ordering for the `@time_imports` report formatting
     if TIMING_IMPORTS[] == 0
         ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
-    else
-        if isdefined(mod, :__init__)
-            connector = i > 1 ? "├" : "┌"
-            printstyled("               $connector ", color = :light_black)
-
-            elapsedtime = time_ns()
-            cumulative_compile_timing(true)
-            compile_elapsedtimes = cumulative_compile_time_ns()
+    elseif isdefined(mod, :__init__)
+        elapsed_time = time_ns()
+        cumulative_compile_timing(true)
+        compile_elapsedtimes = cumulative_compile_time_ns()
 
-            ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
+        ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
 
-            elapsedtime = (time_ns() - elapsedtime) / 1e6
-            cumulative_compile_timing(false);
-            comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6
+        elapsed_time = time_ns() - elapsed_time
+        cumulative_compile_timing(false);
+        comp_time, recomp_time = cumulative_compile_time_ns() .- compile_elapsedtimes
 
-            print("$(round(elapsedtime, digits=1)) ms $mod.__init__() ")
-            if comp_time > 0
-                printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color())
-            end
-            if recomp_time > 0
-                perc = Float64(100 * recomp_time / comp_time)
-                printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color())
-            end
-            println()
-        end
+        print_time_imports_report_init(mod, i, elapsed_time, comp_time, recomp_time)
     end
 end
 
@@ -1346,13 +1442,12 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi
         proj_pkg = project_file_name_uuid(implicit_project_file, pkg.name)
         if pkg == proj_pkg
             d_proj = parsed_toml(implicit_project_file)
-            weakdeps = get(d_proj, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}}
             extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
             extensions === nothing && return
-            weakdeps === nothing && return
-            if weakdeps isa Dict{String, Any}
-                return _insert_extension_triggers(pkg, extensions, weakdeps)
-            end
+            weakdeps = get(Dict{String, Any}, d_proj, "weakdeps")::Dict{String,Any}
+            deps = get(Dict{String, Any}, d_proj, "deps")::Dict{String,Any}
+            total_deps = merge(weakdeps, deps)
+            return _insert_extension_triggers(pkg, extensions, total_deps)
         end
 
         # Now look in manifest
@@ -1367,27 +1462,35 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi
                 uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
                 uuid === nothing && continue
                 if UUID(uuid) == pkg.uuid
-                    weakdeps = get(entry, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}}
                     extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
                     extensions === nothing && return
-                    weakdeps === nothing && return
-                    if weakdeps isa Dict{String, Any}
-                        return _insert_extension_triggers(pkg, extensions, weakdeps)
+                    weakdeps = get(Dict{String, Any}, entry, "weakdeps")::Union{Vector{String}, Dict{String,Any}}
+                    deps = get(Dict{String, Any}, entry, "deps")::Union{Vector{String}, Dict{String,Any}}
+
+                    function expand_deps_list(deps′::Vector{String})
+                        deps′_expanded = Dict{String, Any}()
+                        for (dep_name, entries) in d
+                            dep_name in deps′ || continue
+                            entries::Vector{Any}
+                            if length(entries) != 1
+                                error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))")
+                            end
+                            entry = first(entries)::Dict{String, Any}
+                            uuid = entry["uuid"]::String
+                            deps′_expanded[dep_name] = uuid
+                        end
+                        return deps′_expanded
                     end
 
-                    d_weakdeps = Dict{String, Any}()
-                    for (dep_name, entries) in d
-                        dep_name in weakdeps || continue
-                        entries::Vector{Any}
-                        if length(entries) != 1
-                            error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))")
-                        end
-                        entry = first(entries)::Dict{String, Any}
-                        uuid = entry["uuid"]::String
-                        d_weakdeps[dep_name] = uuid
+                    if weakdeps isa Vector{String}
+                        weakdeps = expand_deps_list(weakdeps)
                     end
-                    @assert length(d_weakdeps) == length(weakdeps)
-                    return _insert_extension_triggers(pkg, extensions, d_weakdeps)
+                    if deps isa Vector{String}
+                        deps = expand_deps_list(deps)
+                    end
+
+                    total_deps = merge(weakdeps, deps)
+                    return _insert_extension_triggers(pkg, extensions, total_deps)
                 end
             end
         end
@@ -1395,12 +1498,12 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi
     return nothing
 end
 
-function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any})
+function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, totaldeps::Dict{String, Any})
     for (ext, triggers) in extensions
         triggers = triggers::Union{String, Vector{String}}
         triggers isa String && (triggers = [triggers])
         id = PkgId(uuid5(parent.uuid::UUID, ext), ext)
-        if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id)
+        if haskey(EXT_PRIMED, id) || haskey(Base.loaded_modules, id)
             continue  # extension is already primed or loaded, don't add it again
         end
         EXT_PRIMED[id] = parent
@@ -1409,7 +1512,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}
         push!(trigger1, gid)
         for trigger in triggers
             # TODO: Better error message if this lookup fails?
-            uuid_trigger = UUID(weakdeps[trigger]::String)
+            uuid_trigger = UUID(totaldeps[trigger]::String)
             trigger_id = PkgId(uuid_trigger, trigger)
             if !haskey(explicit_loaded_modules, trigger_id) || haskey(package_locks, trigger_id)
                 trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id)
@@ -1524,7 +1627,21 @@ function CacheFlags(f::UInt8)
     CacheFlags(use_pkgimages, debug_level, check_bounds, inline, opt_level)
 end
 CacheFlags(f::Int) = CacheFlags(UInt8(f))
-CacheFlags() = CacheFlags(ccall(:jl_cache_flags, UInt8, ()))
+function CacheFlags(cf::CacheFlags=CacheFlags(ccall(:jl_cache_flags, UInt8, ()));
+            use_pkgimages::Union{Nothing,Bool}=nothing,
+            debug_level::Union{Nothing,Int}=nothing,
+            check_bounds::Union{Nothing,Int}=nothing,
+            inline::Union{Nothing,Bool}=nothing,
+            opt_level::Union{Nothing,Int}=nothing
+        )
+    return CacheFlags(
+        use_pkgimages === nothing ? cf.use_pkgimages : use_pkgimages,
+        debug_level === nothing ? cf.debug_level : debug_level,
+        check_bounds === nothing ? cf.check_bounds : check_bounds,
+        inline === nothing ? cf.inline : inline,
+        opt_level === nothing ? cf.opt_level : opt_level
+    )
+end
 
 function _cacheflag_to_uint8(cf::CacheFlags)::UInt8
     f = UInt8(0)
@@ -1629,35 +1746,22 @@ end
 # should sync with the types of arguments of `stale_cachefile`
 const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String}
 
-"""
-    Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false)
-
-Returns whether a given PkgId within the active project is precompiled.
-
-By default this check observes the same approach that code loading takes
-with respect to when different versions of dependencies are currently loaded
-to that which is expected. To ignore loaded modules and answer as if in a
-fresh julia session specify `ignore_loaded=true`.
-
-!!! compat "Julia 1.10"
-    This function requires at least Julia 1.10.
-"""
-function isprecompiled(pkg::PkgId;
+function compilecache_path(pkg::PkgId;
         ignore_loaded::Bool=false,
         stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(),
         cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg),
         sourcepath::Union{String,Nothing}=Base.locate_package(pkg),
         flags::CacheFlags=CacheFlags())
+    path = nothing
     isnothing(sourcepath) && error("Cannot locate source for $(repr("text/plain", pkg))")
     for path_to_try in cachepaths
         staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true, requested_flags=flags)
         if staledeps === true
             continue
         end
-        staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}}
+        staledeps, _, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
         # finish checking staledeps module graph
-        for i in 1:length(staledeps)
-            dep = staledeps[i]
+        for dep in staledeps
             dep isa Module && continue
             modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
             modpaths = find_all_in_cache_path(modkey)
@@ -1679,32 +1783,83 @@ function isprecompiled(pkg::PkgId;
             # file might be read-only and then we fail to update timestamp, which is fine
             ex isa IOError || rethrow()
         end
-        return true
+        path = path_to_try
+        break
         @label check_next_path
     end
-    return false
+    return path
+end
+
+"""
+    Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false)
+
+Returns whether a given PkgId within the active project is precompiled.
+
+By default this check observes the same approach that code loading takes
+with respect to when different versions of dependencies are currently loaded
+to that which is expected. To ignore loaded modules and answer as if in a
+fresh julia session specify `ignore_loaded=true`.
+
+!!! compat "Julia 1.10"
+    This function requires at least Julia 1.10.
+"""
+function isprecompiled(pkg::PkgId;
+        ignore_loaded::Bool=false,
+        stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(),
+        cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg),
+        sourcepath::Union{String,Nothing}=Base.locate_package(pkg),
+        flags::CacheFlags=CacheFlags())
+    path = compilecache_path(pkg; ignore_loaded, stale_cache, cachepaths, sourcepath, flags)
+    return !isnothing(path)
+end
+
+"""
+    Base.isrelocatable(pkg::PkgId)
+
+Returns whether a given PkgId within the active project is precompiled and the
+associated cache is relocatable.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+"""
+function isrelocatable(pkg::PkgId)
+    path = compilecache_path(pkg)
+    isnothing(path) && return false
+    io = open(path, "r")
+    try
+        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
+        _, (includes, includes_srcfiles, _), _... = _parse_cache_header(io, path)
+        for inc in includes
+            !startswith(inc.filename, "@depot") && return false
+            if inc ∉ includes_srcfiles
+                # its an include_dependency
+                track_content = inc.mtime == -1.0
+                track_content || return false
+            end
+        end
+    finally
+        close(io)
+    end
+    return true
 end
 
 # search for a precompile cache file to load, after some various checks
 function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
     assert_havelock(require_lock)
-    if root_module_exists(modkey)
-        loaded = root_module(modkey)
-    else
-        loaded = start_loading(modkey)
-        if loaded === nothing
-            try
-                modpath = locate_package(modkey)
-                modpath === nothing && return nothing
-                set_pkgorigin_version_path(modkey, String(modpath))
-                loaded = _require_search_from_serialized(modkey, String(modpath), build_id, true)
-            finally
-                end_loading(modkey, loaded)
-            end
-            if loaded isa Module
-                insert_extension_triggers(modkey)
-                run_package_callbacks(modkey)
-            end
+    loaded = start_loading(modkey, build_id, false)
+    if loaded === nothing
+        try
+            modpath = locate_package(modkey)
+            isnothing(modpath) && error("Cannot locate source for $(repr("text/plain", modkey))")
+            modpath = String(modpath)::String
+            set_pkgorigin_version_path(modkey, modpath)
+            loaded = _require_search_from_serialized(modkey, modpath, build_id, true)
+        finally
+            end_loading(modkey, loaded)
+        end
+        if loaded isa Module
+            insert_extension_triggers(modkey)
+            run_package_callbacks(modkey)
         end
     end
     if loaded isa Module && PkgId(loaded) == modkey && module_build_id(loaded) === build_id
@@ -1777,10 +1932,11 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union
         depmods[i] = dep
     end
     # then load the file
-    return _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native)
+    loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native; register = true)
+    return loaded
 end
 
-# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it
+# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it or it was stale
 # returns the set of modules restored if the cache load succeeded
 @constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128, stalecheck::Bool; reasons=nothing, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH)
     assert_havelock(require_lock)
@@ -1792,9 +1948,9 @@ end
             continue
         end
         try
-            staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}}
+            staledeps, ocachefile, newbuild_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
             # finish checking staledeps module graph
-            for i in 1:length(staledeps)
+            for i in eachindex(staledeps)
                 dep = staledeps[i]
                 dep isa Module && continue
                 modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
@@ -1804,7 +1960,7 @@ end
                     if modstaledeps === true
                         continue
                     end
-                    modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}}
+                    modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
                     staledeps[i] = (modpath, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath)
                     @goto check_next_dep
                 end
@@ -1812,6 +1968,11 @@ end
                 @goto check_next_path
                 @label check_next_dep
             end
+            M = get(loaded_precompiles, pkg => newbuild_id, nothing)
+            if isa(M, Module)
+                stalecheck && register_root_module(M)
+                return M
+            end
             if stalecheck
                 try
                     touch(path_to_try) # update timestamp of precompilation file
@@ -1820,30 +1981,25 @@ end
                 end
             end
             # finish loading module graph into staledeps
-            for i in 1:length(staledeps)
+            # TODO: call all start_loading calls (in reverse order) before calling any _include_from_serialized, since start_loading will drop the loading lock
+            for i in eachindex(staledeps)
                 dep = staledeps[i]
                 dep isa Module && continue
                 modpath, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}}
-                dep = nothing
-                if root_module_exists(modkey)
-                    dep = root_module(modkey)
-                end
+                dep = start_loading(modkey, modbuild_id, stalecheck)
                 while true
                     if dep isa Module
                         if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id
                             break
                         else
-                            if stalecheck
-                                @debug "Rejecting cache file $path_to_try because module $modkey is already loaded and incompatible."
-                                @goto check_next_path
-                            end
+                            @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected."
+                            @goto check_next_path
                         end
                     end
-                    dep = start_loading(modkey)
                     if dep === nothing
                         try
                             set_pkgorigin_version_path(modkey, modpath)
-                            dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps)
+                            dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps; register = stalecheck)
                         finally
                             end_loading(modkey, dep)
                         end
@@ -1857,14 +2013,17 @@ end
                 end
                 staledeps[i] = dep
             end
-            restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps)
+            restored = get(loaded_precompiles, pkg => newbuild_id, nothing)
+            if !isa(restored, Module)
+                restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps; register = stalecheck)
+            end
             isa(restored, Module) && return restored
             @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
             @label check_next_path
         finally
             for modkey in newdeps
                 insert_extension_triggers(modkey)
-                run_package_callbacks(modkey)
+                stalecheck && run_package_callbacks(modkey)
             end
             empty!(newdeps)
         end
@@ -1877,12 +2036,22 @@ const package_locks = Dict{PkgId,Pair{Task,Threads.Condition}}()
 
 debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but more complete algorithm that can handle simultaneous tasks.
                                # This only triggers if you have multiple tasks trying to load the same package at the same time,
-                               # so it is unlikely to make a difference normally.
-function start_loading(modkey::PkgId)
-    # handle recursive calls to require
+                               # so it is unlikely to make a performance difference normally.
+function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool)
+    # handle recursive and concurrent calls to require
     assert_havelock(require_lock)
-    loading = get(package_locks, modkey, nothing)
-    if loading !== nothing
+    while true
+        loaded = stalecheck ? maybe_root_module(modkey) : nothing
+        loaded isa Module && return loaded
+        if build_id != UInt128(0)
+            loaded = get(loaded_precompiles, modkey => build_id, nothing)
+            loaded isa Module && return loaded
+        end
+        loading = get(package_locks, modkey, nothing)
+        if loading === nothing
+            package_locks[modkey] = current_task() => Threads.Condition(require_lock)
+            return nothing
+        end
         # load already in progress for this module on the task
         task, cond = loading
         deps = String[modkey.name]
@@ -1921,10 +2090,9 @@ function start_loading(modkey::PkgId)
             end
             throw(ConcurrencyViolationError(msg))
         end
-        return wait(cond)
+        loaded = wait(cond)
+        loaded isa Module && return loaded
     end
-    package_locks[modkey] = current_task() => Threads.Condition(require_lock)
-    return
 end
 
 function end_loading(modkey::PkgId, @nospecialize loaded)
@@ -1943,17 +2111,24 @@ const package_callbacks = Any[]
 const include_callbacks = Any[]
 
 # used to optionally track dependencies when requiring a module:
-const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
+const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", because they are explicitly loaded, and the process should try to avoid invalidating them
 const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash, mtime) tuples that are the file dependencies of the module currently being precompiled
 const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
-function _include_dependency(mod::Module, _path::AbstractString; track_content=true)
+function _include_dependency(mod::Module, _path::AbstractString; track_content=true,
+                             path_may_be_dir=false)
     prev = source_path(nothing)
     if prev === nothing
         path = abspath(_path)
     else
         path = normpath(joinpath(dirname(prev), _path))
     end
-    if _track_dependencies[]
+    if !_track_dependencies[]
+        if !path_may_be_dir && !isfile(path)
+            throw(SystemError("opening file $(repr(path))", Libc.ENOENT))
+        elseif path_may_be_dir && !Filesystem.isreadable(path)
+            throw(SystemError("opening file or folder $(repr(path))", Libc.ENOENT))
+        end
+    else
         @lock require_lock begin
             if track_content
                 hash = isdir(path) ? _crc32c(join(readdir(path))) : open(_crc32c, path, "r")
@@ -1968,22 +2143,23 @@ function _include_dependency(mod::Module, _path::AbstractString; track_content=t
 end
 
 """
-    include_dependency(path::AbstractString; track_content::Bool=false)
+    include_dependency(path::AbstractString; track_content::Bool=true)
 
 In a module, declare that the file, directory, or symbolic link specified by `path`
-(relative or absolute) is a dependency for precompilation; that is, the module will need
-to be recompiled if the modification time `mtime` of `path` changes.
-If `track_content=true` recompilation is triggered when the content of `path` changes
+(relative or absolute) is a dependency for precompilation; that is, if `track_content=true`
+the module will need to be recompiled if the content of `path` changes
 (if `path` is a directory the content equals `join(readdir(path))`).
+If `track_content=false` recompilation is triggered when the modification time `mtime` of `path` changes.
 
 This is only needed if your module depends on a path that is not used via [`include`](@ref). It has
 no effect outside of compilation.
 
 !!! compat "Julia 1.11"
     Keyword argument `track_content` requires at least Julia 1.11.
+    An error is now thrown if `path` is not readable.
 """
-function include_dependency(path::AbstractString; track_content::Bool=false)
-    _include_dependency(Main, path, track_content=track_content)
+function include_dependency(path::AbstractString; track_content::Bool=true)
+    _include_dependency(Main, path, track_content=track_content, path_may_be_dir=true)
     return nothing
 end
 
@@ -2012,6 +2188,7 @@ order to throw an error if Julia attempts to precompile it.
 end
 
 # require always works in Main scope and loads files from node 1
+# XXX: (this is deprecated, but still used by Distributed)
 const toplevel_load = Ref(true)
 
 const _require_world_age = Ref{UInt}(typemax(UInt))
@@ -2130,15 +2307,16 @@ function collect_manifest_warnings()
     if !isempty(unsuitable_manifests)
         msg *= """
         - Note that the following manifests in the load path were resolved with a different
-          julia version, which may be the cause of the error:
+          julia version, which may be the cause of the error. Try to re-resolve them in the
+          current version, or consider deleting them if that fails:
             $(join(unsuitable_manifests, "\n    "))
         """
     end
     if !isempty(dev_manifests)
         msg *= """
-        - Note that the following manifests in the load path were resolved a potentially
-          different DEV version of the current version, which may be the cause
-          of the error:
+        - Note that the following manifests in the load path were resolved with a potentially
+          different DEV version of the current version, which may be the cause of the error.
+          Try to re-resolve them in the current version, or consider deleting them if that fails:
             $(join(dev_manifests, "\n    "))
         """
     end
@@ -2147,8 +2325,6 @@ end
 
 require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey)
 
-const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL")
-
 function _require_prelocked(uuidkey::PkgId, env=nothing)
     if _require_world_age[] != typemax(UInt)
         Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env)
@@ -2159,24 +2335,28 @@ end
 
 function __require_prelocked(uuidkey::PkgId, env=nothing)
     assert_havelock(require_lock)
-    if !root_module_exists(uuidkey)
-        newm = _require(uuidkey, env)
-        if newm === nothing
-            error("package `$(uuidkey.name)` did not define the expected \
-                  module `$(uuidkey.name)`, check for typos in package module name")
+    m = start_loading(uuidkey, UInt128(0), true)
+    if m === nothing
+        last = toplevel_load[]
+        try
+            toplevel_load[] = false
+            m = _require(uuidkey, env)
+            if m === nothing
+                error("package `$(uuidkey.name)` did not define the expected \
+                      module `$(uuidkey.name)`, check for typos in package module name")
+            end
+        finally
+            toplevel_load[] = last
+            end_loading(uuidkey, m)
         end
         insert_extension_triggers(uuidkey)
         # After successfully loading, notify downstream consumers
         run_package_callbacks(uuidkey)
-    else
-        m = get(loaded_modules, uuidkey, nothing)
-        if m !== nothing
-            explicit_loaded_modules[uuidkey] = m
-            run_package_callbacks(uuidkey)
-        end
-        newm = root_module(uuidkey)
+    elseif !haskey(explicit_loaded_modules, uuidkey)
+        explicit_loaded_modules[uuidkey] = m
+        run_package_callbacks(uuidkey)
     end
-    return newm
+    return m
 end
 
 mutable struct PkgOrigin
@@ -2187,15 +2367,19 @@ end
 PkgOrigin() = PkgOrigin(nothing, nothing, nothing)
 const pkgorigins = Dict{PkgId,PkgOrigin}()
 
-const loaded_modules = Dict{PkgId,Module}()
-# Emptied on Julia start
-const explicit_loaded_modules = Dict{PkgId,Module}()
+const explicit_loaded_modules = Dict{PkgId,Module}() # Emptied on Julia start
+const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded
+const loaded_precompiles = Dict{Pair{PkgId,UInt128},Module}() # extended (complete) list of modules, available to be loaded
 const loaded_modules_order = Vector{Module}()
-const module_keys = IdDict{Module,PkgId}() # the reverse
+const module_keys = IdDict{Module,PkgId}() # the reverse of loaded_modules
 
-is_root_module(m::Module) = @lock require_lock haskey(module_keys, m)
 root_module_key(m::Module) = @lock require_lock module_keys[m]
 
+function module_build_id(m::Module)
+    hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
+    return (UInt128(hi) << 64) | lo
+end
+
 @constprop :none function register_root_module(m::Module)
     # n.b. This is called from C after creating a new module in `Base.__toplevel__`,
     # instead of adding them to the binding table there.
@@ -2211,7 +2395,7 @@ root_module_key(m::Module) = @lock require_lock module_keys[m]
             end
         end
     end
-    push!(loaded_modules_order, m)
+    haskey(loaded_precompiles, key => module_build_id(m)) || push!(loaded_modules_order, m)
     loaded_modules[key] = m
     explicit_loaded_modules[key] = m
     module_keys[m] = key
@@ -2242,7 +2426,12 @@ maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, noth
 root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key)
 loaded_modules_array() = @lock require_lock copy(loaded_modules_order)
 
+# after unreference_module, a subsequent require call will try to load a new copy of it, if stale
+# reload(m) = (unreference_module(m); require(m))
 function unreference_module(key::PkgId)
+    if haskey(explicit_loaded_modules, key)
+        m = pop!(explicit_loaded_modules, key)
+    end
     if haskey(loaded_modules, key)
         m = pop!(loaded_modules, key)
         # need to ensure all modules are GC rooted; will still be referenced
@@ -2262,129 +2451,122 @@ function set_pkgorigin_version_path(pkg::PkgId, path::String)
     nothing
 end
 
-# A hook to allow code load to use Pkg.precompile
+# Unused
 const PKG_PRECOMPILE_HOOK = Ref{Function}()
+disable_parallel_precompile::Bool = false
 
 # Returns `nothing` or the new(ish) module
 function _require(pkg::PkgId, env=nothing)
     assert_havelock(require_lock)
-    loaded = start_loading(pkg)
-    loaded === nothing || return loaded
 
-    last = toplevel_load[]
-    try
-        toplevel_load[] = false
-        # perform the search operation to select the module file require intends to load
-        path = locate_package(pkg, env)
-        if path === nothing
-            throw(ArgumentError("""
-                Package $(repr("text/plain", pkg)) is required but does not seem to be installed:
-                 - Run `Pkg.instantiate()` to install all recorded dependencies.
-                """))
-        end
-        set_pkgorigin_version_path(pkg, path)
-
-        pkg_precompile_attempted = false # being safe to avoid getting stuck in a Pkg.precompile loop
-        reasons = Dict{String,Int}()
-        # attempt to load the module file via the precompile cache locations
-        if JLOptions().use_compiled_modules != 0
-            @label load_from_cache
-            m = _require_search_from_serialized(pkg, path, UInt128(0), true; reasons)
-            if m isa Module
-                return m
-            end
-        end
-
-        if JLOptions().use_compiled_modules == 3
-            error("Precompiled image $pkg not available with flags $(CacheFlags())")
-        end
-
-        # if the module being required was supposed to have a particular version
-        # but it was not handled by the precompile loader, complain
-        for (concrete_pkg, concrete_build_id) in _concrete_dependencies
-            if pkg == concrete_pkg
-                @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
-                     This may mean $(repr("text/plain", pkg)) does not support precompilation but is imported by a module that does."""
-                if JLOptions().incremental != 0
-                    # during incremental precompilation, this should be fail-fast
-                    throw(PrecompilableError())
-                end
+    # perform the search operation to select the module file require intends to load
+    path = locate_package(pkg, env)
+    if path === nothing
+        throw(ArgumentError("""
+            Package $(repr("text/plain", pkg)) is required but does not seem to be installed:
+             - Run `Pkg.instantiate()` to install all recorded dependencies.
+            """))
+    end
+    set_pkgorigin_version_path(pkg, path)
+
+    parallel_precompile_attempted = false # being safe to avoid getting stuck in a precompilepkgs loop
+    reasons = Dict{String,Int}()
+    # attempt to load the module file via the precompile cache locations
+    if JLOptions().use_compiled_modules != 0
+        @label load_from_cache
+        loaded = _require_search_from_serialized(pkg, path, UInt128(0), true; reasons)
+        if loaded isa Module
+            return loaded
+        end
+    end
+
+    if JLOptions().use_compiled_modules == 3
+        error("Precompiled image $pkg not available with flags $(CacheFlags())")
+    end
+
+    # if the module being required was supposed to have a particular version
+    # but it was not handled by the precompile loader, complain
+    for (concrete_pkg, concrete_build_id) in _concrete_dependencies
+        if pkg == concrete_pkg
+            @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
+                 This may mean $(repr("text/plain", pkg)) does not support precompilation but is imported by a module that does."""
+            if JLOptions().incremental != 0
+                # during incremental precompilation, this should be fail-fast
+                throw(PrecompilableError())
             end
         end
+    end
 
-        if JLOptions().use_compiled_modules == 1
-            if !generating_output(#=incremental=#false)
-                if !pkg_precompile_attempted && isinteractive() && isassigned(PKG_PRECOMPILE_HOOK)
-                    pkg_precompile_attempted = true
-                    unlock(require_lock)
-                    try
-                        @invokelatest PKG_PRECOMPILE_HOOK[](pkg.name, _from_loading = true)
-                    finally
-                        lock(require_lock)
-                    end
-                    @goto load_from_cache
-                end
-                # spawn off a new incremental pre-compile task for recursive `require` calls
-                cachefile_or_module = maybe_cachefile_lock(pkg, path) do
-                    # double-check now that we have lock
-                    m = _require_search_from_serialized(pkg, path, UInt128(0), true)
-                    m isa Module && return m
-                    compilecache(pkg, path; reasons)
+    if JLOptions().use_compiled_modules == 1
+        if !generating_output(#=incremental=#false)
+            project = active_project()
+            if !generating_output() && !parallel_precompile_attempted && !disable_parallel_precompile && @isdefined(Precompilation) && project !== nothing &&
+                    isfile(project) && project_file_manifest_path(project) !== nothing
+                parallel_precompile_attempted = true
+                unlock(require_lock)
+                try
+                    Precompilation.precompilepkgs([pkg.name]; _from_loading=true)
+                finally
+                    lock(require_lock)
                 end
-                cachefile_or_module isa Module && return cachefile_or_module::Module
-                cachefile = cachefile_or_module
-                if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process
-                    @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search
-                elseif isa(cachefile, Exception)
-                    if precompilableerror(cachefile)
-                        verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
-                        @logmsg verbosity "Skipping precompilation due to precompilable error. Importing $(repr("text/plain", pkg))." exception=m
-                    else
-                        @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=m
-                    end
-                    # fall-through to loading the file locally if not incremental
+                @goto load_from_cache
+            end
+            # spawn off a new incremental pre-compile task for recursive `require` calls
+            loaded = maybe_cachefile_lock(pkg, path) do
+                # double-check the search now that we have lock
+                m = _require_search_from_serialized(pkg, path, UInt128(0), true)
+                m isa Module && return m
+                return compilecache(pkg, path; reasons)
+            end
+            loaded isa Module && return loaded
+            if isnothing(loaded) # maybe_cachefile_lock returns nothing if it had to wait for another process
+                @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search
+            elseif isa(loaded, Exception)
+                if precompilableerror(loaded)
+                    verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
+                    @logmsg verbosity "Skipping precompilation due to precompilable error. Importing $(repr("text/plain", pkg))." exception=loaded
                 else
-                    cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}}
-                    m = _tryrequire_from_serialized(pkg, cachefile, ocachefile)
-                    if !isa(m, Module)
-                        @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=m
-                    else
-                        return m
-                    end
+                    @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded
                 end
-                if JLOptions().incremental != 0
-                    # during incremental precompilation, this should be fail-fast
-                    throw(PrecompilableError())
+                # fall-through to loading the file locally if not incremental
+            else
+                cachefile, ocachefile = loaded::Tuple{String, Union{Nothing, String}}
+                loaded = _tryrequire_from_serialized(pkg, cachefile, ocachefile)
+                if !isa(loaded, Module)
+                    @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded
+                else
+                    return loaded
                 end
             end
+            if JLOptions().incremental != 0
+                # during incremental precompilation, this should be fail-fast
+                throw(PrecompilableError())
+            end
         end
+    end
 
-        # just load the file normally via include
-        # for unknown dependencies
-        uuid = pkg.uuid
-        uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid))
-        old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__)
+    # just load the file normally via include
+    # for unknown dependencies
+    uuid = pkg.uuid
+    uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid))
+    old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__)
+    if uuid !== old_uuid
+        ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
+    end
+    unlock(require_lock)
+    try
+        include(__toplevel__, path)
+        loaded = get(loaded_modules, pkg, nothing)
+    finally
+        lock(require_lock)
         if uuid !== old_uuid
-            ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
+            ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
         end
-        unlock(require_lock)
-        try
-            include(__toplevel__, path)
-            loaded = get(loaded_modules, pkg, nothing)
-        finally
-            lock(require_lock)
-            if uuid !== old_uuid
-                ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
-            end
-        end
-    finally
-        toplevel_load[] = last
-        end_loading(pkg, loaded)
     end
     return loaded
 end
 
-# load a serialized file directly
+# load a serialized file directly, including dependencies (without checking staleness except for immediate conflicts)
 function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}, sourcepath::String)
     @lock require_lock begin
     set_pkgorigin_version_path(uuidkey, sourcepath)
@@ -2398,51 +2580,53 @@ function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Unio
 end
 
 # load a serialized file directly from append_bundled_depot_path for uuidkey without stalechecks
-function require_stdlib(uuidkey::PkgId, ext::Union{Nothing, String}=nothing)
+function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing)
     @lock require_lock begin
-    if root_module_exists(uuidkey)
-        return loaded_modules[uuidkey]
+    # the PkgId of the ext, or package if not an ext
+    this_uuidkey = ext isa String ? PkgId(uuid5(package_uuidkey.uuid, ext), ext) : package_uuidkey
+    newm = maybe_root_module(this_uuidkey)
+    if newm isa Module
+        return newm
     end
     # first since this is a stdlib, try to look there directly first
     env = Sys.STDLIB
     #sourcepath = ""
     if ext === nothing
-        sourcepath = normpath(env, uuidkey.name, "src", uuidkey.name * ".jl")
+        sourcepath = normpath(env, this_uuidkey.name, "src", this_uuidkey.name * ".jl")
     else
-        sourcepath = find_ext_path(normpath(joinpath(env, uuidkey.name)), ext)
-        uuidkey = PkgId(uuid5(uuidkey.uuid, ext), ext)
+        sourcepath = find_ext_path(normpath(joinpath(env, package_uuidkey.name)), ext)
     end
-    #mbypath = manifest_uuid_path(env, uuidkey)
-    #if mbypath isa String
-    #    sourcepath = entry_path(mbypath, uuidkey.name)
+    #mbypath = manifest_uuid_path(env, this_uuidkey)
+    #if mbypath isa String && isfile_casesensitive(mbypath)
+    #    sourcepath = mbypath
     #else
     #    # if the user deleted the stdlib folder, we next try using their environment
-    #    sourcepath = locate_package_env(uuidkey)
+    #    sourcepath = locate_package_env(this_uuidkey)
     #    if sourcepath !== nothing
     #        sourcepath, env = sourcepath
     #    end
     #end
     #if sourcepath === nothing
     #    throw(ArgumentError("""
-    #        Package $(repr("text/plain", uuidkey)) is required but does not seem to be installed.
+    #        Package $(repr("text/plain", this_uuidkey)) is required but does not seem to be installed.
     #        """))
     #end
-    set_pkgorigin_version_path(uuidkey, sourcepath)
+    set_pkgorigin_version_path(this_uuidkey, sourcepath)
     depot_path = append_bundled_depot_path!(empty(DEPOT_PATH))
-    newm = start_loading(uuidkey)
+    newm = start_loading(this_uuidkey, UInt128(0), true)
     newm === nothing || return newm
     try
-        newm = _require_search_from_serialized(uuidkey, sourcepath, UInt128(0), false; DEPOT_PATH=depot_path)
+        newm = _require_search_from_serialized(this_uuidkey, sourcepath, UInt128(0), false; DEPOT_PATH=depot_path)
     finally
-        end_loading(uuidkey, newm)
+        end_loading(this_uuidkey, newm)
     end
     if newm isa Module
         # After successfully loading, notify downstream consumers
-        insert_extension_triggers(env, uuidkey)
-        run_package_callbacks(uuidkey)
+        insert_extension_triggers(env, this_uuidkey)
+        run_package_callbacks(this_uuidkey)
     else
         # if the user deleted their bundled depot, next try to load it completely normally
-        newm = _require(uuidkey)
+        newm = _require_prelocked(this_uuidkey)
     end
     return newm
     end
@@ -2677,9 +2861,9 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
     @nospecialize internal_stderr internal_stdout
     rm(output, force=true)   # Remove file if it exists
     output_o === nothing || rm(output_o, force=true)
-    depot_path = map(abspath, DEPOT_PATH)
-    dl_load_path = map(abspath, DL_LOAD_PATH)
-    load_path = map(abspath, Base.load_path())
+    depot_path = String[abspath(x) for x in DEPOT_PATH]
+    dl_load_path = String[abspath(x) for x in DL_LOAD_PATH]
+    load_path = String[abspath(x) for x in Base.load_path()]
     # if pkg is a stdlib, append its parent Project.toml to the load path
     parentid = get(EXT_PRIMED, pkg, nothing)
     if parentid !== nothing
@@ -2726,7 +2910,7 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
         opts = `-O0 --output-ji $(output) --output-incremental=yes`
     end
 
-    trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : ``
+    trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[]) --trace-compile-timing` : ``
     io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd)
                              $(flags)
                              $(opts)
@@ -2767,7 +2951,7 @@ function compilecache_dir(pkg::PkgId)
     return joinpath(DEPOT_PATH[1], entrypath)
 end
 
-function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String
+function compilecache_path(pkg::PkgId, prefs_hash::UInt64; flags::CacheFlags=CacheFlags(), project::String=something(Base.active_project(), ""))::String
     entrypath, entryfile = cache_file_entry(pkg)
     cachepath = joinpath(DEPOT_PATH[1], entrypath)
     isdir(cachepath) || mkpath(cachepath)
@@ -2777,7 +2961,7 @@ function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=somet
         crc = _crc32c(project)
         crc = _crc32c(unsafe_string(JLOptions().image_file), crc)
         crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc)
-        crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc)
+        crc = _crc32c(_cacheflag_to_uint8(flags), crc)
 
         cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing)
         if cpu_target === nothing
@@ -2809,20 +2993,23 @@ end
 const MAX_NUM_PRECOMPILE_FILES = Ref(10)
 
 function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
-                      keep_loaded_modules::Bool = true; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
+                      keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(),
+                      reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
 
     @nospecialize internal_stderr internal_stdout
     # decide where to put the resulting cache file
     cachepath = compilecache_dir(pkg)
 
     # build up the list of modules that we want the precompile process to preserve
-    concrete_deps = copy(_concrete_dependencies)
     if keep_loaded_modules
-        for mod in loaded_modules_array()
-            if !(mod === Main || mod === Core || mod === Base)
-                push!(concrete_deps, PkgId(mod) => module_build_id(mod))
+        concrete_deps = copy(_concrete_dependencies)
+        for (pkgreq, modreq) in loaded_modules # TODO: convert all relevant staleness heuristics to use explicit_loaded_modules instead
+            if !(pkgreq === Main || pkgreq === Core || pkgreq === Base)
+                push!(concrete_deps, pkgreq => module_build_id(modreq))
             end
         end
+    else
+        concrete_deps = empty(_concrete_dependencies)
     end
     # run the expression and cache the result
     verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
@@ -2858,7 +3045,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             # Read preferences hash back from .ji file (we can't precompute because
             # we don't actually know what the list of compile-time preferences are without compiling)
             prefs_hash = preferences_hash(tmppath)
-            cachefile = compilecache_path(pkg, prefs_hash)
+            cachefile = compilecache_path(pkg, prefs_hash; flags=cacheflags)
             ocachefile = cache_objects ? ocachefile_from_cachefile(cachefile) : nothing
 
             # append checksum for so to the end of the .ji file:
@@ -2912,7 +3099,9 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
                 end
             end
             # this is atomic according to POSIX (not Win32):
-            rename(tmppath, cachefile; force=true)
+            # but force=true means it will fall back to non atomic
+            # move if the initial rename fails.
+            mv(tmppath, cachefile; force=true)
             return cachefile, ocachefile
         end
     finally
@@ -2931,7 +3120,7 @@ end
 
 function rename_unique_ocachefile(tmppath_so::String, ocachefile_orig::String, ocachefile::String = ocachefile_orig, num = 0)
     try
-        rename(tmppath_so, ocachefile; force=true)
+        mv(tmppath_so, ocachefile; force=true)
     catch e
         e isa IOError || rethrow()
         # If `rm` was called on a dir containing a loaded DLL, we moved it to temp for cleanup
@@ -2950,9 +3139,12 @@ function rename_unique_ocachefile(tmppath_so::String, ocachefile_orig::String, o
     return ocachefile
 end
 
-function module_build_id(m::Module)
-    hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
-    return (UInt128(hi) << 64) | lo
+function object_build_id(obj)
+    mod = ccall(:jl_object_top_module, Any, (Any,), obj)
+    if mod === nothing
+        return nothing
+    end
+    return module_build_id(mod::Module)
 end
 
 function isvalid_cache_header(f::IOStream)
@@ -3014,7 +3206,7 @@ function resolve_depot(inc::AbstractString)
 end
 
 
-function parse_cache_header(f::IO, cachefile::AbstractString)
+function _parse_cache_header(f::IO, cachefile::AbstractString)
     flags = read(f, UInt8)
     modules = Vector{Pair{PkgId, UInt64}}()
     while true
@@ -3098,9 +3290,16 @@ function parse_cache_header(f::IO, cachefile::AbstractString)
 
     srcfiles = srctext_files(f, srctextpos, includes)
 
+    return modules, (includes, srcfiles, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags
+end
+
+function parse_cache_header(f::IO, cachefile::AbstractString)
+    modules, (includes, srcfiles, requires), required_modules,
+        srctextpos, prefs, prefs_hash, clone_targets, flags = _parse_cache_header(f, cachefile)
+
     includes_srcfiles = CacheHeaderIncludes[]
     includes_depfiles = CacheHeaderIncludes[]
-    for (i, inc) in enumerate(includes)
+    for inc in includes
         if inc.filename ∈ srcfiles
             push!(includes_srcfiles, inc)
         else
@@ -3108,23 +3307,63 @@ function parse_cache_header(f::IO, cachefile::AbstractString)
         end
     end
 
-    # determine depot for @depot replacement for include() files and include_dependency() files separately
-    srcfiles_depot = resolve_depot(first(srcfiles))
-    if srcfiles_depot === :no_depot_found
-        @debug("Unable to resolve @depot tag include() files from cache file $cachefile", srcfiles, _group=:relocatable)
-    elseif srcfiles_depot === :not_relocatable
-        @debug("include() files from $cachefile are not relocatable", srcfiles, _group=:relocatable)
-    else
+
+    # The @depot resolution logic for include() files:
+    # 1. If the cache is not relocatable because of an absolute path,
+    #    we ignore that path for the depot search.
+    #    Recompilation will be triggered by stale_cachefile() if that absolute path does not exist.
+    # 2. If we can't find a depot for a relocatable path,
+    #    we still replace it with the depot we found from other files.
+    #    Recompilation will be triggered by stale_cachefile() because the resolved path does not exist.
+    # 3. We require that relocatable paths all resolve to the same depot.
+    # 4. We explicitly check that all relocatable paths resolve to the same depot. This has two reasons:
+    #    - We want to scan all source files in order to provide logs for 1. and 2. above.
+    #    - It is possible that a depot might be missing source files.
+    #      Assume that we have two depots on DEPOT_PATH, depot_complete and depot_incomplete.
+    #      If DEPOT_PATH=["depot_complete","depot_incomplete"] then no recompilation shall happen,
+    #      because depot_complete will be picked.
+    #      If DEPOT_PATH=["depot_incomplete","depot_complete"] we trigger recompilation and
+    #      hopefully a meaningful error about missing files is thrown.
+    #      If we were to just select the first depot we find, then whether recompilation happens would
+    #      depend on whether the first relocatable file resolves to depot_complete or depot_incomplete.
+    srcdepot = nothing
+    any_not_relocatable = false
+    any_no_depot_found = false
+    multiple_depots_found = false
+    for src in srcfiles
+        depot = resolve_depot(src)
+        if depot === :not_relocatable
+            any_not_relocatable = true
+        elseif depot === :no_depot_found
+            any_no_depot_found = true
+        elseif isnothing(srcdepot)
+            srcdepot = depot
+        elseif depot != srcdepot
+            multiple_depots_found = true
+        end
+    end
+    if any_no_depot_found
+        @debug("Unable to resolve @depot tag for at least one include() file from cache file $cachefile", srcfiles, _group=:relocatable)
+    end
+    if any_not_relocatable
+        @debug("At least one include() file from $cachefile is not relocatable", srcfiles, _group=:relocatable)
+    end
+    if multiple_depots_found
+        @debug("Some include() files from $cachefile are distributed over multiple depots", srcfiles, _group=:relocatable)
+    elseif !isnothing(srcdepot)
         for inc in includes_srcfiles
-            inc.filename = restore_depot_path(inc.filename, srcfiles_depot)
+            inc.filename = restore_depot_path(inc.filename, srcdepot)
         end
     end
+
+    # unlike include() files, we allow each relocatable include_dependency() file to resolve
+    # to a separate depot, #52161
     for inc in includes_depfiles
         depot = resolve_depot(inc.filename)
         if depot === :no_depot_found
-            @debug("Unable to resolve @depot tag for include_dependency() file $(inc.filename) from cache file $cachefile", srcfiles, _group=:relocatable)
+            @debug("Unable to resolve @depot tag for include_dependency() file $(inc.filename) from cache file $cachefile", _group=:relocatable)
         elseif depot === :not_relocatable
-            @debug("include_dependency() file $(inc.filename) from $cachefile is not relocatable", srcfiles, _group=:relocatable)
+            @debug("include_dependency() file $(inc.filename) from $cachefile is not relocatable", _group=:relocatable)
         else
             inc.filename = restore_depot_path(inc.filename, depot)
         end
@@ -3340,9 +3579,27 @@ function recursive_prefs_merge(base::Dict{String, Any}, overrides::Dict{String,
     return new_base
 end
 
+function get_projects_workspace_to_root(project_file)
+    projects = String[project_file]
+    while true
+        project_file = base_project(project_file)
+        if project_file === nothing
+            return projects
+        end
+        push!(projects, project_file)
+    end
+end
+
 function get_preferences(uuid::Union{UUID,Nothing} = nothing)
     merged_prefs = Dict{String,Any}()
-    for env in reverse(load_path())
+    loadpath = load_path()
+    projects_to_merge_prefs = String[]
+    append!(projects_to_merge_prefs, Iterators.drop(loadpath, 1))
+    if length(loadpath) >= 1
+        prepend!(projects_to_merge_prefs, get_projects_workspace_to_root(first(loadpath)))
+    end
+
+    for env in reverse(projects_to_merge_prefs)
         project_toml = env_project_file(env)
         if !isa(project_toml, String)
             continue
@@ -3400,14 +3657,14 @@ function check_clone_targets(clone_targets)
 end
 
 # Set by FileWatching.__init__()
-global mkpidlock_hook
-global trymkpidlock_hook
-global parse_pidfile_hook
+global mkpidlock_hook::Any
+global trymkpidlock_hook::Any
+global parse_pidfile_hook::Any
 
 # The preferences hash is only known after precompilation so just assume no preferences.
 # Also ignore the active project, which means that if all other conditions are equal,
 # the same package cannot be precompiled from different projects and/or different preferences at the same time.
-compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile"
+compilecache_pidfile_path(pkg::PkgId; flags::CacheFlags=CacheFlags()) = compilecache_path(pkg, UInt64(0); project="", flags) * ".pidfile"
 
 const compilecache_pidlock_stale_age = 10
 
@@ -3460,8 +3717,14 @@ end
 @constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String;
                                           ignore_loaded::Bool=false, requested_flags::CacheFlags=CacheFlags(),
                                           reasons::Union{Dict{String,Int},Nothing}=nothing, stalecheck::Bool=true)
-    # XXX: this function appears to dl all of the file validation, not just those checks related to stale
-    io = open(cachefile, "r")
+    # n.b.: this function does nearly all of the file validation, not just those checks related to stale, so the name is potentially unclear
+    io = try
+        open(cachefile, "r")
+    catch ex
+        ex isa IOError || ex isa SystemError || rethrow()
+        @debug "Rejecting cache file $cachefile for $modkey because it could not be opened" isfile(cachefile)
+        return true
+    end
     try
         checksum = isvalid_cache_header(io)
         if iszero(checksum)
@@ -3514,8 +3777,8 @@ end
             record_reason(reasons, "for different pkgid")
             return true
         end
+        id_build = (UInt128(checksum) << 64) | id.second
         if build_id != UInt128(0)
-            id_build = (UInt128(checksum) << 64) | id.second
             if id_build != build_id
                 @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it does not provide desired build_id ($((UUID(build_id))))"
                 record_reason(reasons, "for different buildid")
@@ -3530,17 +3793,25 @@ end
         depmods = Vector{Any}(undef, ndeps)
         for i in 1:ndeps
             req_key, req_build_id = required_modules[i]
-            # Module is already loaded
-            if root_module_exists(req_key)
+            # Check if module is already loaded
+            if !stalecheck && haskey(loaded_precompiles, req_key => req_build_id)
+                M = loaded_precompiles[req_key => req_build_id]
+                @assert PkgId(M) == req_key && module_build_id(M) === req_build_id
+                depmods[i] = M
+            elseif root_module_exists(req_key)
                 M = root_module(req_key)
                 if PkgId(M) == req_key && module_build_id(M) === req_build_id
                     depmods[i] = M
+                elseif M == Core
+                    @debug "Rejecting cache file $cachefile because it was made with a different julia version"
+                    record_reason(reasons, "wrong julia version")
+                    return true # Won't be able to fulfill dependency
                 elseif ignore_loaded || !stalecheck
                     # Used by Pkg.precompile given that there it's ok to precompile different versions of loaded packages
                     @goto locate_branch
                 else
                     @debug "Rejecting cache file $cachefile because module $req_key is already loaded and incompatible."
-                    record_reason(reasons, req_key == PkgId(Core) ? "wrong julia version" : "wrong dep version loaded")
+                    record_reason(reasons, "wrong dep version loaded")
                     return true # Won't be able to fulfill dependency
                 end
             else
@@ -3558,17 +3829,19 @@ end
         # check if this file is going to provide one of our concrete dependencies
         # or if it provides a version that conflicts with our concrete dependencies
         # or neither
-        for (req_key, req_build_id) in _concrete_dependencies
-            build_id = get(modules, req_key, UInt64(0))
-            if build_id !== UInt64(0)
-                build_id |= UInt128(checksum) << 64
-                if build_id === req_build_id
-                    stalecheck = false
-                    break
+        if stalecheck
+            for (req_key, req_build_id) in _concrete_dependencies
+                build_id = get(modules, req_key, UInt64(0))
+                if build_id !== UInt64(0)
+                    build_id |= UInt128(checksum) << 64
+                    if build_id === req_build_id
+                        stalecheck = false
+                        break
+                    end
+                    @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
+                    record_reason(reasons, "wrong dep buildid")
+                    return true # cachefile doesn't provide the required version of the dependency
                 end
-                @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
-                record_reason(reasons, "wrong dep buildid")
-                return true # cachefile doesn't provide the required version of the dependency
             end
         end
 
@@ -3597,7 +3870,7 @@ end
                 end
                 if !ispath(f)
                     _f = fixup_stdlib_path(f)
-                    if isfile(_f) && startswith(_f, Sys.STDLIB)
+                    if _f != f && isfile(_f) && startswith(_f, Sys.STDLIB)
                         continue
                     end
                     @debug "Rejecting stale cache file $cachefile because file $f does not exist"
@@ -3619,13 +3892,14 @@ end
                         return true
                     end
                 else
-                    fsize = filesize(f)
+                    fstat = stat(f)
+                    fsize = filesize(fstat)
                     if fsize != fsize_req
                         @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)"
                         record_reason(reasons, "include_dependency fsize change")
                         return true
                     end
-                    hash = isdir(f) ? _crc32c(join(readdir(f))) : open(_crc32c, f, "r")
+                    hash = isdir(fstat) ? _crc32c(join(readdir(f))) : open(_crc32c, f, "r")
                     if hash != hash_req
                         @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)"
                         record_reason(reasons, "include_dependency fhash change")
@@ -3656,7 +3930,7 @@ end
             return true
         end
 
-        return depmods, ocachefile # fresh cachefile
+        return depmods, ocachefile, id_build # fresh cachefile
     finally
         close(io)
     end
@@ -3750,7 +4024,7 @@ function precompile(@nospecialize(argt::Type), m::Method)
     return precompile(mi)
 end
 
-@assert precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
-@assert precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String))
-@assert precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, IO, IO))
-@assert precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, IO, IO))
+precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) || @assert false
+precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) || @assert false
+precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false
+precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, IO, IO)) || @assert false
diff --git a/base/lock.jl b/base/lock.jl
index 7cbb023a78ee4..b473045e5809d 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -51,6 +51,20 @@ end
 
 assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
 
+show(io::IO, ::ReentrantLock) = print(io, ReentrantLock, "()")
+
+function show(io::IO, ::MIME"text/plain", l::ReentrantLock)
+    show(io, l)
+    if !(get(io, :compact, false)::Bool)
+        locked_by = l.locked_by
+        if locked_by isa Task
+            print(io, " (locked by ", locked_by === current_task() ? "current " : "", locked_by, ")")
+        else
+            print(io, " (unlocked)")
+        end
+    end
+end
+
 """
     islocked(lock) -> Status (Boolean)
 
@@ -498,10 +512,10 @@ This provides an acquire & release memory ordering on notify/wait.
     The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8.
 """
 mutable struct Event
-    const notify::ThreadSynchronizer
+    const notify::Threads.Condition
     const autoreset::Bool
     @atomic set::Bool
-    Event(autoreset::Bool=false) = new(ThreadSynchronizer(), autoreset, false)
+    Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false)
 end
 
 function wait(e::Event)
diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/base/logging/ConsoleLogger.jl
similarity index 86%
rename from stdlib/Logging/src/ConsoleLogger.jl
rename to base/logging/ConsoleLogger.jl
index 08e4a8c6b2efe..c4596dd86c3f5 100644
--- a/stdlib/Logging/src/ConsoleLogger.jl
+++ b/base/logging/ConsoleLogger.jl
@@ -12,10 +12,10 @@ Log levels less than `min_level` are filtered out.
 Message formatting can be controlled by setting keyword arguments:
 
 * `meta_formatter` is a function which takes the log event metadata
-  `(level, _module, group, id, file, line)` and returns a face name (used in
-  the constructed [`AnnotatedString`](@ref Base.AnnotatedString)), prefix and
-  suffix for the log message.  The default is to prefix with the log level and
-  a suffix containing the module, file and line location.
+  `(level, _module, group, id, file, line)` and returns a color (as would be
+  passed to printstyled), prefix and suffix for the log message.  The
+  default is to prefix with the log level and a suffix containing the module,
+  file and line location.
 * `show_limited` limits the printing of large data structures to something
   which can fit on the screen by setting the `:limit` `IOContext` key during
   formatting.
@@ -58,10 +58,10 @@ end
 showvalue(io, ex::Exception) = showerror(io, ex)
 
 function default_logcolor(level::LogLevel)
-    level < Info  ? :log_debug :
-    level < Warn  ? :log_info  :
-    level < Error ? :log_warn  :
-                    :log_error
+    level < Info  ? Base.debug_color() :
+    level < Warn  ? Base.info_color()  :
+    level < Error ? Base.warn_color()  :
+                    Base.error_color()
 end
 
 function default_metafmt(level::LogLevel, _module, group, id, file, line)
@@ -103,8 +103,6 @@ function termlength(str)
     return N
 end
 
-termlength(str::Base.AnnotatedString) = textwidth(str)
-
 function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module, group, id,
                         filepath, line; kwargs...)
     @nospecialize
@@ -156,10 +154,6 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
     # Format lines as text with appropriate indentation and with a box
     # decoration on the left.
     color, prefix, suffix = logger.meta_formatter(level, _module, group, id, filepath, line)::Tuple{Union{Symbol,Int},String,String}
-    lcolor = StyledStrings.Legacy.legacy_color(color)
-    if !isnothing(lcolor)
-        color = StyledStrings.Face(foreground=lcolor)
-    end
     minsuffixpad = 2
     buf = IOBuffer()
     iob = IOContext(buf, stream)
@@ -173,19 +167,19 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
         nonpadwidth = 2 + length(suffix)
     end
     for (i, (indent, msg)) in enumerate(msglines)
-        boxstr = length(msglines) == 1 ? "[" :
-                 i == 1                ? "┌" :
-                 i < length(msglines)  ? "│" :
-                                         "└"
-        print(iob, styled"{$color,bold:$boxstr} ")
+        boxstr = length(msglines) == 1 ? "[ " :
+                 i == 1                ? "┌ " :
+                 i < length(msglines)  ? "│ " :
+                                         "└ "
+        printstyled(iob, boxstr, bold=true, color=color)
         if i == 1 && !isempty(prefix)
-            print(iob, styled"{$color,bold:$prefix} ")
+            printstyled(iob, prefix, " ", bold=true, color=color)
         end
         print(iob, " "^indent, msg)
         if i == length(msglines) && !isempty(suffix)
             npad = max(0, justify_width - nonpadwidth) + minsuffixpad
             print(iob, " "^npad)
-            print(iob, styled"{shadow:$suffix}")
+            printstyled(iob, suffix, color=:light_black)
         end
         println(iob)
     end
diff --git a/base/logging.jl b/base/logging/logging.jl
similarity index 99%
rename from base/logging.jl
rename to base/logging/logging.jl
index bef8a89118371..5cf3882a300ec 100644
--- a/base/logging.jl
+++ b/base/logging/logging.jl
@@ -3,6 +3,7 @@
 module CoreLogging
 
 import Base: isless, +, -, convert, show
+import Base.ScopedValues: ScopedValue, with, @with
 
 export
     AbstractLogger,
@@ -59,7 +60,7 @@ function min_enabled_level end
     catch_exceptions(logger)
 
 Return `true` if the logger should catch exceptions which happen during log
-record construction.  By default, messages are caught
+record construction.  By default, messages are caught.
 
 By default all exceptions are caught to prevent log message generation from
 crashing the program.  This lets users confidently toggle little-used
@@ -699,4 +700,6 @@ end
 
 _global_logstate = LogState(SimpleLogger())
 
+include("logging/ConsoleLogger.jl")
+
 end # CoreLogging
diff --git a/base/math.jl b/base/math.jl
index c3c15505c5f8f..da51ab3a17bd0 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -23,7 +23,7 @@ import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
 using .Base: sign_mask, exponent_mask, exponent_one,
             exponent_half, uinttype, significand_mask,
             significand_bits, exponent_bits, exponent_bias,
-            exponent_max, exponent_raw_max
+            exponent_max, exponent_raw_max, clamp, clamp!
 
 using Core.Intrinsics: sqrt_llvm
 
@@ -69,98 +69,6 @@ end
     return Txy, T(xy-Txy)
 end
 
-"""
-    clamp(x, lo, hi)
-
-Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
-are promoted to a common type.
-
-See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` as the first argument requires at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
-3-element Vector{BigFloat}:
- 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
- 2.0
- 9.0
-
-julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
-3-element Vector{Int64}:
-  6
-  6
- 10
-```
-"""
-clamp(x::X, lo::L, hi::H) where {X,L,H} =
-    ifelse(x > hi, convert(promote_type(X,L,H), hi),
-           ifelse(x < lo,
-                  convert(promote_type(X,L,H), lo),
-                  convert(promote_type(X,L,H), x)))
-
-"""
-    clamp(x, T)::T
-
-Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
-
-See also [`trunc`](@ref).
-
-# Examples
-```jldoctest
-julia> clamp(200, Int8)
-127
-
-julia> clamp(-200, Int8)
--128
-
-julia> trunc(Int, 4pi^2)
-39
-```
-"""
-clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
-
-
-"""
-    clamp!(array::AbstractArray, lo, hi)
-
-Restrict values in `array` to the specified range, in-place.
-See also [`clamp`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` entries in `array` require at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> row = collect(-4:4)';
-
-julia> clamp!(row, 0, Inf)
-1×9 adjoint(::Vector{Int64}) with eltype Int64:
- 0  0  0  0  0  1  2  3  4
-
-julia> clamp.((-4:4)', 0, Inf)
-1×9 Matrix{Float64}:
- 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
-```
-"""
-function clamp!(x::AbstractArray, lo, hi)
-    @inbounds for i in eachindex(x)
-        x[i] = clamp(x[i], lo, hi)
-    end
-    x
-end
-
-"""
-    clamp(x::Integer, r::AbstractUnitRange)
-
-Clamp `x` to lie within range `r`.
-
-!!! compat "Julia 1.6"
-     This method requires at least Julia 1.6.
-"""
-clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
 
 """
     evalpoly(x, p)
@@ -1255,6 +1163,10 @@ function modf(x::T) where T<:IEEEFloat
     return (rx, ix)
 end
 
+@inline function use_power_by_squaring(n::Integer)
+    -2^12 <= n <= 3 * 2^13
+end
+
 # @constprop aggressive to help the compiler see the switch between the integer and float
 # variants for callers with constant `y`
 @constprop :aggressive function ^(x::Float64, y::Float64)
@@ -1267,24 +1179,33 @@ end
         y = sign(y)*0x1.8p62
     end
     yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result
-    y == yint && return @noinline x^yint
-    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0
-    x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
-    !isfinite(x) && return x*(y>0 || isnan(x))           # x is inf or NaN
+    yisint = y == yint
+    if yisint
+        yint == 0 && return 1.0
+        use_power_by_squaring(yint) && return @noinline pow_body(x, yint)
+    end
+    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x === +0.0 or -0.0 (Inf * false === 0.0)
+    s = 1
+    if x < 0
+        !yisint && throw_exp_domainerror(x) # y isn't an integer
+        s = ifelse(isodd(yint), -1, 1)
+    end
+    !isfinite(x) && return copysign(x,s)*(y>0 || isnan(x))           # x is inf or NaN
+    return copysign(pow_body(abs(x), y), s)
+end
+
+@assume_effects :foldable @noinline function pow_body(x::Float64, y::Float64)
+    xu = reinterpret(UInt64, x)
     if xu < (UInt64(1)<<52) # x is subnormal
         xu = reinterpret(UInt64, x * 0x1p52) # normalize x
         xu &= ~sign_mask(Float64)
         xu -= UInt64(52) << 52 # mess with the exponent
     end
-    return pow_body(xu, y)
-end
-
-@inline function pow_body(xu::UInt64, y::Float64)
     logxhi,logxlo = _log_ext(xu)
     xyhi, xylo = two_mul(logxhi,y)
     xylo = muladd(logxlo, y, xylo)
     hi = xyhi+xylo
-    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
+    return @inline Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
 end
 
 @constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
@@ -1308,12 +1229,27 @@ end
     return T(exp2(log2(abs(widen(x))) * y))
 end
 
-# compensated power by squaring
 @constprop :aggressive @inline function ^(x::Float64, n::Integer)
+    n = clamp(n, Int64)
     n == 0 && return one(x)
-    return pow_body(x, n)
+    if use_power_by_squaring(n)
+        return pow_body(x, n)
+    else
+        s = ifelse(x < 0 && isodd(n), -1.0, 1.0)
+        x = abs(x)
+        y = float(n)
+        if y == n
+            return copysign(pow_body(x, y), s)
+        else
+            n2 = n % 1024
+            y = float(n - n2)
+            return pow_body(x, y) * copysign(pow_body(x, n2), s)
+        end
+    end
 end
 
+# compensated power by squaring
+# this method is only reliable for -2^20 < n < 2^20 (cf. #53881 #53886)
 @assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
     y = 1.0
     xnlo = ynlo = 0.0
@@ -1656,7 +1592,6 @@ end
 
 exp2(x::AbstractFloat) = 2^x
 exp10(x::AbstractFloat) = 10^x
-clamp(::Missing, lo, hi) = missing
 fourthroot(::Missing) = missing
 
 end # module
diff --git a/base/meta.jl b/base/meta.jl
index bc07a7efa6c74..e648df29c12f9 100644
--- a/base/meta.jl
+++ b/base/meta.jl
@@ -5,8 +5,6 @@ Convenience functions for metaprogramming.
 """
 module Meta
 
-using ..CoreLogging
-
 export quot,
        isexpr,
        isidentifier,
diff --git a/base/missing.jl b/base/missing.jl
index ce174edc297e3..1f34195efed88 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -135,6 +135,7 @@ min(::Any,     ::Missing) = missing
 max(::Missing, ::Missing) = missing
 max(::Missing, ::Any)     = missing
 max(::Any,     ::Missing) = missing
+clamp(::Missing, lo, hi) = missing
 
 missing_conversion_msg(@nospecialize T) =
     LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead")
diff --git a/base/mpfr.jl b/base/mpfr.jl
index d3045371829f6..d393469aa26a1 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -109,9 +109,9 @@ end
 tie_breaker_is_to_even(::MPFRRoundingMode) = true
 
 const ROUNDING_MODE = Ref{MPFRRoundingMode}(MPFRRoundNearest)
-const CURRENT_ROUNDING_MODE = ScopedValue{MPFRRoundingMode}()
+const CURRENT_ROUNDING_MODE = Base.ScopedValues.ScopedValue{MPFRRoundingMode}()
 const DEFAULT_PRECISION = Ref{Clong}(256)
-const CURRENT_PRECISION = ScopedValue{Clong}()
+const CURRENT_PRECISION = Base.ScopedValues.ScopedValue{Clong}()
 # Basic type and initialization definitions
 
 # Warning: the constants are MPFR implementation details from
@@ -162,7 +162,7 @@ significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZer
 rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[])
 setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r
 function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode)
-    @with(CURRENT_ROUNDING_MODE => r, f())
+    Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f())
 end
 
 
@@ -1109,7 +1109,7 @@ Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by
     The `base` keyword requires at least Julia 1.8.
 """
 function setprecision(f::Function, ::Type{BigFloat}, prec::Integer; base::Integer=2)
-    @with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f())
+    Base.ScopedValues.@with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f())
 end
 
 setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base)
@@ -1199,7 +1199,7 @@ function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
         y = _BigFloat(x.prec, x.sign, x.exp, d′)
         #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
         return y
-    end
+    end::BigFloat
 end
 
 function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
@@ -1222,7 +1222,8 @@ end
 # flags
 clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ())
 had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0
-had_overflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0
+had_overflow() = ccall((:mpfr_overflow_p, libmpfr), Cint, ()) != 0
+had_divbyzero() = ccall((:mpfr_divby0_p, libmpfr), Cint, ()) != 0
 had_nan() = ccall((:mpfr_nanflag_p, libmpfr), Cint, ()) != 0
 had_inexact_exception() = ccall((:mpfr_inexflag_p, libmpfr), Cint, ()) != 0
 had_range_exception() = ccall((:mpfr_erangeflag_p, libmpfr), Cint, ()) != 0
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index fe29e5c1d8e5f..99f41f2404e47 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -4,7 +4,7 @@
 module IteratorsMD
     import .Base: eltype, length, size, first, last, in, getindex, setindex!,
                   min, max, zero, oneunit, isless, eachindex,
-                  convert, show, iterate, promote_rule, to_indices
+                  convert, show, iterate, promote_rule, to_indices, copy
 
     import .Base: +, -, *, (:)
     import .Base: simd_outer_range, simd_inner_length, simd_index, setindex
@@ -115,6 +115,7 @@ module IteratorsMD
     oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(1), Val(N)))
 
     # arithmetic, min/max
+    @inline (+)(index::CartesianIndex) = index
     @inline (-)(index::CartesianIndex{N}) where {N} =
         CartesianIndex{N}(map(-, index.I))
     @inline (+)(index1::CartesianIndex{N}, index2::CartesianIndex{N}) where {N} =
@@ -365,7 +366,7 @@ module IteratorsMD
     end
 
     # getindex for a 0D CartesianIndices is necessary for disambiguation
-    @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R}
+    @inline function Base.getindex(iter::CartesianIndices{0,R}) where {R}
         CartesianIndex()
     end
     @inline function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
@@ -476,6 +477,8 @@ module IteratorsMD
     @inline in(i::CartesianIndex, r::CartesianIndices) = false
     @inline in(i::CartesianIndex{N}, r::CartesianIndices{N}) where {N} = all(map(in, i.I, r.indices))
 
+    copy(iter::CartesianIndices) = iter
+
     simd_outer_range(iter::CartesianIndices{0}) = iter
     function simd_outer_range(iter::CartesianIndices)
         CartesianIndices(tail(iter.indices))
@@ -589,7 +592,7 @@ module IteratorsMD
         else
             # Given the fact that StepRange 1:2:4 === 1:2:3, we lost the original size information
             # and thus cannot calculate the correct linear indices when the steps are not 1.
-            throw(ArgumentError("LinearIndices for $(typeof(inds)) with non-1 step size is not yet supported."))
+            throw(ArgumentError(LazyString("LinearIndices for ", typeof(inds), " with non-1 step size is not yet supported.")))
         end
     end
 
@@ -682,6 +685,40 @@ end  # IteratorsMD
 
 using .IteratorsMD
 
+# from genericmemory.jl:
+## generate vararg methods for atomic indexing
+for ex in (
+    :(getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)),
+    :(setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)),
+    :(setindexonce_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, val, i::Int)),
+    :(modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)),
+    :(swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)),
+    :(replaceindex_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, expected, desired, i::Int,)),
+)
+    fn = ex.args[1]
+    args = ex.args[2:end-1]
+
+    @eval begin
+        function $fn($(args...), i::Union{Integer,CartesianIndex}...)
+            return $fn($(args...), CartesianIndex(to_indices($(args[1]), i)))
+        end
+
+        function $fn($(args...), i::CartesianIndex)
+            return $fn($(args...), Tuple(i)...)
+        end
+
+        function $fn($(args...), i::Integer...)
+            idcs = to_indices($(args[1]), i)
+            S = IndexStyle($(args[1]))
+            if isa(S, IndexLinear)
+                return $fn($(args...), _to_linear_index($(args[1]), idcs...))
+            else
+                return $fn($(args...), _to_subscript_indices($(args[1]), idcs...))
+            end
+        end
+    end
+end
+
 ## Bounds-checking with CartesianIndex
 # Disallow linear indexing with CartesianIndex
 @inline checkbounds(::Type{Bool}, A::AbstractArray, i::CartesianIndex) =
@@ -693,6 +730,8 @@ using .IteratorsMD
 end
 @inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndex) =
     checkbounds_indices(Bool, inds, I.I)
+@inline checkindex(::Type{Bool}, inds::Tuple, i::AbstractRange{<:CartesianIndex}) =
+    isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i)))
 
 # Indexing into Array with mixtures of Integers and CartesianIndices is
 # extremely performance-sensitive. While the abstract fallbacks support this,
@@ -1044,25 +1083,34 @@ end
 
 ### from abstractarray.jl
 
-# In the common case where we have two views into the same parent, aliasing checks
-# are _much_ easier and more important to get right
-function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P}
-    if !_parentsmatch(A.parent, B.parent)
-        # We cannot do any better than the usual dataids check
-        return !_isdisjoint(dataids(A), dataids(B))
-    end
-    # Now we know that A.parent === B.parent. This means that the indices of A
-    # and B are the same length and indexing into the same dimensions. We can
-    # just walk through them and check for overlaps: O(ndims(A)). We must finally
-    # ensure that the indices don't alias with either parent
-    return _indicesmightoverlap(A.indices, B.indices) ||
-        !_isdisjoint(dataids(A.parent), _splatmap(dataids, B.indices)) ||
-        !_isdisjoint(dataids(B.parent), _splatmap(dataids, A.indices))
+function mightalias(A::SubArray, B::SubArray)
+    # There are three ways that SubArrays might _problematically_ alias one another:
+    #   1. The parents are the same we can conservatively check if the indices might overlap OR
+    #   2. The parents alias eachother in a more complicated manner (and we can't trace indices) OR
+    #   3. One's parent is used in the other's indices
+    # Note that it's ok for just the indices to alias each other as those should not be mutated,
+    # so we can always do better than the default !_isdisjoint(dataids(A), dataids(B))
+    if isbits(A.parent) || isbits(B.parent)
+        return false # Quick out for immutables
+    elseif _parentsmatch(A.parent, B.parent)
+        # Each SubArray unaliases its own parent from its own indices upon construction, so if
+        # the two parents are the same, then by construction one cannot alias the other's indices
+        # and therefore this is the only test we need to perform:
+        return _indicesmightoverlap(A.indices, B.indices)
+    else
+        A_parent_ids = dataids(A.parent)
+        B_parent_ids = dataids(B.parent)
+        return !_isdisjoint(A_parent_ids, B_parent_ids) ||
+            !_isdisjoint(A_parent_ids, _splatmap(dataids, B.indices)) ||
+            !_isdisjoint(B_parent_ids, _splatmap(dataids, A.indices))
+    end
 end
+# Test if two arrays are backed by exactly the same memory in exactly the same order
 _parentsmatch(A::AbstractArray, B::AbstractArray) = A === B
-# Two reshape(::Array)s of the same size aren't `===` because they have different headers
-_parentsmatch(A::Array, B::Array) = pointer(A) == pointer(B) && size(A) == size(B)
+_parentsmatch(A::DenseArray, B::DenseArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && size(A) == size(B)
+_parentsmatch(A::StridedArray, B::StridedArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && strides(A) == strides(B)
 
+# Given two SubArrays with the same parent, check if the indices might overlap (returning true if unsure)
 _indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true
 _indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray")
 _indicesmightoverlap(A::Tuple, B::Tuple{}) = error("malformed subarray")
@@ -1610,12 +1658,6 @@ end
     end
 end
 
-# _unsetindex
-@propagate_inbounds function Base._unsetindex!(A::AbstractArray, i::CartesianIndex)
-    Base._unsetindex!(A, to_indices(A, (i,))...)
-    return A
-end
-
 ## permutedims
 
 ## Permute array dims ##
@@ -1629,12 +1671,11 @@ function permutedims(B::StridedArray, perm)
     permutedims!(P, B, perm)
 end
 
-function checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N}
-    indsB = axes(B)
-    length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))"))
+checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N} = checkdims_perm(axes(P), axes(B), perm)
+function checkdims_perm(indsP::NTuple{N, AbstractUnitRange}, indsB::NTuple{N, AbstractUnitRange}, perm) where {N}
+    length(perm) == N || throw(ArgumentError(LazyString("expected permutation of size ", N, ", but length(perm)=", length(perm))))
     isperm(perm) || throw(ArgumentError("input is not a permutation"))
-    indsP = axes(P)
-    for i = 1:length(perm)
+    for i in eachindex(perm)
         indsP[i] == indsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size"))
     end
     nothing
@@ -1643,7 +1684,7 @@ end
 for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArray)]
     @eval @generated function permutedims!(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) where $(V...)
         quote
-            checkdims_perm(P, B, perm)
+            checkdims_perm(axes(P), axes(B), perm)
 
             #calculates all the strides
             native_strides = size_to_strides(1, size(B)...)
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index cbcade94f1c0d..e316dbd37ccf5 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -112,24 +112,24 @@ Core.NamedTuple
 
 if nameof(@__MODULE__) === :Base
 
-@eval function NamedTuple{names,T}(args::Tuple) where {names, T <: Tuple}
+@eval function (NT::Type{NamedTuple{names,T}})(args::Tuple) where {names, T <: Tuple}
     if length(args) != length(names::Tuple)
         throw(ArgumentError("Wrong number of arguments to named tuple constructor."))
     end
     # Note T(args) might not return something of type T; e.g.
     # Tuple{Type{Float64}}((Float64,)) returns a Tuple{DataType}
-    $(Expr(:splatnew, :(NamedTuple{names,T}), :(T(args))))
+    $(Expr(:splatnew, :NT, :(T(args))))
 end
 
-function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple}
+function (NT::Type{NamedTuple{names, T}})(nt::NamedTuple) where {names, T <: Tuple}
     if @generated
-        Expr(:new, :(NamedTuple{names, T}),
-             Any[ :(let Tn = fieldtype(T, $n),
+        Expr(:new, :NT,
+             Any[ :(let Tn = fieldtype(NT, $n),
                       ntn = getfield(nt, $(QuoteNode(names[n])))
                       ntn isa Tn ? ntn : convert(Tn, ntn)
                   end) for n in 1:length(names) ]...)
     else
-        NamedTuple{names, T}(map(Fix1(getfield, nt), names))
+        NT(map(Fix1(getfield, nt), names))
     end
 end
 
@@ -145,14 +145,11 @@ function NamedTuple{names}(nt::NamedTuple) where {names}
     end
 end
 
-NamedTuple{names, T}(itr) where {names, T <: Tuple} = NamedTuple{names, T}(T(itr))
-NamedTuple{names}(itr) where {names} = NamedTuple{names}(Tuple(itr))
+(NT::Type{NamedTuple{names, T}})(itr) where {names, T <: Tuple} = NT(T(itr))
+(NT::Type{NamedTuple{names}})(itr) where {names} = NT(Tuple(itr))
 
 NamedTuple(itr) = (; itr...)
 
-# avoids invalidating Union{}(...)
-NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTuple{names, Union{}}, (itr,)))
-
 end # if Base
 
 # Like NamedTuple{names, T} as a constructor, but omits the additional
@@ -427,6 +424,24 @@ function diff_fallback(a::NamedTuple, an::Tuple{Vararg{Symbol}}, bn::Tuple{Varar
     _new_NamedTuple(NamedTuple{names, types}, (A...,))
 end
 
+"""
+    delete(a::NamedTuple, field::Symbol)
+
+Construct a new named tuple from `a` by removing the named field.
+
+```jldoctest
+julia> Base.delete((a=1, b=2, c=3), :a)
+(b = 2, c = 3)
+
+julia> Base.delete((a=1, b=2, c=3), :b)
+(a = 1, c = 3)
+```
+"""
+@constprop :aggressive function delete(a::NamedTuple{an}, field::Symbol) where {an}
+    names = diff_names(an, (field,))
+    NamedTuple{names}(a)
+end
+
 """
     structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}})
 
diff --git a/base/ntuple.jl b/base/ntuple.jl
index 74dc9f3796dbf..f81d2686b9764 100644
--- a/base/ntuple.jl
+++ b/base/ntuple.jl
@@ -72,9 +72,10 @@ julia> ntuple(i -> 2*i, Val(4))
     if @generated
         :(@ntuple $N i -> f(i))
     else
-        Tuple(f(i) for i = 1:N)
+        Tuple(f(i) for i = 1:(N::Int))
     end
 end
+typeof(function ntuple end).name.max_methods = UInt8(5)
 
 @inline function fill_to_length(t::Tuple, val, ::Val{_N}) where {_N}
     M = length(t)
diff --git a/base/number.jl b/base/number.jl
index 39908222027c1..72df50a9c3134 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -316,7 +316,7 @@ zero(::Type{Union{}}, slurp...) = Union{}(0)
 
 """
     one(x)
-    one(T::type)
+    one(T::Type)
 
 Return a multiplicative identity for `x`: a value such that
 `one(x)*x == x*one(x) == x`. If the multiplicative identity can
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
index 9891db20249fb..0f1fdf47afed8 100644
--- a/base/opaque_closure.jl
+++ b/base/opaque_closure.jl
@@ -18,11 +18,23 @@ the argument type may be fixed length even if the function is variadic.
     This interface is experimental and subject to change or removal without notice.
 """
 macro opaque(ex)
-    esc(Expr(:opaque_closure, ex))
+    esc(Expr(:opaque_closure, nothing, nothing, nothing, #= allow_partial =# true, ex))
 end
 
 macro opaque(ty, ex)
-    esc(Expr(:opaque_closure, ty, ex))
+    if Base.isexpr(ty, :->)
+        (AT, body) = ty.args
+        filter!((n)->!isa(n, Core.LineNumberNode), body.args)
+        if !Base.isexpr(body, :block) || length(body.args) != 1
+            error("Opaque closure type must be specified in the form Tuple{T,U...}->RT")
+        end
+        RT = only(body.args)
+    else
+        error("Opaque closure type must be specified in the form Tuple{T,U...}->RT")
+    end
+    AT = (AT !== :_) ? AT : nothing
+    RT = (RT !== :_) ? RT : nothing
+    return esc(Expr(:opaque_closure, AT, RT, RT, #= allow_partial =# true, ex))
 end
 
 # OpaqueClosure construction from pre-inferred CodeInfo/IRCode
@@ -58,22 +70,34 @@ end
 
 function Core.OpaqueClosure(ir::IRCode, @nospecialize env...;
                             isva::Bool = false,
-                            do_compile::Bool = true)
+                            slotnames::Union{Nothing,Vector{Symbol}}=nothing,
+                            kwargs...)
     # NOTE: we need ir.argtypes[1] == typeof(env)
     ir = Core.Compiler.copy(ir)
-    nargs = length(ir.argtypes)-1
+    # if the user didn't specify a definition MethodInstance or filename Symbol to use for the debuginfo, set a filename now
+    ir.debuginfo.def === nothing && (ir.debuginfo.def = :var"generated IR for OpaqueClosure")
+    nargtypes = length(ir.argtypes)
+    nargs = nargtypes-1
     sig = compute_oc_signature(ir, nargs, isva)
     rt = compute_ir_rettype(ir)
     src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-    src.slotnames = fill(:none, nargs+1)
-    src.slotflags = fill(zero(UInt8), length(ir.argtypes))
+    if slotnames === nothing
+        src.slotnames = fill(:none, nargtypes)
+    else
+        length(slotnames) == nargtypes || error("mismatched `argtypes` and `slotnames`")
+        src.slotnames = slotnames
+    end
+    src.slotflags = fill(zero(UInt8), nargtypes)
     src.slottypes = copy(ir.argtypes)
+    src.isva = isva
+    src.nargs = nargtypes
     src = Core.Compiler.ir_to_codeinf!(src, ir)
-    return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; do_compile)
+    src.rettype = rt
+    return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; kwargs...)
 end
 
-function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...; rettype, sig, nargs, isva=false)
-    return generate_opaque_closure(sig, Union{}, rettype, src, nargs, isva, env...)
+function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...; rettype, sig, nargs, isva=false, kwargs...)
+    return generate_opaque_closure(sig, Union{}, rettype, src, nargs, isva, env...; kwargs...)
 end
 
 function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nospecialize(rt_ub),
@@ -81,8 +105,8 @@ function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nosp
                                  mod::Module=@__MODULE__,
                                  lineno::Int=0,
                                  file::Union{Nothing,Symbol}=nothing,
-                                 isinferred::Bool=true,
-                                 do_compile::Bool=true)
+                                 do_compile::Bool=true,
+                                 isinferred::Bool=true)
     return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint, Cint),
         sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile, isinferred)
 end
diff --git a/base/operators.jl b/base/operators.jl
index 26274307ba05c..d01902e302359 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -58,22 +58,29 @@ but which do not execute the operator or return a Bool:
 """
 (<:)
 
+import Core: >:
+
 """
     >:(T1, T2)
 
 Supertype operator, equivalent to `T2 <: T1`.
 """
-(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+>:
 
 """
-    supertype(T::DataType)
+    supertype(T::Union{DataType, UnionAll})
 
-Return the supertype of DataType `T`.
+Return the direct supertype of type `T`.
+`T` can be a [`DataType`](@ref) or a [`UnionAll`](@ref) type. Does not support
+type [`Union`](@ref)s. Also see info on [Types](@ref man-types).
 
 # Examples
 ```jldoctest
 julia> supertype(Int32)
 Signed
+
+julia> supertype(Vector)
+DenseVector (alias for DenseArray{T, 1} where T)
 ```
 """
 supertype(T::DataType) = (@_total_meta; T.super)
@@ -106,9 +113,10 @@ Use [`isequal`](@ref) or [`===`](@ref) to always get a `Bool` result.
 New numeric types should implement this function for two arguments of the new type, and
 handle comparison to other types via promotion rules where possible.
 
-[`isequal`](@ref) falls back to `==`, so new methods of `==` will be used by the
-[`Dict`](@ref) type to compare keys. If your type will be used as a dictionary key, it
-should therefore also implement [`hash`](@ref).
+Equality and hashing are intimately related; two values that are considered [`isequal`](@ref) **must**
+have the same [`hash`](@ref) and by default `isequal` falls back to `==`. If a type customizes the behavior of `==` and/or [`isequal`](@ref),
+then [`hash`](@ref) must be similarly implemented to ensure `isequal` and `hash` agree. `Set`s, `Dict`s, and many other internal
+implementations assume that this invariant holds.
 
 If some type defines `==`, [`isequal`](@ref), and [`isless`](@ref) then it should
 also implement [`<`](@ref) to ensure consistency of comparisons.
@@ -340,6 +348,7 @@ true
 ===
 const ≡ = ===
 
+import Core: !==
 """
     !==(x, y)
     ≢(x,y)
@@ -357,7 +366,8 @@ julia> a ≢ a
 false
 ```
 """
-!==(@nospecialize(x), @nospecialize(y)) = !(x === y)
+!==
+
 const ≢ = !==
 
 """
@@ -1144,40 +1154,55 @@ julia> filter(!isletter, str)
 !(f::ComposedFunction{typeof(!)}) = f.inner #allows !!f === f
 
 """
-    Fix1(f, x)
+    Fix{N}(f, x)
 
-A type representing a partially-applied version of the two-argument function
-`f`, with the first argument fixed to the value "x". In other words,
-`Fix1(f, x)` behaves similarly to `y->f(x, y)`.
+A type representing a partially-applied version of a function `f`, with the argument
+`x` fixed at position `N::Int`. In other words, `Fix{3}(f, x)` behaves similarly to
+`(y1, y2, y3...; kws...) -> f(y1, y2, x, y3...; kws...)`.
 
-See also [`Fix2`](@ref Base.Fix2).
+!!! compat "Julia 1.12"
+    This general functionality requires at least Julia 1.12, while `Fix1` and `Fix2`
+    are available earlier.
+
+!!! note
+    When nesting multiple `Fix`, note that the `N` in `Fix{N}` is _relative_ to the current
+    available arguments, rather than an absolute ordering on the target function. For example,
+    `Fix{1}(Fix{2}(f, 4), 4)` fixes the first and second arg, while `Fix{2}(Fix{1}(f, 4), 4)`
+    fixes the first and third arg.
 """
-struct Fix1{F,T} <: Function
+struct Fix{N,F,T} <: Function
     f::F
     x::T
 
-    Fix1(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix1(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
+    function Fix{N}(f::F, x) where {N,F}
+        if !(N isa Int)
+            throw(ArgumentError(LazyString("expected type parameter in `Fix` to be `Int`, but got `", N, "::", typeof(N), "`")))
+        elseif N < 1
+            throw(ArgumentError(LazyString("expected `N` in `Fix{N}` to be integer greater than 0, but got ", N)))
+        end
+        new{N,_stable_typeof(f),_stable_typeof(x)}(f, x)
+    end
+end
+
+function (f::Fix{N})(args::Vararg{Any,M}; kws...) where {N,M}
+    M < N-1 && throw(ArgumentError(LazyString("expected at least ", N-1, " arguments to `Fix{", N, "}`, but got ", M)))
+    return f.f(args[begin:begin+(N-2)]..., f.x, args[begin+(N-1):end]...; kws...)
 end
 
-(f::Fix1)(y) = f.f(f.x, y)
+# Special cases for improved constant propagation
+(f::Fix{1})(arg; kws...) = f.f(f.x, arg; kws...)
+(f::Fix{2})(arg; kws...) = f.f(arg, f.x; kws...)
 
 """
-    Fix2(f, x)
-
-A type representing a partially-applied version of the two-argument function
-`f`, with the second argument fixed to the value "x". In other words,
-`Fix2(f, x)` behaves similarly to `y->f(y, x)`.
+Alias for `Fix{1}`. See [`Fix`](@ref Base.Fix).
 """
-struct Fix2{F,T} <: Function
-    f::F
-    x::T
+const Fix1{F,T} = Fix{1,F,T}
 
-    Fix2(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix2(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
-end
+"""
+Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).
+"""
+const Fix2{F,T} = Fix{2,F,T}
 
-(f::Fix2)(y) = f.f(y, f.x)
 
 """
     isequal(x)
@@ -1314,8 +1339,7 @@ struct Splat{F} <: Function
     Splat(f) = new{Core.Typeof(f)}(f)
 end
 (s::Splat)(args) = s.f(args...)
-print(io::IO, s::Splat) = print(io, "splat(", s.f, ')')
-show(io::IO, s::Splat) = print(io, s)
+show(io::IO, s::Splat) = (print(io, "splat("); show(io, s.f); print(io, ")"))
 
 ## in and related operators
 
@@ -1332,24 +1356,43 @@ used to implement specialized methods.
 """
 in(x) = Fix2(in, x)
 
-for ItrT = (Tuple,Any)
-    # define a generic method and a specialized version for `Tuple`,
-    # whose method bodies are identical, while giving better effects to the later
-    @eval function in(x, itr::$ItrT)
-        $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing)
-        anymissing = false
-        for y in itr
-            v = (y == x)
-            if ismissing(v)
-                anymissing = true
-            elseif v
-                return true
-            end
+function in(x, itr::Any)
+    anymissing = false
+    for y in itr
+        v = (y == x)
+        if ismissing(v)
+            anymissing = true
+        elseif v
+            return true
         end
+    end
+    return anymissing ? missing : false
+end
+
+# Specialized variant of in for Tuple, which can generate typed comparisons for each element
+# of the tuple, skipping values that are statically known to be != at compile time.
+in(x, itr::Tuple) = _in_tuple(x, itr, false)
+# This recursive function will be unrolled at compiletime, and will not generate separate
+# llvm-compiled specializations for each step of the recursion.
+function _in_tuple(x, @nospecialize(itr::Tuple), anymissing::Bool)
+    @inline
+    # Base case
+    if isempty(itr)
         return anymissing ? missing : false
     end
+    # Recursive case
+    v = (itr[1] == x)
+    if ismissing(v)
+        anymissing = true
+    elseif v
+        return true
+    end
+    return _in_tuple(x, tail(itr), anymissing)
 end
 
+# fallback to the loop implementation after some number of arguments to avoid inference blowup
+in(x, itr::Any32) = invoke(in, Tuple{Any,Any}, x, itr)
+
 const ∈ = in
 ∉(x, itr) = !∈(x, itr)
 ∉(itr) = Fix2(∉, itr)
@@ -1382,11 +1425,15 @@ a function equivalent to `y -> item in y`.
 
 Determine whether an item is in the given collection, in the sense that it is
 [`==`](@ref) to one of the values generated by iterating over the collection.
+Can equivalently be used with infix syntax:
+
+    item in collection
+    item ∈ collection
+
 Return a `Bool` value, except if `item` is [`missing`](@ref) or `collection`
 contains `missing` but not `item`, in which case `missing` is returned
 ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 matching the behavior of [`any`](@ref) and [`==`](@ref)).
-
 Some collections follow a slightly different definition. For example,
 [`Set`](@ref)s check whether the item [`isequal`](@ref) to one of the elements;
 [`Dict`](@ref)s look for `key=>value` pairs, and the `key` is compared using
diff --git a/base/options.jl b/base/options.jl
index a94936391fa8d..41ce3c9e20909 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -57,6 +57,7 @@ struct JLOptions
     strip_ir::Int8
     permalloc_pkgimg::Int8
     heap_size_hint::UInt64
+    trace_compile_timing::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
@@ -67,6 +68,18 @@ end
 
 JLOptions() = unsafe_load(cglobal(:jl_options, JLOptions))
 
+function colored_text(opts::JLOptions)
+    return if opts.color != 0
+        opts.color == 1
+    elseif !isempty(get(ENV, "FORCE_COLOR", ""))
+        true
+    elseif !isempty(get(ENV, "NO_COLOR", ""))
+        false
+    else
+        nothing
+    end
+end
+
 function show(io::IO, opt::JLOptions)
     print(io, "JLOptions(")
     fields = fieldnames(JLOptions)
diff --git a/base/osutils.jl b/base/osutils.jl
index 95d0562540e5a..5daf58f5b8f4f 100644
--- a/base/osutils.jl
+++ b/base/osutils.jl
@@ -3,13 +3,23 @@
 """
     @static
 
-Partially evaluate an expression at parse time.
+Partially evaluate an expression at macro expansion time.
 
-For example, `@static Sys.iswindows() ? foo : bar` will evaluate `Sys.iswindows()` and insert
-either `foo` or `bar` into the expression.
-This is useful in cases where a construct would be invalid on other platforms,
-such as a `ccall` to a non-existent function.
-`@static if Sys.isapple() foo end` and `@static foo <&&,||> bar` are also valid syntax.
+This is useful in cases where a construct would be invalid in some cases, such as a `ccall`
+to an os-dependent function, or macros defined in packages that are not imported.
+
+`@static` requires a conditional. The conditional can be in an `if` statement, a ternary
+operator, or `&&`\`||`. The conditional is evaluated by recursively expanding macros,
+lowering and executing the resulting expressions. Then, the matching branch (if any) is
+returned. All the other branches of the conditional are deleted before they are
+macro-expanded (and lowered or executed).
+
+# Example
+
+Suppose we want to parse an expression `expr` that is valid only on macOS. We could solve
+this problem using `@static` with `@static if Sys.isapple() expr end`. In case we had
+`expr_apple` for macOS and `expr_others` for the other operating systems, the solution with
+`@static` would be `@static Sys.isapple() ? expr_apple : expr_others`.
 """
 macro static(ex)
     if isa(ex, Expr)
diff --git a/base/parse.jl b/base/parse.jl
index 3f714934206fc..2530e5a46146a 100644
--- a/base/parse.jl
+++ b/base/parse.jl
@@ -386,7 +386,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, raise::Bool; kwargs...)
     return result
 end
 @noinline _parse_failure(T, s::AbstractString, startpos = firstindex(s), endpos = lastindex(s)) =
-    throw(ArgumentError("cannot parse $(repr(s[startpos:endpos])) as $T"))
+    throw(ArgumentError(LazyString("cannot parse ", repr(s[startpos:endpos]), " as ", T)))
 
 tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, raise::Bool) where T<:Integer =
     tryparse_internal(T, s, startpos, endpos, 10, raise)
diff --git a/base/partr.jl b/base/partr.jl
index 8c95e3668ee74..6053a584af5ba 100644
--- a/base/partr.jl
+++ b/base/partr.jl
@@ -20,7 +20,60 @@ const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
 const heaps_lock = [SpinLock(), SpinLock()]
 
 
-cong(max::UInt32) = iszero(max) ? UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1)
+"""
+    cong(max::UInt32)
+
+Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0.
+"""
+cong(max::UInt32) = iszero(max) ? UInt32(0) : rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check
+
+get_ptls_rng() = ccall(:jl_get_ptls_rng, UInt64, ())
+
+set_ptls_rng(seed::UInt64) = ccall(:jl_set_ptls_rng, Cvoid, (UInt64,), seed)
+
+"""
+    rand_ptls(max::UInt32)
+
+Return a random UInt32 in the range `0:max-1` using the thread-local RNG
+state. Max must be greater than 0.
+"""
+Base.@assume_effects :removable :inaccessiblememonly :notaskstate function rand_ptls(max::UInt32)
+    rngseed = get_ptls_rng()
+    val, seed = rand_uniform_max_int32(max, rngseed)
+    set_ptls_rng(seed)
+    return val % UInt32
+end
+
+# This implementation is based on OpenSSLs implementation of rand_uniform
+# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99
+# Comments are vendored from their implementation as well.
+# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143.
+
+# Essentially it boils down to incrementally generating a fixed point
+# number on the interval [0, 1) and multiplying this number by the upper
+# range limit.  Once it is certain what the fractional part contributes to
+# the integral part of the product, the algorithm has produced a definitive
+# result.
+"""
+    rand_uniform_max_int32(max::UInt32, seed::UInt64)
+
+Return a random UInt32 in the range `0:max-1` using the given seed.
+Max must be greater than 0.
+"""
+Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64)
+    if max == UInt32(1)
+        return UInt32(0), seed
+    end
+    # We are generating a fixed point number on the interval [0, 1).
+    # Multiplying this by the range gives us a number on [0, upper).
+    # The high word of the multiplication result represents the integral part
+    # This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes
+    seed = UInt64(69069) * seed + UInt64(362437)
+    prod = (UInt64(max)) * (seed % UInt32) # 64 bit product
+    i = prod >> 32 % UInt32 # integral part
+    return i % UInt32, seed
+end
+
 
 
 function multiq_sift_up(heap::taskheap, idx::Int32)
diff --git a/base/pcre.jl b/base/pcre.jl
index 614ffa694af0e..b357bbc8f3bf9 100644
--- a/base/pcre.jl
+++ b/base/pcre.jl
@@ -7,7 +7,7 @@ module PCRE
 import ..RefValue
 
 # include($BUILDROOT/base/pcre_h.jl)
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
+include(string(Base.BUILDROOT, "pcre_h.jl"))
 
 const PCRE_LIB = "libpcre2-8"
 
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index c7fe5a9dfe0db..cf9748168aac2 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -39,7 +39,7 @@ julia> B[3,1,2] == A[1,2,3]
 true
 ```
 """
-function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N}
+Base.@constprop :aggressive function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N}
     length(perm) == N || throw(ArgumentError(string(perm, " is not a valid permutation of dimensions 1:", N)))
     iperm = invperm(perm)
     PermutedDimsArray{T,N,(perm...,),(iperm...,),typeof(data)}(data)
@@ -282,7 +282,7 @@ regions.
 See also [`permutedims`](@ref).
 """
 function permutedims!(dest, src::AbstractArray, perm)
-    Base.checkdims_perm(dest, src, perm)
+    Base.checkdims_perm(axes(dest), axes(src), perm)
     P = PermutedDimsArray(dest, invperm(perm))
     _copy!(P, src)
     return dest
diff --git a/base/pkgid.jl b/base/pkgid.jl
index a3d3ead184bb8..8c776d79a69cb 100644
--- a/base/pkgid.jl
+++ b/base/pkgid.jl
@@ -37,7 +37,8 @@ end
 
 function binunpack(s::String)
     io = IOBuffer(s)
-    @assert read(io, UInt8) === 0x00
+    z = read(io, UInt8)
+    @assert z === 0x00
     uuid = read(io, UInt128)
     name = read(io, String)
     return PkgId(UUID(uuid), name)
diff --git a/base/pointer.jl b/base/pointer.jl
index 86513c076ade6..b1580ef17d562 100644
--- a/base/pointer.jl
+++ b/base/pointer.jl
@@ -64,6 +64,7 @@ unsafe_convert(::Type{Ptr{Int8}}, s::String) = ccall(:jl_string_ptr, Ptr{Int8},
 
 cconvert(::Type{<:Ptr}, a::Array) = getfield(a, :ref)
 unsafe_convert(::Type{Ptr{S}}, a::AbstractArray{T}) where {S,T} = convert(Ptr{S}, unsafe_convert(Ptr{T}, a))
+unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = unsafe_convert(Ptr{T}, a.ref)
 unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))")
 # TODO: add this deprecation to give a better error:
 # cconvert(::Type{<:Ptr}, a::AbstractArray) = error("conversion to pointer not defined for $(typeof(a))")
@@ -117,7 +118,7 @@ end
 function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}},
                      p::Ptr{T}, dims::Tuple{Int}; own::Bool = false) where {kind,T}
     ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}},
-          (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, dim[1], own)
+          (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, dims[1], own)
 end
 function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}},
                      p::Ptr{T}, d::Integer; own::Bool = false) where {kind,T}
@@ -312,8 +313,8 @@ isless(x::Ptr{T}, y::Ptr{T}) where {T} = x < y
 <(x::Ptr,  y::Ptr) = UInt(x) < UInt(y)
 -(x::Ptr,  y::Ptr) = UInt(x) - UInt(y)
 
-+(x::Ptr, y::Integer) = oftype(x, add_ptr(UInt(x), (y % UInt) % UInt))
--(x::Ptr, y::Integer) = oftype(x, sub_ptr(UInt(x), (y % UInt) % UInt))
++(x::Ptr, y::Integer) = add_ptr(x, (y % UInt) % UInt)
+-(x::Ptr, y::Integer) = sub_ptr(x, (y % UInt) % UInt)
 +(x::Integer, y::Ptr) = y + x
 
 unsigned(x::Ptr) = UInt(x)
diff --git a/base/precompilation.jl b/base/precompilation.jl
new file mode 100644
index 0000000000000..d3f076633f386
--- /dev/null
+++ b/base/precompilation.jl
@@ -0,0 +1,1059 @@
+module Precompilation
+
+using Base: PkgId, UUID, SHA1, parsed_toml, project_file_name_uuid, project_names,
+            project_file_manifest_path, get_deps, preferences_names, isaccessibledir, isfile_casesensitive,
+            base_project
+
+# This is currently only used for pkgprecompile but the plan is to use this in code loading in the future
+# see the `kc/codeloading2.0` branch
+struct ExplicitEnv
+    path::String
+    project_deps::Dict{String, UUID} # [deps] in Project.toml
+    project_weakdeps::Dict{String, UUID} # [weakdeps] in Project.toml
+    project_extras::Dict{String, UUID} # [extras] in Project.toml
+    project_extensions::Dict{String, Vector{UUID}} # [exts] in Project.toml
+    deps::Dict{UUID, Vector{UUID}} # all dependencies in Manifest.toml
+    weakdeps::Dict{UUID, Vector{UUID}} # all weak dependencies in Manifest.toml
+    extensions::Dict{UUID, Dict{String, Vector{UUID}}}
+    # Lookup name for a UUID
+    names::Dict{UUID, String}
+    lookup_strategy::Dict{UUID, Union{
+                                      SHA1,     # `git-tree-sha1` entry
+                                      String,   # `path` entry
+                                      Nothing,  # stdlib (no `path` nor `git-tree-sha1`)
+                                      Missing}} # not present in the manifest
+    #prefs::Union{Nothing, Dict{String, Any}}
+    #local_prefs::Union{Nothing, Dict{String, Any}}
+end
+
+function ExplicitEnv(envpath::String=Base.active_project())
+    if !isfile(envpath)
+        error("expected a project file at $(repr(envpath))")
+    end
+    envpath = abspath(envpath)
+    project_d = parsed_toml(envpath)
+
+    # TODO: Perhaps verify that two packages with the same UUID do not have different names?
+    names = Dict{UUID, String}()
+    project_uuid_to_name = Dict{String, UUID}()
+
+    project_deps = Dict{String, UUID}()
+    project_weakdeps = Dict{String, UUID}()
+    project_extras = Dict{String, UUID}()
+
+    # Collect all direct dependencies of the project
+    for key in ["deps", "weakdeps", "extras"]
+        for (name, _uuid) in get(Dict{String, Any}, project_d, key)::Dict{String, Any}
+            v = key == "deps" ? project_deps :
+                key == "weakdeps" ? project_weakdeps :
+                key == "extras" ? project_extras :
+                error()
+            uuid = UUID(_uuid)
+            v[name] = uuid
+            names[UUID(uuid)] = name
+            project_uuid_to_name[name] = UUID(uuid)
+        end
+    end
+
+    # A package in both deps and weakdeps is in fact only a weakdep
+    for (name, _) in project_weakdeps
+        delete!(project_deps, name)
+    end
+
+    # This project might be a package, in that case, that is also a "dependency"
+    # of the project.
+    proj_name = get(project_d, "name", nothing)::Union{String, Nothing}
+    _proj_uuid = get(project_d, "uuid", nothing)::Union{String, Nothing}
+    proj_uuid = _proj_uuid === nothing ? nothing : UUID(_proj_uuid)
+
+    project_is_package = proj_name !== nothing && proj_uuid !== nothing
+    if project_is_package
+        # TODO: Error on missing uuid?
+        project_deps[proj_name] = UUID(proj_uuid)
+        names[UUID(proj_uuid)] = proj_name
+    end
+
+    project_extensions = Dict{String, Vector{UUID}}()
+    # Collect all extensions of the project
+    for (name, triggers::Union{String, Vector{String}}) in get(Dict{String, Any}, project_d, "extensions")::Dict{String, Any}
+        if triggers isa String
+            triggers = [triggers]
+        end
+        uuids = UUID[]
+        for trigger in triggers
+            uuid = get(project_uuid_to_name, trigger, nothing)
+            if uuid === nothing
+                error("Trigger $trigger for extension $name not found in project")
+            end
+            push!(uuids, uuid)
+        end
+        project_extensions[name] = uuids
+    end
+
+    manifest = project_file_manifest_path(envpath)
+    manifest_d = manifest === nothing ? Dict{String, Any}() : parsed_toml(manifest)
+
+    # Dependencies in a manifest can either be stored compressed (when name is unique among all packages)
+    # in which case it is a `Vector{String}` or expanded where it is a `name => uuid` mapping.
+    deps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}()
+    weakdeps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}()
+    extensions = Dict{UUID, Dict{String, Vector{String}}}()
+    name_to_uuid = Dict{String, UUID}()
+    lookup_strategy = Dict{UUID, Union{SHA1, String, Nothing, Missing}}()
+
+    sizehint!(deps, length(manifest_d))
+    sizehint!(weakdeps, length(manifest_d))
+    sizehint!(extensions, length(manifest_d))
+    sizehint!(name_to_uuid, length(manifest_d))
+    sizehint!(lookup_strategy, length(manifest_d))
+
+    for (name, pkg_infos) in get_deps(manifest_d)
+        pkg_infos = pkg_infos::Vector{Any}
+        for pkg_info in pkg_infos
+            m_uuid = UUID(pkg_info["uuid"]::String)
+
+            # If we have multiple packages with the same name we will overwrite things here
+            # but that is fine since we will only use the information in here for packages
+            # with unique names
+            names[m_uuid] = name
+            name_to_uuid[name] = m_uuid
+
+            for key in ["deps", "weakdeps"]
+                deps_pkg = get(Vector{String}, pkg_info, key)::Union{Vector{String}, Dict{String, Any}}
+                d = key == "deps" ? deps :
+                    key == "weakdeps" ? weakdeps :
+                    error()
+
+                # Compressed format with unique names:
+                if deps_pkg isa Vector{String}
+                    d[m_uuid] = deps_pkg
+                # Expanded format:
+                else
+                    uuids = UUID[]
+                    for (name_dep, _dep_uuid::String) in deps_pkg
+                        dep_uuid = UUID(_dep_uuid)
+                        push!(uuids, dep_uuid)
+                        names[dep_uuid] = name_dep
+                    end
+                    d[m_uuid] = uuids
+                end
+            end
+
+            # Extensions
+            deps_pkg = get(Dict{String, Any}, pkg_info, "extensions")::Dict{String, Any}
+            for (ext, triggers) in deps_pkg
+                triggers = triggers::Union{String, Vector{String}}
+                if triggers isa String
+                    triggers = [triggers]
+                end
+                deps_pkg[ext] = triggers
+            end
+            extensions[m_uuid] = deps_pkg
+
+            # Determine strategy to find package
+            lookup_strat = begin
+                if (path = get(pkg_info, "path", nothing)::Union{String, Nothing}) !== nothing
+                    path
+                elseif (git_tree_sha_str = get(pkg_info, "git-tree-sha1", nothing)::Union{String, Nothing}) !== nothing
+                    SHA1(git_tree_sha_str)
+                else
+                    nothing
+                end
+            end
+            lookup_strategy[m_uuid] = lookup_strat
+        end
+    end
+
+    # No matter if the deps were stored compressed or not in the manifest,
+    # we internally store them expanded
+    deps_expanded = Dict{UUID, Vector{UUID}}()
+    weakdeps_expanded = Dict{UUID, Vector{UUID}}()
+    extensions_expanded = Dict{UUID, Dict{String, Vector{UUID}}}()
+    sizehint!(deps_expanded, length(deps))
+    sizehint!(weakdeps_expanded, length(deps))
+    sizehint!(extensions_expanded, length(deps))
+
+    if proj_name !== nothing && proj_uuid !== nothing
+        deps_expanded[proj_uuid] = filter!(!=(proj_uuid), collect(values(project_deps)))
+        extensions_expanded[proj_uuid] = project_extensions
+        path = get(project_d, "path", nothing)
+        entry_point = path !== nothing ? path : dirname(envpath)
+        lookup_strategy[proj_uuid] = entry_point
+    end
+
+    for key in ["deps", "weakdeps"]
+        d = key == "deps" ? deps :
+            key == "weakdeps" ? weakdeps :
+            error()
+        d_expanded = key == "deps" ? deps_expanded :
+                     key == "weakdeps" ? weakdeps_expanded :
+                     error()
+        for (pkg, deps) in d
+            # dependencies was already expanded so use it directly:
+            if deps isa Vector{UUID}
+                d_expanded[pkg] = deps
+                for dep in deps
+                    name_to_uuid[names[dep]] = dep
+                end
+            # find the (unique) UUID associated with the name
+            else
+                deps_pkg = UUID[]
+                sizehint!(deps_pkg, length(deps))
+                for dep in deps
+                    push!(deps_pkg, name_to_uuid[dep])
+                end
+                d_expanded[pkg] = deps_pkg
+            end
+        end
+    end
+
+    for (pkg, exts) in extensions
+        exts_expanded = Dict{String, Vector{UUID}}()
+        for (ext, triggers) in exts
+            triggers_expanded = UUID[]
+            sizehint!(triggers_expanded, length(triggers))
+            for trigger in triggers
+                push!(triggers_expanded, name_to_uuid[trigger])
+            end
+            exts_expanded[ext] = triggers_expanded
+        end
+        extensions_expanded[pkg] = exts_expanded
+    end
+
+    # Everything that does not yet have a lookup_strategy is missing from the manifest
+    for (_, uuid) in project_deps
+        get!(lookup_strategy, uuid, missing)
+    end
+
+    #=
+    # Preferences:
+    prefs = get(project_d, "preferences", nothing)
+
+    # `(Julia)LocalPreferences.toml`
+    project_dir = dirname(envpath)
+    local_prefs = nothing
+    for name in preferences_names
+        toml_path = joinpath(project_dir, name)
+        if isfile(toml_path)
+            local_prefs = parsed_toml(toml_path)
+            break
+        end
+    end
+    =#
+
+    return ExplicitEnv(envpath, project_deps, project_weakdeps, project_extras,
+                       project_extensions, deps_expanded, weakdeps_expanded, extensions_expanded,
+                       names, lookup_strategy, #=prefs, local_prefs=#)
+end
+
+## PROGRESS BAR
+
+# using Printf
+Base.@kwdef mutable struct MiniProgressBar
+    max::Int = 1.0
+    header::String = ""
+    color::Symbol = :nothing
+    width::Int = 40
+    current::Int = 0.0
+    prev::Int = 0.0
+    has_shown::Bool = false
+    time_shown::Float64 = 0.0
+    percentage::Bool = true
+    always_reprint::Bool = false
+    indent::Int = 4
+end
+
+const PROGRESS_BAR_TIME_GRANULARITY = Ref(1 / 30.0) # 30 fps
+const PROGRESS_BAR_PERCENTAGE_GRANULARITY = Ref(0.1)
+
+function start_progress(io::IO, _::MiniProgressBar)
+    ansi_disablecursor = "\e[?25l"
+    print(io, ansi_disablecursor)
+end
+
+function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagereturn=true)
+    if p.max == 0
+        perc = 0.0
+        prev_perc = 0.0
+    else
+        perc = p.current / p.max * 100
+        prev_perc = p.prev / p.max * 100
+    end
+    # Bail early if we are not updating the progress bar,
+    # Saves printing to the terminal
+    if !p.always_reprint && p.has_shown && !((perc - prev_perc) > PROGRESS_BAR_PERCENTAGE_GRANULARITY[])
+        return
+    end
+    t = time()
+    if !p.always_reprint && p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[]
+        return
+    end
+    p.time_shown = t
+    p.prev = p.current
+    p.has_shown = true
+
+    progress_text = if false # p.percentage
+        # @sprintf "%2.1f %%" perc
+    else
+        string(p.current, "/",  p.max)
+    end
+    termwidth = @something termwidth displaysize(io)[2]
+    max_progress_width = max(0, min(termwidth - textwidth(p.header) - textwidth(progress_text) - 10 , p.width))
+    n_filled = ceil(Int, max_progress_width * perc / 100)
+    n_left = max_progress_width - n_filled
+    headers = split(p.header, ' ')
+    to_print = sprint(; context=io) do io
+        print(io, " "^p.indent)
+        printstyled(io, headers[1], " "; color=:green, bold=true)
+        printstyled(io, join(headers[2:end], ' '))
+        print(io, " ")
+        printstyled(io, "━"^n_filled; color=p.color)
+        printstyled(io, perc >= 95 ? "━" : "╸"; color=p.color)
+        printstyled(io, "━"^n_left, " "; color=:light_black)
+        print(io, progress_text)
+        carriagereturn && print(io, "\r")
+    end
+    # Print everything in one call
+    print(io, to_print)
+end
+
+function end_progress(io, p::MiniProgressBar)
+    ansi_enablecursor = "\e[?25h"
+    ansi_clearline = "\e[2K"
+    print(io, ansi_enablecursor * ansi_clearline)
+end
+
+function print_progress_bottom(io::IO)
+    ansi_clearline = "\e[2K"
+    ansi_movecol1 = "\e[1G"
+    ansi_moveup(n::Int) = string("\e[", n, "A")
+    print(io, "\e[S" * ansi_moveup(1) * ansi_clearline * ansi_movecol1)
+end
+
+
+############
+struct PkgPrecompileError <: Exception
+    msg::String
+end
+Base.showerror(io::IO, err::PkgPrecompileError) = print(io, err.msg)
+Base.showerror(io::IO, err::PkgPrecompileError, bt; kw...) = Base.showerror(io, err) # hide stacktrace
+
+# This needs a show method to make `julia> err` show nicely
+Base.show(io::IO, err::PkgPrecompileError) = print(io, "PkgPrecompileError: ", err.msg)
+
+import Base: StaleCacheKey
+
+can_fancyprint(io::IO) = io isa Base.TTY && (get(ENV, "CI", nothing) != "true")
+
+function printpkgstyle(io, header, msg; color=:green)
+    printstyled(io, header; color, bold=true)
+    println(io, " ", msg)
+end
+
+const Config = Pair{Cmd, Base.CacheFlags}
+const PkgConfig = Tuple{Base.PkgId,Config}
+
+function precompilepkgs(pkgs::Vector{String}=String[];
+                        internal_call::Bool=false,
+                        strict::Bool = false,
+                        warn_loaded::Bool = true,
+                        timing::Bool = false,
+                        _from_loading::Bool=false,
+                        configs::Union{Config,Vector{Config}}=(``=>Base.CacheFlags()),
+                        io::IO=stderr,
+                        # asking for timing disables fancy mode, as timing is shown in non-fancy mode
+                        fancyprint::Bool = can_fancyprint(io) && !timing,
+                        manifest::Bool=false,)
+
+    configs = configs isa Config ? [configs] : configs
+    requested_pkgs = copy(pkgs) # for understanding user intent
+
+    time_start = time_ns()
+
+    env = ExplicitEnv()
+
+    # Windows sometimes hits a ReadOnlyMemoryError, so we halve the default number of tasks. Issue #2323
+    # TODO: Investigate why this happens in windows and restore the full task limit
+    default_num_tasks = Sys.iswindows() ? div(Sys.CPU_THREADS::Int, 2) + 1 : Sys.CPU_THREADS::Int + 1
+    default_num_tasks = min(default_num_tasks, 16) # limit for better stability on shared resource systems
+
+    num_tasks = parse(Int, get(ENV, "JULIA_NUM_PRECOMPILE_TASKS", string(default_num_tasks)))
+    parallel_limiter = Base.Semaphore(num_tasks)
+
+    if _from_loading && !Sys.isinteractive() && Base.get_bool_env("JULIA_TESTS", false)
+        # suppress passive loading printing in julia test suite. `JULIA_TESTS` is set in Base.runtests
+        io = devnull
+    end
+
+    hascolor = get(io, :color, false)::Bool
+    color_string(cstr::String, col::Union{Int64, Symbol}) = _color_string(cstr, col, hascolor)
+
+    stale_cache = Dict{StaleCacheKey, Bool}()
+    exts = Dict{Base.PkgId, String}() # ext -> parent
+    # make a flat map of each dep and its direct deps
+    depsmap = Dict{Base.PkgId, Vector{Base.PkgId}}()
+    pkg_exts_map = Dict{Base.PkgId, Vector{Base.PkgId}}()
+
+    for (dep, deps) in env.deps
+        pkg = Base.PkgId(dep, env.names[dep])
+        Base.in_sysimage(pkg) && continue
+        deps = [Base.PkgId(x, env.names[x]) for x in deps]
+        depsmap[pkg] = filter!(!Base.in_sysimage, deps)
+        # add any extensions
+        pkg_exts = Dict{Base.PkgId, Vector{Base.PkgId}}()
+        for (ext_name, extdep_uuids) in env.extensions[dep]
+            ext_deps = Base.PkgId[]
+            push!(ext_deps, pkg) # depends on parent package
+            all_extdeps_available = true
+            for extdep_uuid in extdep_uuids
+                extdep_name = env.names[extdep_uuid]
+                if extdep_uuid in keys(env.deps)
+                    push!(ext_deps, Base.PkgId(extdep_uuid, extdep_name))
+                else
+                    all_extdeps_available = false
+                    break
+                end
+            end
+            all_extdeps_available || continue
+            ext_uuid = Base.uuid5(pkg.uuid, ext_name)
+            ext = Base.PkgId(ext_uuid, ext_name)
+            filter!(!Base.in_sysimage, ext_deps)
+            depsmap[ext] = ext_deps
+            exts[ext] = pkg.name
+            pkg_exts[ext] = ext_deps
+        end
+        if !isempty(pkg_exts)
+            pkg_exts_map[pkg] = collect(keys(pkg_exts))
+        end
+    end
+
+    direct_deps = [
+        Base.PkgId(uuid, name)
+        for (name, uuid) in env.project_deps if !Base.in_sysimage(Base.PkgId(uuid, name))
+    ]
+
+    # consider exts of direct deps to be direct deps so that errors are reported
+    append!(direct_deps, keys(filter(d->last(d) in keys(env.project_deps), exts)))
+
+    # An extension effectively depends on another extension if it has all the the
+    # dependencies of that other extension
+    function expand_dependencies(depsmap)
+        function visit!(visited, node, all_deps)
+            if node in visited
+                return
+            end
+            push!(visited, node)
+            for dep in get(Set{Base.PkgId}, depsmap, node)
+                if !(dep in all_deps)
+                    push!(all_deps, dep)
+                    visit!(visited, dep, all_deps)
+                end
+            end
+        end
+
+        depsmap_transitive = Dict{Base.PkgId, Set{Base.PkgId}}()
+        for package in keys(depsmap)
+            # Initialize a set to keep track of all dependencies for 'package'
+            all_deps = Set{Base.PkgId}()
+            visited = Set{Base.PkgId}()
+            visit!(visited, package, all_deps)
+            # Update depsmap with the complete set of dependencies for 'package'
+            depsmap_transitive[package] = all_deps
+        end
+        return depsmap_transitive
+    end
+
+    depsmap_transitive = expand_dependencies(depsmap)
+
+    for (_, extensions_1) in pkg_exts_map
+        for extension_1 in extensions_1
+            deps_ext_1 = depsmap_transitive[extension_1]
+            for (_, extensions_2) in pkg_exts_map
+                for extension_2 in extensions_2
+                    extension_1 == extension_2 && continue
+                    deps_ext_2 = depsmap_transitive[extension_2]
+                    if issubset(deps_ext_2, deps_ext_1)
+                        push!(depsmap[extension_1], extension_2)
+                    end
+                end
+            end
+        end
+    end
+
+    @debug "precompile: deps collected"
+    # this loop must be run after the full depsmap has been populated
+    for (pkg, pkg_exts) in pkg_exts_map
+        # find any packages that depend on the extension(s)'s deps and replace those deps in their deps list with the extension(s),
+        # basically injecting the extension into the precompile order in the graph, to avoid race to precompile extensions
+        for (_pkg, deps) in depsmap # for each manifest dep
+            if !in(_pkg, keys(exts)) && pkg in deps # if not an extension and depends on pkg
+                append!(deps, pkg_exts) # add the package extensions to deps
+                filter!(!isequal(pkg), deps) # remove the pkg from deps
+            end
+        end
+    end
+    @debug "precompile: extensions collected"
+
+    # return early if no deps
+    if isempty(depsmap)
+        if isempty(pkgs)
+            return
+        elseif _from_loading
+            # if called from loading precompilation it may be a package from another environment stack so
+            # don't error and allow serial precompilation to try
+            # TODO: actually handle packages from other envs in the stack
+            return
+        else
+            error("No direct dependencies outside of the sysimage found matching $(pkgs)")
+        end
+    end
+
+    # initialize signalling
+    started = Dict{PkgConfig,Bool}()
+    was_processed = Dict{PkgConfig,Base.Event}()
+    was_recompiled = Dict{PkgConfig,Bool}()
+    for config in configs
+        for pkgid in keys(depsmap)
+            pkg_config = (pkgid, config)
+            started[pkg_config] = false
+            was_processed[pkg_config] = Base.Event()
+            was_recompiled[pkg_config] = false
+        end
+    end
+    @debug "precompile: signalling initialized"
+
+
+    # find and guard against circular deps
+    circular_deps = Base.PkgId[]
+    # Three states
+    # !haskey -> never visited
+    # true -> cannot be compiled due to a cycle (or not yet determined)
+    # false -> not depending on a cycle
+    could_be_cycle = Dict{Base.PkgId, Bool}()
+    function scan_pkg!(pkg, dmap)
+        did_visit_dep = true
+        inpath = get!(could_be_cycle, pkg) do
+            did_visit_dep = false
+            return true
+        end
+        if did_visit_dep ? inpath : scan_deps!(pkg, dmap)
+            # Found a cycle. Delete this and all parents
+            return true
+        end
+        return false
+    end
+    function scan_deps!(pkg, dmap)
+        for dep in dmap[pkg]
+            scan_pkg!(dep, dmap) && return true
+        end
+        could_be_cycle[pkg] = false
+        return false
+    end
+    for pkg in keys(depsmap)
+        if scan_pkg!(pkg, depsmap)
+            push!(circular_deps, pkg)
+            for pkg_config in keys(was_processed)
+                # notify all to allow skipping
+                pkg_config[1] == pkg && notify(was_processed[pkg_config])
+            end
+        end
+    end
+    if !isempty(circular_deps)
+        @warn """Circular dependency detected. Precompilation will be skipped for:\n  $(join(string.(circular_deps), "\n  "))"""
+    end
+    @debug "precompile: circular dep check done"
+
+    if !manifest
+        if isempty(pkgs)
+            pkgs = [pkg.name for pkg in direct_deps]
+        end
+        # restrict to dependencies of given packages
+        function collect_all_deps(depsmap, dep, alldeps=Set{Base.PkgId}())
+            for _dep in depsmap[dep]
+                if !(_dep in alldeps)
+                    push!(alldeps, _dep)
+                    collect_all_deps(depsmap, _dep, alldeps)
+                end
+            end
+            return alldeps
+        end
+        keep = Set{Base.PkgId}()
+        for dep in depsmap
+            dep_pkgid = first(dep)
+            if dep_pkgid.name in pkgs
+                push!(keep, dep_pkgid)
+                collect_all_deps(depsmap, dep_pkgid, keep)
+            end
+        end
+        for ext in keys(exts)
+            if issubset(collect_all_deps(depsmap, ext), keep) # if all extension deps are kept
+                push!(keep, ext)
+            end
+        end
+        filter!(d->in(first(d), keep), depsmap)
+        if isempty(depsmap)
+            if _from_loading
+                # if called from loading precompilation it may be a package from another environment stack so
+                # don't error and allow serial precompilation to try
+                # TODO: actually handle packages from other envs in the stack
+                return
+            else
+                return
+            end
+        end
+    end
+
+    nconfigs = length(configs)
+    target = nothing
+    if nconfigs == 1
+        if !isempty(only(configs)[1])
+            target = "for configuration $(join(only(configs)[1], " "))"
+        end
+    else
+        target = "for $nconfigs compilation configurations..."
+    end
+    @debug "precompile: packages filtered"
+
+    pkg_queue = PkgConfig[]
+    failed_deps = Dict{PkgConfig, String}()
+    precomperr_deps = PkgConfig[] # packages that may succeed after a restart (i.e. loaded packages with no cache file)
+
+    print_lock = io isa Base.LibuvStream ? io.lock::ReentrantLock : ReentrantLock()
+    first_started = Base.Event()
+    printloop_should_exit::Bool = !fancyprint # exit print loop immediately if not fancy printing
+    interrupted_or_done = Base.Event()
+
+    ansi_moveup(n::Int) = string("\e[", n, "A")
+    ansi_movecol1 = "\e[1G"
+    ansi_cleartoend = "\e[0J"
+    ansi_cleartoendofline = "\e[0K"
+    ansi_enablecursor = "\e[?25h"
+    ansi_disablecursor = "\e[?25l"
+    n_done::Int = 0
+    n_already_precomp::Int = 0
+    n_loaded::Int = 0
+    interrupted = false
+
+    function handle_interrupt(err, in_printloop = false)
+        notify(interrupted_or_done)
+        in_printloop || wait(t_print) # wait to let the print loop cease first
+        if err isa InterruptException
+            lock(print_lock) do
+                println(io, " Interrupted: Exiting precompilation...", ansi_cleartoendofline)
+            end
+            interrupted = true
+            return true
+        else
+            return false
+        end
+    end
+    std_outputs = Dict{PkgConfig,String}()
+    taskwaiting = Set{PkgConfig}()
+    pkgspidlocked = Dict{PkgConfig,String}()
+    pkg_liveprinted = nothing
+
+    function monitor_std(pkg_config, pipe; single_requested_pkg=false)
+        pkg, config = pkg_config
+        try
+            liveprinting = false
+            while !eof(pipe)
+                str = readline(pipe, keep=true)
+                if single_requested_pkg && (liveprinting || !isempty(str))
+                    lock(print_lock) do
+                        if !liveprinting
+                            printpkgstyle(io, :Info, "Given $(pkg.name) was explicitly requested, output will be shown live $ansi_cleartoendofline",
+                                color = Base.info_color())
+                            liveprinting = true
+                            pkg_liveprinted = pkg
+                        end
+                        print(io, ansi_cleartoendofline, str)
+                    end
+                end
+                std_outputs[pkg_config] = string(get(std_outputs, pkg_config, ""), str)
+                if !in(pkg_config, taskwaiting) && occursin("waiting for IO to finish", str)
+                    !fancyprint && lock(print_lock) do
+                        println(io, pkg.name, color_string(" Waiting for background task / IO / timer.", Base.warn_color()))
+                    end
+                    push!(taskwaiting, pkg_config)
+                end
+                if !fancyprint && in(pkg_config, taskwaiting)
+                    lock(print_lock) do
+                        print(io, str)
+                    end
+                end
+            end
+        catch err
+            err isa InterruptException || rethrow()
+        end
+    end
+
+    ## fancy print loop
+    t_print = @async begin
+        try
+            wait(first_started)
+            (isempty(pkg_queue) || interrupted_or_done.set) && return
+            lock(print_lock) do
+                if target !== nothing
+                    printpkgstyle(io, :Precompiling, target)
+                end
+                if fancyprint
+                    print(io, ansi_disablecursor)
+                end
+            end
+            t = Timer(0; interval=1/10)
+            anim_chars = ["◐","◓","◑","◒"]
+            i = 1
+            last_length = 0
+            bar = MiniProgressBar(; indent=0, header = "Precompiling packages ", color = :green, percentage=false, always_reprint=true)
+            n_total = length(depsmap) * length(configs)
+            bar.max = n_total - n_already_precomp
+            final_loop = false
+            n_print_rows = 0
+            while !printloop_should_exit
+                lock(print_lock) do
+                    term_size = Base.displaysize(io)::Tuple{Int,Int}
+                    num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps
+                    pkg_queue_show = if !interrupted_or_done.set && length(pkg_queue) > num_deps_show
+                        last(pkg_queue, num_deps_show)
+                    else
+                        pkg_queue
+                    end
+                    str_ = sprint() do iostr
+                        if i > 1
+                            print(iostr, ansi_cleartoend)
+                        end
+                        bar.current = n_done - n_already_precomp
+                        bar.max = n_total - n_already_precomp
+                        # when sizing to the terminal width subtract a little to give some tolerance to resizing the
+                        # window between print cycles
+                        termwidth = displaysize(io)[2] - 4
+                        if !final_loop
+                            str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io)
+                            print(iostr, Base._truncate_at_width_or_chars(true, str, termwidth), "\n")
+                        end
+                        for pkg_config in pkg_queue_show
+                            dep, config = pkg_config
+                            loaded = warn_loaded && haskey(Base.loaded_modules, dep)
+                            _name = haskey(exts, dep) ? string(exts[dep], " → ", dep.name) : dep.name
+                            name = dep in direct_deps ? _name : string(color_string(_name, :light_black))
+                            if nconfigs > 1 && !isempty(config[1])
+                                config_str = "$(join(config[1], " "))"
+                                name *= color_string(" $(config_str)", :light_black)
+                            end
+                            line = if pkg_config in precomperr_deps
+                                string(color_string("  ? ", Base.warn_color()), name)
+                            elseif haskey(failed_deps, pkg_config)
+                                string(color_string("  ✗ ", Base.error_color()), name)
+                            elseif was_recompiled[pkg_config]
+                                !loaded && interrupted_or_done.set && continue
+                                loaded || @async begin # keep successful deps visible for short period
+                                    sleep(1);
+                                    filter!(!isequal(pkg_config), pkg_queue)
+                                end
+                                string(color_string("  ✓ ", loaded ? Base.warn_color() : :green), name)
+                            elseif started[pkg_config]
+                                # Offset each spinner animation using the first character in the package name as the seed.
+                                # If not offset, on larger terminal fonts it looks odd that they all sync-up
+                                anim_char = anim_chars[(i + Int(dep.name[1])) % length(anim_chars) + 1]
+                                anim_char_colored = dep in direct_deps ? anim_char : color_string(anim_char, :light_black)
+                                waiting = if haskey(pkgspidlocked, pkg_config)
+                                    who_has_lock = pkgspidlocked[pkg_config]
+                                    color_string(" Being precompiled by $(who_has_lock)", Base.info_color())
+                                elseif pkg_config in taskwaiting
+                                    color_string(" Waiting for background task / IO / timer. Interrupt to inspect", Base.warn_color())
+                                else
+                                    ""
+                                end
+                                string("  ", anim_char_colored, " ", name, waiting)
+                            else
+                                string("    ", name)
+                            end
+                            println(iostr, Base._truncate_at_width_or_chars(true, line, termwidth))
+                        end
+                    end
+                    last_length = length(pkg_queue_show)
+                    n_print_rows = count("\n", str_)
+                    print(io, str_)
+                    printloop_should_exit = interrupted_or_done.set && final_loop
+                    final_loop = interrupted_or_done.set # ensures one more loop to tidy last task after finish
+                    i += 1
+                    printloop_should_exit || print(io, ansi_moveup(n_print_rows), ansi_movecol1)
+                end
+                wait(t)
+            end
+        catch err
+            handle_interrupt(err, true) || rethrow()
+        finally
+            fancyprint && print(io, ansi_enablecursor)
+        end
+    end
+    tasks = Task[]
+    if !_from_loading
+        Base.LOADING_CACHE[] = Base.LoadingCache()
+    end
+    @debug "precompile: starting precompilation loop" depsmap direct_deps
+    ## precompilation loop
+
+    for (pkg, deps) in depsmap
+        cachepaths = Base.find_all_in_cache_path(pkg)
+        sourcepath = Base.locate_package(pkg)
+        single_requested_pkg = length(requested_pkgs) == 1 && only(requested_pkgs) == pkg.name
+        for config in configs
+            pkg_config = (pkg, config)
+            if sourcepath === nothing
+                failed_deps[pkg_config] = "Error: Missing source file for $(pkg)"
+                notify(was_processed[pkg_config])
+                continue
+            end
+            # Heuristic for when precompilation is disabled
+            if occursin(r"\b__precompile__\(\s*false\s*\)", read(sourcepath, String))
+                notify(was_processed[pkg_config])
+                continue
+            end
+            flags, cacheflags = config
+            task = @async begin
+                try
+                    loaded = haskey(Base.loaded_modules, pkg)
+                    for dep in deps # wait for deps to finish
+                        wait(was_processed[(dep,config)])
+                    end
+                    circular = pkg in circular_deps
+                    is_stale = !Base.isprecompiled(pkg; ignore_loaded=true, stale_cache, cachepaths, sourcepath, flags=cacheflags)
+                    if !circular && is_stale
+                        Base.acquire(parallel_limiter)
+                        is_direct_dep = pkg in direct_deps
+
+                        # std monitoring
+                        std_pipe = Base.link_pipe!(Pipe(); reader_supports_async=true, writer_supports_async=true)
+                        t_monitor = @async monitor_std(pkg_config, std_pipe; single_requested_pkg)
+
+                        _name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name
+                        name = is_direct_dep ? _name : string(color_string(_name, :light_black))
+                        if nconfigs > 1 && !isempty(flags)
+                            config_str = "$(join(flags, " "))"
+                            name *= color_string(" $(config_str)", :light_black)
+                        end
+                        lock(print_lock) do
+                            if !fancyprint && target === nothing && isempty(pkg_queue)
+                                printpkgstyle(io, :Precompiling, "packages...")
+                            end
+                        end
+                        push!(pkg_queue, pkg_config)
+                        started[pkg_config] = true
+                        fancyprint && notify(first_started)
+                        if interrupted_or_done.set
+                            notify(was_processed[pkg_config])
+                            Base.release(parallel_limiter)
+                            return
+                        end
+                        try
+                            # allows processes to wait if another process is precompiling a given package to
+                            # a functionally identical package cache (except for preferences, which may differ)
+                            t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor) do
+                                Base.with_logger(Base.NullLogger()) do
+                                    # The false here means we ignore loaded modules, so precompile for a fresh session
+                                    Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags)
+                                end
+                            end
+                            if ret isa Base.PrecompilableError
+                                push!(precomperr_deps, pkg_config)
+                                !fancyprint && lock(print_lock) do
+                                    println(io, _timing_string(t), color_string("  ? ", Base.warn_color()), name)
+                                end
+                            else
+                                !fancyprint && lock(print_lock) do
+                                    println(io, _timing_string(t), color_string("  ✓ ", loaded ? Base.warn_color() : :green), name)
+                                end
+                                was_recompiled[pkg_config] = true
+                            end
+                            loaded && (n_loaded += 1)
+                        catch err
+                            # @show err
+                            close(std_pipe.in) # close pipe to end the std output monitor
+                            wait(t_monitor)
+                            if err isa ErrorException || (err isa ArgumentError && startswith(err.msg, "Invalid header in cache file"))
+                                failed_deps[pkg_config] = (strict || is_direct_dep) ? string(sprint(showerror, err), "\n", strip(get(std_outputs, pkg_config, ""))) : ""
+                                delete!(std_outputs, pkg_config) # so it's not shown as warnings, given error report
+                                !fancyprint && lock(print_lock) do
+                                    println(io, " "^9, color_string("  ✗ ", Base.error_color()), name)
+                                end
+                            else
+                                rethrow()
+                            end
+                        finally
+                            isopen(std_pipe.in) && close(std_pipe.in) # close pipe to end the std output monitor
+                            wait(t_monitor)
+                            Base.release(parallel_limiter)
+                        end
+                    else
+                        is_stale || (n_already_precomp += 1)
+                    end
+                    n_done += 1
+                    notify(was_processed[pkg_config])
+                catch err_outer
+                    # For debugging:
+                    # println("Task failed $err_outer")
+                    # Base.display_error(ErrorException(""), Base.catch_backtrace())# logging doesn't show here
+                    handle_interrupt(err_outer) || rethrow()
+                    notify(was_processed[pkg_config])
+                finally
+                    filter!(!istaskdone, tasks)
+                    length(tasks) == 1 && notify(interrupted_or_done)
+                end
+            end
+            Base.errormonitor(task) # interrupts are handled separately so ok to watch for other errors like this
+            push!(tasks, task)
+        end
+    end
+    isempty(tasks) && notify(interrupted_or_done)
+    try
+        wait(interrupted_or_done)
+    catch err
+        handle_interrupt(err) || rethrow()
+    finally
+        Base.LOADING_CACHE[] = nothing
+    end
+    notify(first_started) # in cases of no-op or !fancyprint
+    fancyprint && wait(t_print)
+    quick_exit = !all(istaskdone, tasks) || interrupted # if some not finished internal error is likely
+    seconds_elapsed = round(Int, (time_ns() - time_start) / 1e9)
+    ndeps = count(values(was_recompiled))
+    if ndeps > 0 || !isempty(failed_deps) || (quick_exit && !isempty(std_outputs))
+        str = sprint(context=io) do iostr
+            if !quick_exit
+                if fancyprint # replace the progress bar
+                    what = isempty(requested_pkgs) ? "packages finished." : "$(join(requested_pkgs, ", ", " and ")) finished."
+                    printpkgstyle(iostr, :Precompiling, what)
+                end
+                plural = length(configs) > 1 ? "dependency configurations" : ndeps == 1 ? "dependency" : "dependencies"
+                print(iostr, "  $(ndeps) $(plural) successfully precompiled in $(seconds_elapsed) seconds")
+                if n_already_precomp > 0 || !isempty(circular_deps)
+                    n_already_precomp > 0 && (print(iostr, ". $n_already_precomp already precompiled"))
+                    !isempty(circular_deps) && (print(iostr, ". $(length(circular_deps)) skipped due to circular dependency"))
+                    print(iostr, ".")
+                end
+                if n_loaded > 0
+                    plural1 = length(configs) > 1 ? "dependency configurations" : n_loaded == 1 ? "dependency" : "dependencies"
+                    plural2 = n_loaded == 1 ? "a different version is" : "different versions are"
+                    plural3 = n_loaded == 1 ? "" : "s"
+                    print(iostr, "\n  ",
+                        color_string(string(n_loaded), Base.warn_color()),
+                        " $(plural1) precompiled but $(plural2) currently loaded. Restart julia to access the new version$(plural3)"
+                    )
+                end
+                if !isempty(precomperr_deps)
+                    pluralpc = length(configs) > 1 ? "dependency configurations" : precomperr_deps == 1 ? "dependency" : "dependencies"
+                    print(iostr, "\n  ",
+                        color_string(string(length(precomperr_deps)), Base.warn_color()),
+                        " $(pluralpc) failed but may be precompilable after restarting julia"
+                    )
+                end
+            end
+            # show any stderr output, even if Pkg.precompile has been interrupted (quick_exit=true), given user may be
+            # interrupting a hanging precompile job with stderr output. julia#48371
+            filter!(kv -> !isempty(strip(last(kv))), std_outputs) # remove empty output
+            if !isempty(std_outputs)
+                plural1 = length(std_outputs) == 1 ? "y" : "ies"
+                plural2 = length(std_outputs) == 1 ? "" : "s"
+                print(iostr, "\n  ", color_string("$(length(std_outputs))", Base.warn_color()), " dependenc$(plural1) had output during precompilation:")
+                for (pkg_config, err) in std_outputs
+                    pkg, config = pkg_config
+                    err = if pkg == pkg_liveprinted
+                        "[Output was shown above]"
+                    else
+                        join(split(strip(err), "\n"), color_string("\n│  ", Base.warn_color()))
+                    end
+                    name = haskey(exts, pkg) ? string(exts[pkg], " → ", pkg.name) : pkg.name
+                    print(iostr, color_string("\n┌ ", Base.warn_color()), name, color_string("\n│  ", Base.warn_color()), err, color_string("\n└  ", Base.warn_color()))
+                end
+            end
+        end
+        let str=str
+            lock(print_lock) do
+                println(io, str)
+            end
+        end
+        quick_exit && return
+        err_str = ""
+        n_direct_errs = 0
+        for (pkg_config, err) in failed_deps
+            dep, config = pkg_config
+            if strict || (dep in direct_deps)
+                config_str = isempty(config[1]) ? "" : "$(join(config[1], " ")) "
+                err_str = string(err_str, "\n$(dep.name) $config_str\n\n$err", (n_direct_errs > 0 ? "\n" : ""))
+                n_direct_errs += 1
+            end
+        end
+        if err_str != ""
+            pluralde = n_direct_errs == 1 ? "y" : "ies"
+            direct = strict ? "" : "direct "
+            err_msg = "The following $n_direct_errs $(direct)dependenc$(pluralde) failed to precompile:\n$(err_str[1:end-1])"
+            if internal_call # aka. auto-precompilation
+                if isinteractive() && !get(ENV, "CI", false)
+                    plural1 = length(failed_deps) == 1 ? "y" : "ies"
+                    println(io, "  ", color_string("$(length(failed_deps))", Base.error_color()), " dependenc$(plural1) errored.")
+                    println(io, "  For a report of the errors see `julia> err`. To retry use `pkg> precompile`")
+                    setglobal!(Base.MainInclude, :err, PkgPrecompileError(err_msg))
+                else
+                    # auto-precompilation shouldn't throw but if the user can't easily access the
+                    # error messages, just show them
+                    print(io, "\n", err_msg)
+                end
+            else
+                println(io)
+                throw(PkgPrecompileError(err_msg))
+            end
+        end
+    end
+    nothing
+end
+
+_timing_string(t) = string(lpad(round(t * 1e3, digits = 1), 9), " ms")
+
+function _color_string(cstr::String, col::Union{Int64, Symbol}, hascolor)
+    if hascolor
+        enable_ansi  = get(Base.text_colors, col, Base.text_colors[:default])
+        disable_ansi = get(Base.disable_text_style, col, Base.text_colors[:default])
+        return string(enable_ansi, cstr, disable_ansi)
+    else
+        return cstr
+    end
+end
+
+# Can be merged with `maybe_cachefile_lock` in loading?
+function precompile_pkgs_maybe_cachefile_lock(f, io::IO, print_lock::ReentrantLock, fancyprint::Bool, pkg_config, pkgspidlocked, hascolor)
+    pkg, config = pkg_config
+    flags, cacheflags = config
+    FileWatching = Base.loaded_modules[Base.PkgId(Base.UUID("7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"), "FileWatching")]
+    stale_age = Base.compilecache_pidlock_stale_age
+    pidfile = Base.compilecache_pidfile_path(pkg, flags=cacheflags)
+    cachefile = FileWatching.trymkpidlock(f, pidfile; stale_age)
+    if cachefile === false
+        pid, hostname, age = FileWatching.Pidfile.parse_pidfile(pidfile)
+        pkgspidlocked[pkg_config] = if isempty(hostname) || hostname == gethostname()
+            if pid == getpid()
+                "an async task in this process (pidfile: $pidfile)"
+            else
+                "another process (pid: $pid, pidfile: $pidfile)"
+            end
+        else
+            "another machine (hostname: $hostname, pid: $pid, pidfile: $pidfile)"
+        end
+        !fancyprint && lock(print_lock) do
+            println(io, "    ", pkg.name, _color_string(" Being precompiled by $(pkgspidlocked[pkg_config])", Base.info_color(), hascolor))
+        end
+        # wait until the lock is available
+        FileWatching.mkpidlock(pidfile; stale_age) do
+            # double-check in case the other process crashed or the lock expired
+            if Base.isprecompiled(pkg; ignore_loaded=true, flags=cacheflags) # don't use caches for this as the env state will have changed
+                return nothing # returning nothing indicates a process waited for another
+            else
+                delete!(pkgspidlocked, pkg_config)
+                return f() # precompile
+            end
+        end
+    end
+    return cachefile
+end
+
+end
diff --git a/base/promotion.jl b/base/promotion.jl
index 1d4fea8c404eb..689a4e4be8f39 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -19,7 +19,8 @@ Number
 """
 typejoin() = Bottom
 typejoin(@nospecialize(t)) = (@_nospecializeinfer_meta; t)
-typejoin(@nospecialize(t), ts...) = (@_foldable_meta; @_nospecializeinfer_meta; typejoin(t, typejoin(ts...)))
+typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u)) = (@_foldable_meta; @_nospecializeinfer_meta; typejoin(typejoin(t, s), u))
+typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u), ts...) = (@_foldable_meta; @_nospecializeinfer_meta; afoldl(typejoin, typejoin(t, s, u), ts...))
 function typejoin(@nospecialize(a), @nospecialize(b))
     @_foldable_meta
     @_nospecializeinfer_meta
@@ -299,7 +300,8 @@ function promote_type end
 
 promote_type()  = Bottom
 promote_type(T) = T
-promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...)))
+promote_type(T, S, U) = (@inline; promote_type(promote_type(T, S), U))
+promote_type(T, S, U, V...) = (@inline; afoldl(promote_type, promote_type(T, S, U), V...))
 
 promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom
 promote_type(::Type{T}, ::Type{T}) where {T} = T
@@ -373,7 +375,9 @@ function _promote(x::T, y::S) where {T,S}
     return (convert(R, x), convert(R, y))
 end
 promote_typeof(x) = typeof(x)
-promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...)))
+promote_typeof(x, y) = (@inline; promote_type(typeof(x), typeof(y)))
+promote_typeof(x, y, z) = (@inline; promote_type(typeof(x), typeof(y), typeof(z)))
+promote_typeof(x, y, z, a...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, typeof(y)), promote_typeof(x, y, z), a...))
 function _promote(x, y, z)
     @inline
     R = promote_typeof(x, y, z)
diff --git a/base/public.jl b/base/public.jl
index 912953795c801..803766a0cec1b 100644
--- a/base/public.jl
+++ b/base/public.jl
@@ -5,13 +5,16 @@ public
     Checked,
     Filesystem,
     Order,
+    ScopedValues,
     Sort,
 
 # Types
     AbstractLock,
+    AbstractPipe,
     AsyncCondition,
     CodeUnits,
     Event,
+    Fix,
     Fix1,
     Fix2,
     Generator,
@@ -43,6 +46,7 @@ public
     split_rest,
     tail,
     checked_length,
+    elsize,
 
 # Loading
     DL_LOAD_PATH,
@@ -63,7 +67,7 @@ public
     ispublic,
     remove_linenums!,
 
-# Opperators
+# Operators
     operator_associativity,
     operator_precedence,
     isbinaryoperator,
@@ -84,6 +88,17 @@ public
     @locals,
     @propagate_inbounds,
 
+# External processes
+    shell_escape,
+    shell_split,
+    shell_escape_posixly,
+    shell_escape_csh,
+    shell_escape_wincmd,
+    escape_microsoft_c_args,
+
+# Strings
+    escape_raw_string,
+
 # IO
     # types
     BufferStream,
@@ -95,7 +110,11 @@ public
     reseteof,
     link_pipe!,
 
+# filesystem operations
+    rename,
+
 # misc
     notnothing,
     runtests,
-    text_colors
+    text_colors,
+    depwarn
diff --git a/base/range.jl b/base/range.jl
index 4f92b305564cd..8b30222382c9a 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -32,7 +32,7 @@ _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
     (:)(start, [step], stop)
 
 Range operator. `a:b` constructs a range from `a` to `b` with a step size
-equal to 1, which produces:
+equal to +1, which produces:
 
 * a [`UnitRange`](@ref) when `a` and `b` are integers, or
 * a [`StepRange`](@ref) when `a` and `b` are characters, or
@@ -41,6 +41,9 @@ equal to 1, which produces:
 `a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref) or
 [`StepRangeLen`](@ref)). See also [`range`](@ref) for more control.
 
+To create a descending range, use `reverse(a:b)` or a negative step size, e.g. `b:-1:a`.
+Otherwise, when `b < a`, an empty range will be constructed and normalized to `a:a-1`.
+
 The operator `:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`.
 
 `:` is also used to [`quote`](@ref) code, e.g. `:(x + y) isa Expr` and `:x isa Symbol`.
@@ -66,8 +69,12 @@ Mathematically a range is uniquely determined by any three of `start`, `step`, `
 Valid invocations of range are:
 * Call `range` with any three of `start`, `step`, `stop`, `length`.
 * Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed
-  to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
-* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one.
+  to be positive one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be positive one.
+
+To construct a descending range, specify a negative step size, e.g. `range(5, 1; step = -1)` => [5,4,3,2,1]. Otherwise,
+a `stop` value less than the `start` value, with the default `step` of `+1`, constructs an empty range. Empty ranges
+are normalized such that the `stop` is one less than the `start`, e.g. `range(5, 1) == 5:4`.
 
 See Extended Help for additional details on the returned type.
 See also [`logrange`](@ref) for logarithmically spaced points.
@@ -1287,6 +1294,9 @@ promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} =
 OneTo{T}(r::OneTo{T}) where {T<:Integer} = r
 OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop)
 
+promote_rule(a::Type{OneTo{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} =
+    promote_rule(UnitRange{T1}, UR)
+
 promote_rule(a::Type{UnitRange{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} =
     promote_rule(a, UnitRange{eltype(UR)})
 UnitRange{T}(r::AbstractUnitRange) where {T<:Real} = UnitRange{T}(first(r), last(r))
diff --git a/base/rational.jl b/base/rational.jl
index 6b54a409588d2..be5a5b62fb84e 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -27,7 +27,7 @@ end
 checked_den(num::T, den::T) where T<:Integer = checked_den(T, num, den)
 checked_den(num::Integer, den::Integer) = checked_den(promote(num, den)...)
 
-@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError("invalid rational: zero($T)//zero($T)"))
+@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError(LazyString("invalid rational: zero(", T, ")//zero(", T, ")")))
 function Rational{T}(num::Integer, den::Integer) where T<:Integer
     iszero(den) && iszero(num) && __throw_rational_argerror_zero(T)
     if T <: Union{Unsigned, Bool}
@@ -293,8 +293,14 @@ julia> numerator(4)
 4
 ```
 """
-numerator(x::Integer) = x
+numerator(x::Union{Integer,Complex{<:Integer}}) = x
 numerator(x::Rational) = x.num
+function numerator(z::Complex{<:Rational})
+    den = denominator(z)
+    reim = (real(z), imag(z))
+    result = checked_mul.(numerator.(reim), div.(den, denominator.(reim)))
+    complex(result...)
+end
 
 """
     denominator(x)
@@ -310,8 +316,9 @@ julia> denominator(4)
 1
 ```
 """
-denominator(x::Integer) = one(x)
+denominator(x::Union{Integer,Complex{<:Integer}}) = one(x)
 denominator(x::Rational) = x.den
+denominator(z::Complex{<:Rational}) = lcm(denominator(real(z)), denominator(imag(z)))
 
 sign(x::Rational) = oftype(x, sign(x.num))
 signbit(x::Rational) = signbit(x.num)
@@ -332,7 +339,7 @@ function -(x::Rational{T}) where T<:BitSigned
     x.num == typemin(T) && __throw_rational_numerator_typemin(T)
     unsafe_rational(-x.num, x.den)
 end
-@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError("rational numerator is typemin($T)"))
+@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError(LazyString("rational numerator is typemin(", T, ")")))
 
 function -(x::Rational{T}) where T<:Unsigned
     x.num != zero(T) && __throw_negate_unsigned()
@@ -555,7 +562,7 @@ lcm(x::Rational, y::Rational) = unsafe_rational(lcm(x.num, y.num), gcd(x.den, y.
 function gcdx(x::Rational, y::Rational)
     c = gcd(x, y)
     if iszero(c.num)
-        a, b = one(c.num), c.num
+        a, b = zero(c.num), c.num
     elseif iszero(c.den)
         a = ifelse(iszero(x.den), one(c.den), c.den)
         b = ifelse(iszero(y.den), one(c.den), c.den)
diff --git a/base/reduce.jl b/base/reduce.jl
index 6a0d46c61fcd9..0c37256b64fb5 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -4,14 +4,6 @@
 
 ###### Generic (map)reduce functions ######
 
-if Int === Int32
-    const SmallSigned = Union{Int8,Int16}
-    const SmallUnsigned = Union{UInt8,UInt16}
-else
-    const SmallSigned = Union{Int8,Int16,Int32}
-    const SmallUnsigned = Union{UInt8,UInt16,UInt32}
-end
-
 abstract type AbstractBroadcasted end
 const AbstractArrayOrBroadcasted = Union{AbstractArray, AbstractBroadcasted}
 
@@ -22,8 +14,8 @@ The reduction operator used in `sum`. The main difference from [`+`](@ref) is th
 integers are promoted to `Int`/`UInt`.
 """
 add_sum(x, y) = x + y
-add_sum(x::SmallSigned, y::SmallSigned) = Int(x) + Int(y)
-add_sum(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) + UInt(y)
+add_sum(x::BitSignedSmall, y::BitSignedSmall) = Int(x) + Int(y)
+add_sum(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) + UInt(y)
 add_sum(x::Real, y::Real)::Real = x + y
 
 """
@@ -33,8 +25,8 @@ The reduction operator used in `prod`. The main difference from [`*`](@ref) is t
 integers are promoted to `Int`/`UInt`.
 """
 mul_prod(x, y) = x * y
-mul_prod(x::SmallSigned, y::SmallSigned) = Int(x) * Int(y)
-mul_prod(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) * UInt(y)
+mul_prod(x::BitSignedSmall, y::BitSignedSmall) = Int(x) * Int(y)
+mul_prod(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) * UInt(y)
 mul_prod(x::Real, y::Real)::Real = x * y
 
 ## foldl && mapfoldl
@@ -305,7 +297,7 @@ implementations may reuse the return value of `f` for elements that appear multi
 guaranteed left or right associativity and invocation of `f` for every value.
 """
 mapreduce(f, op, itr; kw...) = mapfoldl(f, op, itr; kw...)
-mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...)
+mapreduce(f, op, itr, itrs...; kw...) = reduce(op, Generator(f, itr, itrs...); kw...)
 
 # Note: sum_seq usually uses four or more accumulators after partial
 # unrolling, so each accumulator gets at most 256 numbers
@@ -348,11 +340,11 @@ reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T)
-reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
-reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitSignedSmall}  = zero(Int)
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitUnsignedSmall} = zero(UInt)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T)
-reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
-reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitSignedSmall}  = one(Int)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitUnsignedSmall} = one(UInt)
 
 reduce_empty(op::BottomRF, ::Type{T}) where {T} = reduce_empty(op.rf, T)
 reduce_empty(op::MappingRF, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf, T)
@@ -402,11 +394,11 @@ reduce_first(::typeof(+), x::Bool) = Int(x)
 reduce_first(::typeof(*), x::AbstractChar) = string(x)
 
 reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
-reduce_first(::typeof(add_sum), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
+reduce_first(::typeof(add_sum), x::BitSignedSmall)   = Int(x)
+reduce_first(::typeof(add_sum), x::BitUnsignedSmall) = UInt(x)
 reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
-reduce_first(::typeof(mul_prod), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)
+reduce_first(::typeof(mul_prod), x::BitSignedSmall)   = Int(x)
+reduce_first(::typeof(mul_prod), x::BitUnsignedSmall) = UInt(x)
 
 """
     Base.mapreduce_first(f, op, x)
@@ -480,8 +472,8 @@ elements are not reordered if you use an ordered collection.
 julia> reduce(*, [2; 3; 4])
 24
 
-julia> reduce(*, [2; 3; 4]; init=-1)
--24
+julia> reduce(*, Int[]; init=1)
+1
 ```
 """
 reduce(op, itr; kw...) = mapreduce(identity, op, itr; kw...)
@@ -646,11 +638,11 @@ function mapreduce_impl(f, op::Union{typeof(max), typeof(min)},
     start = first + 1
     simdstop  = start + chunk_len - 4
     while simdstop <= last - 3
-        @inbounds for i in start:4:simdstop
-            v1 = _fast(op, v1, f(A[i+0]))
-            v2 = _fast(op, v2, f(A[i+1]))
-            v3 = _fast(op, v3, f(A[i+2]))
-            v4 = _fast(op, v4, f(A[i+3]))
+        for i in start:4:simdstop
+            v1 = _fast(op, v1, f(@inbounds(A[i+0])))
+            v2 = _fast(op, v2, f(@inbounds(A[i+1])))
+            v3 = _fast(op, v3, f(@inbounds(A[i+2])))
+            v4 = _fast(op, v4, f(@inbounds(A[i+3])))
         end
         checkbounds(A, simdstop+3)
         start += chunk_len
@@ -1126,7 +1118,7 @@ If the input contains [`missing`](@ref) values, return `missing` if all non-miss
 values are `false` (or equivalently, if the input contains no `true` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
-See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), , [`||`](@ref).
+See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), [`||`](@ref).
 
 # Examples
 ```jldoctest
@@ -1164,7 +1156,7 @@ If the input contains [`missing`](@ref) values, return `missing` if all non-miss
 values are `true` (or equivalently, if the input contains no `false` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
-See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), , [`&&`](@ref), [`allunique`](@ref).
+See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), [`&&`](@ref), [`allunique`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/reducedim.jl b/base/reducedim.jl
index 1a7a6feb33d5e..0478afe1a46b6 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -196,11 +196,8 @@ end
 
 ## generic (map)reduction
 
-has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false
-has_fast_linear_indexing(a::Array) = true
-has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true  # 0d objects, for Broadcasted
-has_fast_linear_indexing(bc::Broadcast.Broadcasted) =
-    all(has_fast_linear_indexing, bc.args)
+has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
+has_fast_linear_indexing(a::AbstractVector) = true
 
 function check_reducedims(R, A)
     # Check whether R has compatible dimensions w.r.t. A for reduction
@@ -261,8 +258,9 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
         # use mapreduce_impl, which is probably better tuned to achieve higher performance
         nslices = div(length(A), lsiz)
         ibase = first(LinearIndices(A))-1
-        for i = 1:nslices
-            @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+        for i in eachindex(R)
+            r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+            @inbounds R[i] = r
             ibase += lsiz
         end
         return R
@@ -272,19 +270,20 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
     if reducedim1(R, A)
         # keep the accumulator as a local variable when reducing along the first dimension
         i1 = first(axes1(R))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            r = R[i1,IR]
+            @inbounds r = R[i1,IR]
             @simd for i in axes(A, 1)
-                r = op(r, f(A[i, IA]))
+                r = op(r, f(@inbounds(A[i, IA])))
             end
-            R[i1,IR] = r
+            @inbounds R[i1,IR] = r
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             @simd for i in axes(A, 1)
-                R[i,IR] = op(R[i,IR], f(A[i,IA]))
+                v = op(@inbounds(R[i,IR]), f(@inbounds(A[i,IA])))
+                @inbounds R[i,IR] = v
             end
         end
     end
@@ -326,8 +325,8 @@ julia> mapreduce(isodd, |, a, dims=1)
 """
 mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) =
     _mapreduce_dim(f, op, init, A, dims)
-mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) =
-    reduce(op, map(f, A...); kw...)
+mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...; kw...) =
+    reduce(op, map(f, A, B...); kw...)
 
 _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
     mapfoldl_impl(f, op, nt, A)
@@ -1028,33 +1027,33 @@ function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
     zi = zero(eltype(ks))
     if reducedim1(Rval, A)
         i1 = first(axes1(Rval))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            tmpRv = Rval[i1,IR]
-            tmpRi = Rind[i1,IR]
+            @inbounds tmpRv = Rval[i1,IR]
+            @inbounds tmpRi = Rind[i1,IR]
             for i in axes(A,1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
+                tmpAv = f(@inbounds(A[i,IA]))
                 if tmpRi == zi || op(tmpRv, tmpAv)
                     tmpRv = tmpAv
                     tmpRi = k
                 end
                 y = iterate(ks, kss)
             end
-            Rval[i1,IR] = tmpRv
-            Rind[i1,IR] = tmpRi
+            @inbounds Rval[i1,IR] = tmpRv
+            @inbounds Rind[i1,IR] = tmpRi
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             for i in axes(A, 1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
-                tmpRv = Rval[i,IR]
-                tmpRi = Rind[i,IR]
+                tmpAv = f(@inbounds(A[i,IA]))
+                @inbounds tmpRv = Rval[i,IR]
+                @inbounds tmpRi = Rind[i,IR]
                 if tmpRi == zi || op(tmpRv, tmpAv)
-                    Rval[i,IR] = tmpAv
-                    Rind[i,IR] = k
+                    @inbounds Rval[i,IR] = tmpAv
+                    @inbounds Rind[i,IR] = k
                 end
                 y = iterate(ks, kss)
             end
diff --git a/base/reflection.jl b/base/reflection.jl
index 446e5825a16cc..2ddd34b0f73c1 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -18,7 +18,9 @@ julia> parentmodule(Base.Broadcast)
 Base
 ```
 """
-parentmodule(m::Module) = ccall(:jl_module_parent, Ref{Module}, (Any,), m)
+parentmodule(m::Module) = (@_total_meta; ccall(:jl_module_parent, Ref{Module}, (Any,), m))
+
+is_root_module(m::Module) = parentmodule(m) === m || (isdefined(Main, :Base) && m === Main.Base)
 
 """
     moduleroot(m::Module) -> Module
@@ -28,6 +30,7 @@ parent modules of `m` which is either a registered root module or which is its
 own parent module.
 """
 function moduleroot(m::Module)
+    @_total_meta
     while true
         is_root_module(m) && return m
         p = parentmodule(m)
@@ -61,6 +64,7 @@ julia> fullname(Main)
 ```
 """
 function fullname(m::Module)
+    @_total_meta
     mn = nameof(m)
     if m === Main || m === Base || m === Core
         return (mn,)
@@ -73,28 +77,33 @@ function fullname(m::Module)
 end
 
 """
-    names(x::Module; all::Bool = false, imported::Bool = false)
+    names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) -> Vector{Symbol}
 
 Get a vector of the public names of a `Module`, excluding deprecated names.
 If `all` is true, then the list also includes non-public names defined in the module,
 deprecated names, and compiler-generated names.
 If `imported` is true, then names explicitly imported from other modules
-are also included. Names are returned in sorted order.
+are also included.
+If `usings` is true, then names explicitly imported via `using` are also included.
+Names are returned in sorted order.
 
 As a special case, all names defined in `Main` are considered \"public\",
 since it is not idiomatic to explicitly mark names from `Main` as public.
 
 !!! note
     `sym ∈ names(SomeModule)` does *not* imply `isdefined(SomeModule, sym)`.
-    `names` will return symbols marked with `public` or `export`, even if
+    `names` may return symbols marked with `public` or `export`, even if
     they are not defined in the module.
 
+!!! warning
+    `names` may return duplicate names. The duplication happens, e.g. if an `import`ed name
+    conflicts with an already existing identifier.
+
 See also: [`Base.isexported`](@ref), [`Base.ispublic`](@ref), [`Base.@locals`](@ref), [`@__MODULE__`](@ref).
 """
-names(m::Module; all::Bool = false, imported::Bool = false) =
-    sort!(unsorted_names(m; all, imported))
-unsorted_names(m::Module; all::Bool = false, imported::Bool = false) =
-    ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)
+names(m::Module; kwargs...) = sort!(unsorted_names(m; kwargs...))
+unsorted_names(m::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) =
+    ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint, Cint), m, all, imported, usings)
 
 """
     isexported(m::Module, s::Symbol) -> Bool
@@ -198,11 +207,34 @@ function _fieldnames(@nospecialize t)
     return t.name.names
 end
 
+const BINDING_KIND_GLOBAL       = 0x0
+const BINDING_KIND_CONST        = 0x1
+const BINDING_KIND_CONST_IMPORT = 0x2
+const BINDING_KIND_IMPLICIT     = 0x3
+const BINDING_KIND_EXPLICIT     = 0x4
+const BINDING_KIND_IMPORTED     = 0x5
+const BINDING_KIND_FAILED       = 0x6
+const BINDING_KIND_DECLARED     = 0x7
+const BINDING_KIND_GUARD        = 0x8
+
+function lookup_binding_partition(world::UInt, b::Core.Binding)
+    ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world)
+end
+
+function lookup_binding_partition(world::UInt, gr::Core.GlobalRef)
+    ccall(:jl_get_globalref_partition, Ref{Core.BindingPartition}, (Any, UInt), gr, world)
+end
+
+binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart)
+binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s)))
+
 """
     fieldname(x::DataType, i::Integer)
 
 Get the name of field `i` of a `DataType`.
 
+The return type is `Symbol`, except when `x <: Tuple`, in which case the index of the field is returned, of type `Int`.
+
 # Examples
 ```jldoctest
 julia> fieldname(Rational, 1)
@@ -210,6 +242,9 @@ julia> fieldname(Rational, 1)
 
 julia> fieldname(Rational, 2)
 :den
+
+julia> fieldname(Tuple{String,Int}, 2)
+2
 ```
 """
 function fieldname(t::DataType, i::Integer)
@@ -237,6 +272,9 @@ fieldname(t::Type{<:Tuple}, i::Integer) =
 
 Get a tuple with the names of the fields of a `DataType`.
 
+Each name is a `Symbol`, except when `x <: Tuple`, in which case each name (actually the
+index of the field) is an `Int`.
+
 See also [`propertynames`](@ref), [`hasfield`](@ref).
 
 # Examples
@@ -246,6 +284,9 @@ julia> fieldnames(Rational)
 
 julia> fieldnames(typeof(1+im))
 (:re, :im)
+
+julia> fieldnames(Tuple{String,Int})
+(1, 2)
 ```
 """
 fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
@@ -486,8 +527,8 @@ gc_alignment(T::Type) = gc_alignment(Core.sizeof(T))
     Base.datatype_haspadding(dt::DataType) -> Bool
 
 Return whether the fields of instances of this type are packed in memory,
-with no intervening padding bits (defined as bits whose value does not uniquely
-impact the egal test when applied to the struct fields).
+with no intervening padding bits (defined as bits whose value does not impact
+the semantic value of the instance itself).
 Can be called on any `isconcretetype`.
 """
 function datatype_haspadding(dt::DataType)
@@ -497,6 +538,21 @@ function datatype_haspadding(dt::DataType)
     return flags & 1 == 1
 end
 
+"""
+    Base.datatype_isbitsegal(dt::DataType) -> Bool
+
+Return whether egality of the (non-padding bits of the) in-memory representation
+of an instance of this type implies semantic egality of the instance itself.
+This may not be the case if the type contains to other values whose egality is
+independent of their identity (e.g. immutable structs, some types, etc.).
+"""
+function datatype_isbitsegal(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
+    return (flags & (1<<5)) != 0
+end
+
 """
     Base.datatype_nfields(dt::DataType) -> UInt32
 
@@ -510,6 +566,17 @@ function datatype_nfields(dt::DataType)
     return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields
 end
 
+"""
+    Base.datatype_npointers(dt::DataType) -> Int
+
+Return the number of pointers in the layout of a datatype.
+"""
+function datatype_npointers(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
+end
+
 """
     Base.datatype_pointerfree(dt::DataType) -> Bool
 
@@ -518,9 +585,7 @@ Can be called on any `isconcretetype`.
 """
 function datatype_pointerfree(dt::DataType)
     @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
-    return npointers == 0
+    return datatype_npointers(dt) == 0
 end
 
 """
@@ -952,7 +1017,7 @@ julia> struct Foo
        end
 
 julia> Base.fieldindex(Foo, :z)
-ERROR: type Foo has no field z
+ERROR: FieldError: type Foo has no field z
 Stacktrace:
 [...]
 
@@ -1001,12 +1066,15 @@ function datatype_fieldcount(t::DataType)
             return fieldcount(types)
         end
         return nothing
-    elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
+    elseif isabstracttype(t)
         return nothing
     end
-    if isdefined(t, :types)
+    if t.name === Tuple.name
+        isvatuple(t) && return nothing
         return length(t.types)
     end
+    # Equivalent to length(t.types), but `t.types` is lazy and we do not want
+    # to be forced to compute it.
     return length(t.name.names)
 end
 
@@ -1132,7 +1200,7 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
     for m in method_instances(f, t, world)
         if generated && hasgenerator(m)
             if may_invoke_generator(m)
-                code = ccall(:jl_code_for_staged, Any, (Any, UInt), m, world)::CodeInfo
+                code = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), m, world, C_NULL)
             else
                 error("Could not expand generator for `@generated` method ", m, ". ",
                       "This can happen if the provided argument types (", t, ") are ",
@@ -1152,11 +1220,17 @@ hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
 
 # low-level method lookup functions used by the compiler
 
-unionlen(x::Union) = unionlen(x.a) + unionlen(x.b)
-unionlen(@nospecialize(x)) = 1
+unionlen(@nospecialize(x)) = x isa Union ? unionlen(x.a) + unionlen(x.b) : 1
 
-_uniontypes(x::Union, ts) = (_uniontypes(x.a,ts); _uniontypes(x.b,ts); ts)
-_uniontypes(@nospecialize(x), ts) = (push!(ts, x); ts)
+function _uniontypes(@nospecialize(x), ts::Array{Any,1})
+    if x isa Union
+        _uniontypes(x.a, ts)
+        _uniontypes(x.b, ts)
+    else
+        push!(ts, x)
+    end
+    return ts
+end
 uniontypes(@nospecialize(x)) = _uniontypes(x, Any[])
 
 function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
@@ -1322,9 +1396,12 @@ uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m) :
                              error("Code for this Method is not available.")
 function _uncompressed_ir(m::Method)
     s = m.source
-    s isa String && (s = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s))
+    if s isa String
+        s = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)
+    end
     return s::CodeInfo
 end
+
 # for backwards compat
 const uncompressed_ast = uncompressed_ir
 const _uncompressed_ast = _uncompressed_ir
@@ -1579,7 +1656,7 @@ julia> code_typed(+, (Float64, Float64))
 """
 function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...)
     if isa(f, Core.OpaqueClosure)
-        return code_typed_opaque_closure(f; kwargs...)
+        return code_typed_opaque_closure(f, types; kwargs...)
     end
     tt = signature_type(f, types)
     return code_typed_by_type(tt; kwargs...)
@@ -1597,6 +1674,12 @@ function default_tt(@nospecialize(f))
     end
 end
 
+function raise_match_failure(name::Symbol, @nospecialize(tt))
+    @noinline
+    sig_str = sprint(Base.show_tuple_as_call, Symbol(""), tt)
+    error("$name: unanalyzable call given $sig_str")
+end
+
 """
     code_typed_by_type(types::Type{<:Tuple}; ...)
 
@@ -1619,35 +1702,59 @@ function code_typed_by_type(@nospecialize(tt::Type);
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && raise_match_failure(:code_typed, tt)
     asts = []
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
-        (code, ty) = Core.Compiler.typeinf_code(interp, match, optimize)
+        code = Core.Compiler.typeinf_code(interp, match, optimize)
         if code === nothing
             push!(asts, match.method => Any)
         else
             debuginfo === :none && remove_linenums!(code)
-            push!(asts, code => ty)
+            push!(asts, code => code.rettype)
         end
     end
     return asts
 end
 
-function get_oc_code_rt(@nospecialize(oc::Core.OpaqueClosure))
-    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+function get_oc_code_rt(oc::Core.OpaqueClosure, types, optimize::Bool)
+    @nospecialize oc types
+    ccall(:jl_is_in_pure_context, Bool, ()) &&
+        error("code reflection cannot be used from generated functions")
     m = oc.source
     if isa(m, Method)
-        code = _uncompressed_ir(m)
-        return Pair{CodeInfo,Any}(code, typeof(oc).parameters[2])
+        if isdefined(m, :source)
+            if optimize
+                tt = Tuple{typeof(oc.captures), to_tuple_type(types).parameters...}
+                mi = Core.Compiler.specialize_method(m, tt, Core.svec())
+                interp = Core.Compiler.NativeInterpreter(m.primary_world)
+                code = Core.Compiler.typeinf_code(interp, mi, optimize)
+                if code isa CodeInfo
+                    return Pair{CodeInfo, Any}(code, code.rettype)
+                end
+                error("inference not successful")
+            else
+                code = _uncompressed_ir(m)
+                return Pair{CodeInfo, Any}(code, typeof(oc).parameters[2])
+            end
+        else
+            # OC constructed from optimized IR
+            codeinst = m.specializations.cache
+            # XXX: the inferred field is not normally a CodeInfo, but this assumes it is guaranteed to be always
+            return Pair{CodeInfo, Any}(codeinst.inferred, codeinst.rettype)
+        end
     else
         error("encountered invalid Core.OpaqueClosure object")
     end
 end
 
-function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure);
-                                   debuginfo::Symbol=:default, _...)
-    (code, rt) = get_oc_code_rt(oc)
+function code_typed_opaque_closure(oc::Core.OpaqueClosure, types;
+                                   debuginfo::Symbol=:default,
+                                   optimize::Bool=true,
+                                   _...)
+    @nospecialize oc types
+    (code, rt) = get_oc_code_rt(oc, types, optimize)
     debuginfo === :none && remove_linenums!(code)
     return Any[Pair{CodeInfo,Any}(code, rt)]
 end
@@ -1718,9 +1825,10 @@ function code_ircode_by_type(
     (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
         error("code reflection cannot be used from generated functions")
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && raise_match_failure(:code_ircode, tt)
     asts = []
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
         (code, ty) = Core.Compiler.typeinf_ircode(interp, match, optimize_until)
         if code === nothing
@@ -1746,6 +1854,12 @@ function _builtin_effects(interp::Core.Compiler.AbstractInterpreter,
     return Core.Compiler.builtin_effects(Core.Compiler.typeinf_lattice(interp), f, argtypes, rt)
 end
 
+function _builtin_exception_type(interp::Core.Compiler.AbstractInterpreter,
+                                 @nospecialize(f::Core.Builtin), @nospecialize(types))
+    effects = _builtin_effects(interp, f, types)
+    return Core.Compiler.is_nothrow(effects) ? Union{} : Any
+end
+
 check_generated_context(world::UInt) =
     (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
         error("code reflection cannot be used from generated functions")
@@ -1802,17 +1916,16 @@ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f));
                       interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
     check_generated_context(world)
     if isa(f, Core.OpaqueClosure)
-        _, rt = only(code_typed_opaque_closure(f))
+        _, rt = only(code_typed_opaque_closure(f, types))
         return Any[rt]
+    elseif isa(f, Core.Builtin)
+        return Any[_builtin_return_type(interp, f, types)]
     end
-    if isa(f, Core.Builtin)
-        rt = _builtin_return_type(interp, f, types)
-        return Any[rt]
-    end
-    rts = Any[]
     tt = signature_type(f, types)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
-    for match in matches
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && raise_match_failure(:return_types, tt)
+    rts = Any[]
+    for match in matches.matches
         ty = Core.Compiler.typeinf_type(interp, match::Core.MethodMatch)
         push!(rts, something(ty, Any))
     end
@@ -1871,18 +1984,13 @@ function infer_return_type(@nospecialize(f), @nospecialize(types=default_tt(f));
                            interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
     check_generated_context(world)
     if isa(f, Core.OpaqueClosure)
-        return last(only(code_typed_opaque_closure(f)))
-    end
-    if isa(f, Core.Builtin)
+        return last(only(code_typed_opaque_closure(f, types)))
+    elseif isa(f, Core.Builtin)
         return _builtin_return_type(interp, f, types)
     end
     tt = signature_type(f, types)
     matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
-    if matches === nothing
-        # unanalyzable call, i.e. the interpreter world might be newer than the world where
-        # the `f` is defined, return the unknown return type
-        return Any
-    end
+    matches === nothing && raise_match_failure(:infer_return_type, tt)
     rt = Union{}
     for match in matches.matches
         ty = Core.Compiler.typeinf_type(interp, match::Core.MethodMatch)
@@ -1947,18 +2055,15 @@ function infer_exception_types(@nospecialize(f), @nospecialize(types=default_tt(
     check_generated_context(world)
     if isa(f, Core.OpaqueClosure)
         return Any[Any] # TODO
+    elseif isa(f, Core.Builtin)
+        return Any[_builtin_exception_type(interp, f, types)]
     end
-    if isa(f, Core.Builtin)
-        effects = _builtin_effects(interp, f, types)
-        exct = Core.Compiler.is_nothrow(effects) ? Union{} : Any
-        return Any[exct]
-    end
-    excts = Any[]
     tt = signature_type(f, types)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
-    for match in matches
-        match = match::Core.MethodMatch
-        frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#false)
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && raise_match_failure(:infer_exception_types, tt)
+    excts = Any[]
+    for match in matches.matches
+        frame = Core.Compiler.typeinf_frame(interp, match::Core.MethodMatch, #=run_optimizer=#false)
         if frame === nothing
             exct = Any
         else
@@ -2029,18 +2134,12 @@ function infer_exception_type(@nospecialize(f), @nospecialize(types=default_tt(f
     check_generated_context(world)
     if isa(f, Core.OpaqueClosure)
         return Any # TODO
-    end
-    if isa(f, Core.Builtin)
-        effects = _builtin_effects(interp, f, types)
-        return Core.Compiler.is_nothrow(effects) ? Union{} : Any
+    elseif isa(f, Core.Builtin)
+        return _builtin_exception_type(interp, f, types)
     end
     tt = signature_type(f, types)
     matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
-    if matches === nothing
-        # unanalyzable call, i.e. the interpreter world might be newer than the world where
-        # the `f` is defined, return the unknown exception type
-        return Any
-    end
+    matches === nothing && raise_match_failure(:infer_exception_type, tt)
     exct = Union{}
     if _may_throw_methoderror(matches)
         # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
@@ -2058,6 +2157,7 @@ end
 """
     Base.infer_effects(
         f, types=default_tt(f);
+        optimize::Bool=true,
         world::UInt=get_world_counter(),
         interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> effects::Effects
 
@@ -2066,6 +2166,7 @@ Returns the possible computation effects of the function call specified by `f` a
 # Arguments
 - `f`: The function to analyze.
 - `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `optimize` (optional): Whether to run additional effects refinements based on post-optimization analysis.
 - `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
 - `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
 
@@ -2113,6 +2214,7 @@ signature, the `:nothrow` bit gets tainted.
 - [`Base.@assume_effects`](@ref): A macro for making assumptions about the effects of a method.
 """
 function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
+                       optimize::Bool=true,
                        world::UInt=get_world_counter(),
                        interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
     check_generated_context(world)
@@ -2121,11 +2223,7 @@ function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
     end
     tt = signature_type(f, types)
     matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
-    if matches === nothing
-        # unanalyzable call, i.e. the interpreter world might be newer than the world where
-        # the `f` is defined, return the unknown effects
-        return Core.Compiler.Effects()
-    end
+    matches === nothing && raise_match_failure(:infer_effects, tt)
     effects = Core.Compiler.EFFECTS_TOTAL
     if _may_throw_methoderror(matches)
         # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
@@ -2133,7 +2231,7 @@ function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
     end
     for match in matches.matches
         match = match::Core.MethodMatch
-        frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#true)
+        frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#optimize)
         frame === nothing && return Core.Compiler.Effects()
         effects = Core.Compiler.merge_effects(effects, frame.result.ipo_effects)
     end
@@ -2156,13 +2254,14 @@ function print_statement_costs(io::IO, @nospecialize(tt::Type);
                                interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
     tt = to_tuple_type(tt)
     world == typemax(UInt) && error("code reflection cannot be used from generated functions")
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && raise_match_failure(:print_statement_costs, tt)
     params = Core.Compiler.OptimizationParams(interp)
     cst = Int[]
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
         println(io, match.method)
-        (code, ty) = Core.Compiler.typeinf_code(interp, match, true)
+        code = Core.Compiler.typeinf_code(interp, match, true)
         if code === nothing
             println(io, "  inference not successful")
         else
@@ -2364,7 +2463,7 @@ function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_c
     for kw in kws
         endswith(String(kw), "...") && return true
     end
-    kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)]
+    kwnames = collect(kwnames)
     return issubset(kwnames, kws)
 end
 
diff --git a/base/refpointer.jl b/base/refpointer.jl
index ce2c6fc00560d..5027462eeb6b6 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -171,8 +171,8 @@ if is_primary_base_module
     Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
 end
 
-cconvert(::Type{Ptr{P}}, a::Array{P}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
-cconvert(::Type{Ref{P}}, a::Array{P}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
+cconvert(::Type{Ptr{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
+cconvert(::Type{Ref{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
 cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
 cconvert(::Type{Ref{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
 
diff --git a/base/regex.jl b/base/regex.jl
index 5018c9623f7e3..38eb4cc512552 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -188,6 +188,11 @@ Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref),
 [`getindex`](@ref), where keys are the names or numbers of a capture group.
 See [`keys`](@ref keys(::RegexMatch)) for more information.
 
+`Tuple(m)`, `NamedTuple(m)`, and `Dict(m)` can be used to construct more flexible collection types from `RegexMatch` objects.
+
+!!! compat "Julia 1.11"
+    Constructing NamedTuples and Dicts from RegexMatches requires Julia 1.11
+
 # Examples
 ```jldoctest
 julia> m = match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30 in the morning")
@@ -210,6 +215,12 @@ julia> hr, min, ampm = m; # destructure capture groups by iteration
 
 julia> hr
 "11"
+
+julia> Dict(m)
+Dict{Any, Union{Nothing, SubString{String}}} with 3 entries:
+  "hour"   => "11"
+  3        => nothing
+  "minute" => "30"
 ```
 """
 struct RegexMatch{S<:AbstractString} <: AbstractMatch
@@ -289,6 +300,9 @@ iterate(m::RegexMatch, args...) = iterate(m.captures, args...)
 length(m::RegexMatch) = length(m.captures)
 eltype(m::RegexMatch) = eltype(m.captures)
 
+NamedTuple(m::RegexMatch) = NamedTuple{Symbol.(Tuple(keys(m)))}(values(m))
+Dict(m::RegexMatch) = Dict(pairs(m))
+
 function occursin(r::Regex, s::AbstractString; offset::Integer=0)
     compile(r)
     return PCRE.exec_r(r.regex, String(s), offset, r.match_options)
@@ -381,9 +395,13 @@ end
     match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
 Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref)
-object containing the match, or nothing if the match failed. The matching substring can be
-retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
-`m.captures` The optional `idx` argument specifies an index at which to start the search.
+object containing the match, or nothing if the match failed.
+The optional `idx` argument specifies an index at which to start the search.
+The matching substring can be retrieved by accessing `m.match`, the captured sequences can be retrieved by accessing `m.captures`.
+The resulting [`RegexMatch`](@ref) object can be used to construct other collections: e.g. `Tuple(m)`, `NamedTuple(m)`.
+
+!!! compat "Julia 1.11"
+    Constructing NamedTuples and Dicts requires Julia 1.11
 
 # Examples
 ```jldoctest
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index ff674690a5c66..d31f3ebb5dd2d 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -13,15 +13,16 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
 
     function throwbits(S::Type, T::Type, U::Type)
         @noinline
-        throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type"))
+        throw(ArgumentError(LazyString("cannot reinterpret `", S, "` as `", T, "`, type `", U, "` is not a bits type")))
     end
     function throwsize0(S::Type, T::Type, msg)
         @noinline
-        throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size"))
+        throw(ArgumentError(LazyString("cannot reinterpret a zero-dimensional `", S, "` array to `", T,
+            "` which is of a ", msg, " size")))
     end
     function throwsingleton(S::Type, T::Type)
         @noinline
-        throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` which is a singleton type"))
+        throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T, "` which is a singleton type")))
     end
 
     global reinterpret
@@ -67,14 +68,14 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
         function thrownonint(S::Type, T::Type, dim)
             @noinline
-            throw(ArgumentError("""
-                cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
-                The resulting array would have non-integral first dimension.
-                """))
+            throw(ArgumentError(LazyString(
+                "cannot reinterpret an `", S, "` array to `", T, "` whose first dimension has size `", dim,
+                "`. The resulting array would have a non-integral first dimension.")))
         end
         function throwaxes1(S::Type, T::Type, ax1)
             @noinline
-            throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
+            throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T,
+                "` when the first axis is ", ax1, ". Try reshaping first.")))
         end
         isbitstype(T) || throwbits(S, T, T)
         isbitstype(S) || throwbits(S, T, S)
@@ -99,15 +100,19 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}}
         function throwintmult(S::Type, T::Type)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got ",
+                sizeof(T), ") and `sizeof(eltype(a))` (got ", sizeof(S), ") be an integer multiple of the other")))
         end
         function throwsize1(a::AbstractArray, T::Type)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", eltype(a),
+                " requires that `axes(a, 1)` (got ", axes(a, 1), ") be equal to 1:",
+                sizeof(T) ÷ sizeof(eltype(a)), " (from the ratio of element sizes)")))
         end
         function throwfromsingleton(S, T)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $S requires that $T be a singleton type, since $S is one"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", S,
+                " requires that ", T, " be a singleton type, since ", S, " is one")))
         end
         isbitstype(T) || throwbits(S, T, T)
         isbitstype(S) || throwbits(S, T, S)
@@ -368,6 +373,7 @@ has_offset_axes(a::ReinterpretArray) = has_offset_axes(a.parent)
 
 elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T)
 cconvert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = cconvert(Ptr{S}, a.parent)
+unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
 
 @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S}
     if isprimitivetype(T) && isprimitivetype(S)
@@ -386,6 +392,10 @@ check_ptr_indexable(a::AbstractArray, sz) = false
 
 @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)]
 
+@propagate_inbounds isassigned(a::ReinterpretArray, inds::Integer...) = checkbounds(Bool, a, inds...) && (check_ptr_indexable(a) || _isassigned_ra(a, inds...))
+@propagate_inbounds isassigned(a::ReinterpretArray, inds::SCartesianIndex2) = isassigned(a.parent, inds.j)
+@propagate_inbounds _isassigned_ra(a::ReinterpretArray, inds...) = true # that is not entirely true, but computing exactly which indexes will be accessed in the parent requires a lot of duplication from the _getindex_ra code
+
 @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
     check_readable(a)
     check_ptr_indexable(a) && return _getindex_ptr(a, inds...)
@@ -847,8 +857,8 @@ end
     inpackedsize = packedsize(In)
     outpackedsize = packedsize(Out)
     inpackedsize == outpackedsize ||
-        throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \
-            and $inpackedsize, respectively."))
+        throw(ArgumentError(LazyString("Packed sizes of types ", Out, " and ", In,
+            " do not match; got ", outpackedsize, " and ", inpackedsize, ", respectively.")))
     in = Ref{In}(x)
     out = Ref{Out}()
     if struct_subpadding(Out, In)
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index 344858e76764a..019f1d30a25c2 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -39,7 +39,7 @@ eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedI
 # reshaping to same # of dimensions
 @eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M}
     throw_dmrsa(dims, len) =
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $len"))
+        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len"))
     len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
     if len != length(a)
         throw_dmrsa(dims, length(a))
@@ -51,9 +51,9 @@ eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedI
     ref = a.ref
     if M == 1 && N !== 1
         mem = ref.mem::Memory{T}
-        if !(ref === GenericMemoryRef(mem) && len === mem.length)
+        if !(ref === memoryref(mem) && len === mem.length)
             mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
-            ref = GenericMemoryRef(mem)::typeof(ref)
+            ref = memoryref(mem)::typeof(ref)
         end
     end
     # or we could use `a = Array{T,N}(undef, ntuple(0, Val(N))); a.ref = ref; a.size = dims; return a` here
@@ -132,12 +132,24 @@ reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(
         "may have at most one omitted dimension specified by `Colon()`")))
     @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ",
         "must be divisible by the product of the new dimensions $dims")))
-    pre = _before_colon(dims...)
+    pre = _before_colon(dims...)::Tuple{Vararg{Int}}
     post = _after_colon(dims...)
     _any_colon(post...) && throw1(dims)
-    sz, remainder = divrem(length(A), prod(pre)*prod(post))
-    remainder == 0 || throw2(A, dims)
-    (pre..., Int(sz), post...)
+    post::Tuple{Vararg{Int}}
+    len = length(A)
+    sz, is_exact = if iszero(len)
+        (0, true)
+    else
+        let pr = Core.checked_dims(pre..., post...)  # safe product
+            if iszero(pr)
+                throw2(A, dims)
+            end
+            (quo, rem) = divrem(len, pr)
+            (Int(quo), iszero(rem))
+        end
+    end::Tuple{Int,Bool}
+    is_exact || throw2(A, dims)
+    (pre..., sz, post...)::Tuple{Int,Vararg{Int}}
 end
 @inline _any_colon() = false
 @inline _any_colon(dim::Colon, tail...) = true
@@ -225,6 +237,11 @@ elsize(::Type{<:ReshapedArray{<:Any,<:Any,P}}) where {P} = elsize(P)
 
 unaliascopy(A::ReshapedArray) = typeof(A)(unaliascopy(A.parent), A.dims, A.mi)
 dataids(A::ReshapedArray) = dataids(A.parent)
+# forward the aliasing check the parent in case there are specializations
+mightalias(A::ReshapedArray, B::ReshapedArray) = mightalias(parent(A), parent(B))
+# special handling for reshaped SubArrays that dispatches to the subarray aliasing check
+mightalias(A::ReshapedArray, B::SubArray) = mightalias(parent(A), B)
+mightalias(A::SubArray, B::ReshapedArray) = mightalias(A, parent(B))
 
 @inline ind2sub_rs(ax, ::Tuple{}, i::Int) = (i,)
 @inline ind2sub_rs(ax, strds, i) = _ind2sub_rs(ax, strds, i - 1)
@@ -238,7 +255,8 @@ offset_if_vec(i::Integer, axs::Tuple) = i
 
 @inline function isassigned(A::ReshapedArrayLF, index::Int)
     @boundscheck checkbounds(Bool, A, index) || return false
-    @inbounds ret = isassigned(parent(A), index)
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds ret = isassigned(parent(A), indexparent)
     ret
 end
 @inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N}
@@ -251,7 +269,8 @@ end
 
 @inline function getindex(A::ReshapedArrayLF, index::Int)
     @boundscheck checkbounds(A, index)
-    @inbounds ret = parent(A)[index]
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds ret = parent(A)[indexparent]
     ret
 end
 @inline function getindex(A::ReshapedArray{T,N}, indices::Vararg{Int,N}) where {T,N}
@@ -275,7 +294,8 @@ end
 
 @inline function setindex!(A::ReshapedArrayLF, val, index::Int)
     @boundscheck checkbounds(A, index)
-    @inbounds parent(A)[index] = val
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds parent(A)[indexparent] = val
     val
 end
 @inline function setindex!(A::ReshapedArray{T,N}, val, indices::Vararg{Int,N}) where {T,N}
@@ -304,6 +324,7 @@ setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err()
 @noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect")
 
 cconvert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = cconvert(Ptr{T}, parent(a))
+unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, a.parent)
 
 # Add a few handy specializations to further speed up views of reshaped ranges
 const ReshapedUnitRange{T,N,A<:AbstractUnitRange} = ReshapedArray{T,N,A,Tuple{}}
diff --git a/base/rounding.jl b/base/rounding.jl
index d80edda1e418f..98b4c30822245 100644
--- a/base/rounding.jl
+++ b/base/rounding.jl
@@ -338,6 +338,10 @@ The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the defau
 of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect
 results if the global rounding mode is changed (see [`rounding`](@ref)).
 
+When rounding to a floating point type, will round to integers representable by that type
+(and Inf) rather than true integers. Inf is treated as one ulp greater than the
+`floatmax(T)` for purposes of determining "nearest", similar to [`convert`](@ref).
+
 # Examples
 ```jldoctest
 julia> round(1.7)
@@ -363,6 +367,12 @@ julia> round(123.456; sigdigits=2)
 
 julia> round(357.913; sigdigits=4, base=2)
 352.0
+
+julia> round(Float16, typemax(UInt128))
+Inf16
+
+julia> floor(Float16, typemax(UInt128))
+Float16(6.55e4)
 ```
 
 !!! note
@@ -466,6 +476,7 @@ floor(::Type{T}, x) where T = round(T, x, RoundDown)
  ceil(::Type{T}, x) where T = round(T, x, RoundUp)
 round(::Type{T}, x) where T = round(T, x, RoundNearest)
 
-round(::Type{T}, x, r::RoundingMode) where T = convert(T, round(x, r))
+round(::Type{T}, x, r::RoundingMode) where T = _round_convert(T, round(x, r), x, r)
+_round_convert(::Type{T}, x_integer, x, r) where T = convert(T, x_integer)
 
 round(x::Integer, r::RoundingMode) = x
diff --git a/base/scopedvalues.jl b/base/scopedvalues.jl
index ee4571b8ae497..6ccd4687c5c65 100644
--- a/base/scopedvalues.jl
+++ b/base/scopedvalues.jl
@@ -3,6 +3,7 @@
 module ScopedValues
 
 export ScopedValue, with, @with
+public get
 
 """
     ScopedValue(x)
@@ -19,6 +20,8 @@ Dynamic scopes are propagated across tasks.
 # Examples
 
 ```jldoctest
+julia> using Base.ScopedValues;
+
 julia> const sval = ScopedValue(1);
 
 julia> sval[]
@@ -52,7 +55,22 @@ Base.eltype(::ScopedValue{T}) where {T} = T
 """
     isassigned(val::ScopedValue)
 
-Test whether a ScopedValue has an assigned value.
+Test whether a `ScopedValue` has an assigned value.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(1); b = ScopedValue{Int}();
+
+julia> isassigned(a)
+true
+
+julia> isassigned(b)
+false
+```
 """
 function Base.isassigned(val::ScopedValue)
     val.has_default && return true
@@ -112,6 +130,21 @@ const novalue = NoValue()
 If the scoped value isn't set and doesn't have a default value,
 return `nothing`. Otherwise returns `Some{T}` with the current
 value.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(42); b = ScopedValue{Int}();
+
+julia> ScopedValues.get(a)
+Some(42)
+
+julia> isnothing(ScopedValues.get(b))
+true
+```
 """
 function get(val::ScopedValue{T}) where {T}
     scope = Core.current_scope()::Union{Scope, Nothing}
@@ -149,11 +182,32 @@ function Base.show(io::IO, val::ScopedValue)
 end
 
 """
-    @with vars... expr
+    @with (var::ScopedValue{T} => val)... expr
+
+Macro version of `with`. The expression `@with var=>val expr` evaluates `expr` in a
+new dynamic scope with `var` set to `val`. `val` will be converted to type `T`.
+`@with var=>val expr` is equivalent to `with(var=>val) do expr end`, but `@with`
+avoids creating a closure.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
 
-Macro version of `with(f, vars...)` but with `expr` instead of `f` function.
-This is similar to using [`with`](@ref) with a `do` block, but avoids creating
-a closure.
+julia> const a = ScopedValue(1);
+
+julia> f(x) = a[] + x;
+
+julia> @with a=>2 f(10)
+12
+
+julia> @with a=>3 begin
+           x = 100
+           f(x)
+       end
+103
+```
 """
 macro with(exprs...)
     if length(exprs) > 1
@@ -170,9 +224,44 @@ macro with(exprs...)
 end
 
 """
-    with(f, (var::ScopedValue{T} => val::T)...)
+    with(f, (var::ScopedValue{T} => val)...)
+
+Execute `f` in a new dynamic scope with `var` set to `val`. `val` will be converted
+to type `T`.
+
+See also: [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(1);
 
-Execute `f` in a new scope with `var` set to `val`.
+julia> f(x) = a[] + x;
+
+julia> f(10)
+11
+
+julia> with(a=>2) do
+           f(10)
+       end
+12
+
+julia> f(10)
+11
+
+julia> b = ScopedValue(2);
+
+julia> g(x) = a[] + b[] + x;
+
+julia> with(a=>10, b=>20) do
+           g(30)
+       end
+60
+
+julia> with(() -> a[] * b[], a=>3, b=>4)
+12
+```
 """
 function with(f, pair::Pair{<:ScopedValue}, rest::Pair{<:ScopedValue}...)
     @with(pair, rest..., f())
diff --git a/base/shell.jl b/base/shell.jl
index 137150b585d86..e07fff128acfe 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -219,9 +219,9 @@ print_shell_escaped(io::IO; special::String="") = nothing
 """
     shell_escape(args::Union{Cmd,AbstractString...}; special::AbstractString="")
 
-The unexported `shell_escape` function is the inverse of the unexported `shell_split` function:
+The unexported `shell_escape` function is the inverse of the unexported [`Base.shell_split()`](@ref) function:
 it takes a string or command object and escapes any special characters in such a way that calling
-`shell_split` on it would give back the array of words in the original command. The `special`
+[`Base.shell_split()`](@ref) on it would give back the array of words in the original command. The `special`
 keyword argument controls what characters in addition to whitespace, backslashes, quotes and
 dollar signs are considered to be special (default: none).
 
@@ -284,6 +284,8 @@ The unexported `shell_escape_posixly` function
 takes a string or command object and escapes any special characters in such a way that
 it is safe to pass it as an argument to a posix shell.
 
+See also: [`Base.shell_escape()`](@ref)
+
 # Examples
 ```jldoctest
 julia> Base.shell_escape_posixly("cat", "/foo/bar baz", "&&", "echo", "done")
@@ -316,7 +318,7 @@ a backslash.
 This function should also work for a POSIX shell, except if the input
 string contains a linefeed (`"\\n"`) character.
 
-See also: [`shell_escape_posixly`](@ref)
+See also: [`Base.shell_escape_posixly()`](@ref)
 """
 function shell_escape_csh(io::IO, args::AbstractString...)
     first = true
@@ -414,7 +416,7 @@ run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
 With an I/O stream parameter `io`, the result will be written there,
 rather than returned as a string.
 
-See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref).
+See also [`Base.escape_microsoft_c_args()`](@ref), [`Base.shell_escape_posixly()`](@ref).
 
 # Examples
 ```jldoctest
@@ -468,7 +470,7 @@ It joins command-line arguments to be passed to a Windows
 C/C++/Julia application into a command line, escaping or quoting the
 meta characters space, TAB, double quote and backslash where needed.
 
-See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref).
+See also [`Base.shell_escape_wincmd()`](@ref), [`Base.escape_raw_string()`](@ref).
 """
 function escape_microsoft_c_args(io::IO, args::AbstractString...)
     # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
diff --git a/base/show.jl b/base/show.jl
index df429830b16fd..ec6776d81f2d5 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -514,24 +514,16 @@ function _show_default(io::IO, @nospecialize(x))
 end
 
 function active_module()
-    isassigned(REPL_MODULE_REF) || return Main
-    REPL = REPL_MODULE_REF[]
-    return invokelatest(REPL.active_module)::Module
+    if ccall(:jl_is_in_pure_context, Bool, ())
+        error("active_module() should not be called from a pure context")
+    end
+    if !@isdefined(active_repl) || active_repl === nothing
+        return Main
+    end
+    return invokelatest(active_module, active_repl)::Module
 end
 
-# Check if a particular symbol is exported from a standard library module
-function is_exported_from_stdlib(name::Symbol, mod::Module)
-    !isdefined(mod, name) && return false
-    orig = getfield(mod, name)
-    while !(mod === Base || mod === Core)
-        activemod = active_module()
-        parent = parentmodule(mod)
-        if mod === activemod || mod === parent || parent === activemod
-            return false
-        end
-        mod = parent
-    end
-    return isexported(mod, name) && isdefined(mod, name) && !isdeprecated(mod, name) && getfield(mod, name) === orig
+module UsesCoreAndBaseOnly
 end
 
 function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
@@ -544,13 +536,13 @@ function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
         print(io, mt.name)
     elseif isdefined(mt, :module) && isdefined(mt.module, mt.name) &&
         getfield(mt.module, mt.name) === f
-        mod = active_module()
-        if is_exported_from_stdlib(mt.name, mt.module) || mt.module === mod
-            show_sym(io, mt.name)
-        else
+        # this used to call the removed internal function `is_exported_from_stdlib`, which effectively
+        # just checked for exports from Core and Base.
+        mod = get(io, :module, UsesCoreAndBaseOnly)
+        if !(isvisible(mt.name, mt.module, mod) || mt.module === mod)
             print(io, mt.module, ".")
-            show_sym(io, mt.name)
         end
+        show_sym(io, mt.name)
     else
         fallback(io, f)
     end
@@ -681,7 +673,7 @@ function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVecto
         has_typevar(v.lb, p) && return false
         has_typevar(v.ub, p) && return false
     end
-    for i = 1:length(env)
+    for i = eachindex(env)
         i == skip && continue
         has_typevar(env[i], p) && return false
     end
@@ -737,9 +729,9 @@ end
 function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector)
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless alias is visible from module passed to
-        # IOContext. If :module is not set, default to Main (or current active module).
+        # IOContext. If :module is not set, default to Main.
         # nothing can be used to force printing prefix.
-        from = get(io, :module, active_module())
+        from = get(io, :module, Main)
         if (from === nothing || !isvisible(name.name, name.mod, from))
             show(io, name.mod)
             print(io, ".")
@@ -1053,10 +1045,10 @@ function show_type_name(io::IO, tn::Core.TypeName)
     quo = false
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless type is visible from module passed to
-        # IOContext If :module is not set, default to Main (or current active module).
+        # IOContext If :module is not set, default to Main.
         # nothing can be used to force printing prefix
-        from = get(io, :module, active_module())
-        if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from))
+        from = get(io, :module, Main)
+        if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from::Module))
             show(io, tn.module)
             print(io, ".")
             if globfunc && !is_id_start_char(first(string(sym)))
@@ -1188,11 +1180,11 @@ end
 
 function show_at_namedtuple(io::IO, syms::Tuple, types::DataType)
     first = true
-    for i in 1:length(syms)
+    for i in eachindex(syms)
         if !first
             print(io, ", ")
         end
-        print(io, syms[i])
+        show_sym(io, syms[i])
         typ = types.parameters[i]
         if typ !== Any
             print(io, "::")
@@ -1253,8 +1245,6 @@ show(io::IO, n::Signed) = (write(io, string(n)); nothing)
 show(io::IO, n::Unsigned) = print(io, "0x", string(n, pad = sizeof(n)<<1, base = 16))
 print(io::IO, n::Unsigned) = print(io, string(n))
 
-show(io::IO, p::Ptr) = print(io, typeof(p), " @0x$(string(UInt(p), base = 16, pad = Sys.WORD_SIZE>>2))")
-
 has_tight_type(p::Pair) =
     typeof(p.first)  == typeof(p).parameters[1] &&
     typeof(p.second) == typeof(p).parameters[2]
@@ -1336,15 +1326,15 @@ end
 
 show(io::IO, l::Core.MethodInstance) = show_mi(io, l)
 
-function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false)
-    def = l.def
+function show_mi(io::IO, mi::Core.MethodInstance, from_stackframe::Bool=false)
+    def = mi.def
     if isa(def, Method)
-        if isdefined(def, :generator) && l === def.generator
+        if isdefined(def, :generator) && mi === def.generator
             print(io, "MethodInstance generator for ")
             show(io, def)
         else
             print(io, "MethodInstance for ")
-            show_tuple_as_call(io, def.name, l.specTypes; qualified=true)
+            show_tuple_as_call(io, def.name, mi.specTypes; qualified=true)
         end
     else
         print(io, "Toplevel MethodInstance thunk")
@@ -1352,10 +1342,15 @@ function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false)
         # MethodInstance is part of a stacktrace, it gets location info
         # added by other means.  But if it isn't, then we should try
         # to print a little more identifying information.
-        if !from_stackframe
-            linetable = l.uninferred.linetable
-            line = isempty(linetable) ? "unknown" : (lt = linetable[1]::Union{LineNumberNode,Core.LineInfoNode}; string(lt.file, ':', lt.line))
-            print(io, " from ", def, " starting at ", line)
+        if !from_stackframe && isdefined(mi, :cache)
+            ci = mi.cache
+            if ci.owner === :uninferred
+                di = ci.inferred.debuginfo
+                file, line = IRShow.debuginfo_firstline(di)
+                file = string(file)
+                line = isempty(file) || line < 0 ? "<unknown>" : "$file:$line"
+                print(io, " from ", def, " starting at ", line)
+            end
         end
     end
 end
@@ -1377,8 +1372,10 @@ function show(io::IO, mi_info::Core.Compiler.Timings.InferenceFrameInfo)
             show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true)
         end
     else
-        linetable = mi.uninferred.linetable
-        line = isempty(linetable) ? "" : (lt = linetable[1]; string(lt.file, ':', lt.line))
+        di = mi.cache.inferred.debuginfo
+        file, line = IRShow.debuginfo_firstline(di)
+        file = string(file)
+        line = isempty(file) || line < 0 ? "<unknown>" : "$file:$line"
         print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line)
     end
 end
@@ -1402,11 +1399,11 @@ function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, de
                     x = itr[i]
                     show(recur_io, x)
                 end
-                i += 1
-                if i > l
+                if i == l
                     delim_one && first && print(io, delim)
                     break
                 end
+                i += 1
                 first = false
                 print(io, delim)
                 print(io, ' ')
@@ -2199,8 +2196,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :do && nargs == 2
         iob = IOContext(io, beginsym=>false)
         show_unquoted(iob, args[1], indent, -1, quote_level)
-        print(io, " do ")
-        show_list(iob, (((args[2]::Expr).args[1])::Expr).args, ", ", 0, 0, quote_level)
+        print(io, " do")
+        do_args = (((args[2]::Expr).args[1])::Expr).args
+        if !isempty(do_args)
+            print(io, ' ')
+            show_list(iob, do_args, ", ", 0, 0, quote_level)
+        end
         for stmt in (((args[2]::Expr).args[2])::Expr).args
             print(io, '\n', " "^(indent + indent_width))
             show_unquoted(iob, stmt, indent + indent_width, -1, quote_level)
@@ -2377,7 +2378,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         if get(io, beginsym, false)
             print(io, '(')
             ind = indent + indent_width
-            for i = 1:length(ex.args)
+            for i = eachindex(ex.args)
                 if i > 1
                     # if there was only a comment before the first semicolon, the expression would get parsed as a NamedTuple
                     if !(i == 2 && ex.args[1] isa LineNumberNode)
@@ -2500,6 +2501,11 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :meta && nargs == 2 && args[1] === :pop_loc
         print(io, "# meta: pop locations ($(args[2]::Int))")
     # print anything else as "Expr(head, args...)"
+    elseif head === :toplevel
+        # Reset SOURCE_SLOTNAMES. Raw SlotNumbers are not valid in Expr(:toplevel), but
+        # we want to show bad ASTs reasonably to make errors understandable.
+        lambda_io = IOContext(io, :SOURCE_SLOTNAMES => false)
+        show_unquoted_expr_fallback(lambda_io, ex, indent, quote_level)
     else
         unhandled = true
     end
@@ -2525,7 +2531,7 @@ function show_signature_function(io::IO, @nospecialize(ft), demangle=false, farg
     uw = unwrap_unionall(ft)
     if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && _isself(uw)
         uwmod = parentmodule(uw)
-        if qualified && !is_exported_from_stdlib(uw.name.mt.name, uwmod) && uwmod !== Main
+        if qualified && !isexported(uwmod, uw.name.mt.name) && uwmod !== Main
             print_within_stacktrace(io, uwmod, '.', bold=true)
         end
         s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.mt.name), context=io)
@@ -2820,7 +2826,7 @@ module IRShow
     import ..Base
     import .Compiler: IRCode, CFG, scan_ssa_use!,
         isexpr, compute_basic_blocks, block_for_inst, IncrementalCompact,
-        Effects, ALWAYS_TRUE, ALWAYS_FALSE
+        Effects, ALWAYS_TRUE, ALWAYS_FALSE, DebugInfoStream, getdebugidx
     Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind)
     Base.size(r::Compiler.StmtRange) = Compiler.size(r)
     Base.first(r::Compiler.StmtRange) = Compiler.first(r)
@@ -2833,9 +2839,9 @@ module IRShow
     include("compiler/ssair/show.jl")
 
     const __debuginfo = Dict{Symbol, Any}(
-        # :full => src -> Base.IRShow.statementidx_lineinfo_printer(src), # and add variable slot information
-        :source => src -> Base.IRShow.statementidx_lineinfo_printer(src),
-        # :oneliner => src -> Base.IRShow.statementidx_lineinfo_printer(Base.IRShow.PartialLineInfoPrinter, src),
+        # :full => src -> statementidx_lineinfo_printer(src), # and add variable slot information
+        :source => src -> statementidx_lineinfo_printer(src),
+        # :oneliner => src -> statementidx_lineinfo_printer(PartialLineInfoPrinter, src),
         :none => src -> Base.IRShow.lineinfo_disabled,
         )
     const default_debuginfo = Ref{Symbol}(:none)
@@ -2849,18 +2855,11 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
     if src.slotnames !== nothing
         lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => sourceinfo_slotnames(src))
     end
-    if isempty(src.linetable) || src.linetable[1] isa LineInfoNode
-        println(io)
-        # TODO: static parameter values?
-        # only accepts :source or :none, we can't have a fallback for default since
-        # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
-        IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
-    else
-        # this is a CodeInfo that has not been used as a method yet, so its locations are still LineNumberNodes
-        body = Expr(:block)
-        body.args = src.code
-        show(lambda_io, body)
-    end
+    println(io)
+    # TODO: static parameter values?
+    # only accepts :source or :none, we can't have a fallback for default since
+    # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
+    IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
     print(io, ")")
 end
 
@@ -2888,6 +2887,12 @@ show(io::IO, ::Core.Compiler.NativeInterpreter) =
 show(io::IO, cache::Core.Compiler.CachedMethodTable) =
     print(io, typeof(cache), "(", Core.Compiler.length(cache.cache), " entries)")
 
+function show(io::IO, limited::Core.Compiler.LimitedAccuracy)
+    print(io, "Core.Compiler.LimitedAccuracy(")
+    show(io, limited.typ)
+    print(io, ", #= ", Core.Compiler.length(limited.causes), " cause(s) =#)")
+end
+
 function dump(io::IOContext, x::SimpleVector, n::Int, indent)
     if isempty(x)
         print(io, "empty SimpleVector")
@@ -2895,7 +2900,7 @@ function dump(io::IOContext, x::SimpleVector, n::Int, indent)
     end
     print(io, "SimpleVector")
     if n > 0
-        for i = 1:length(x)
+        for i in eachindex(x)
             println(io)
             print(io, indent, "  ", i, ": ")
             if isassigned(x,i)
@@ -3012,7 +3017,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent)
         end
         fields = fieldnames(x)
         fieldtypes = datatype_fieldtypes(x)
-        for idx in 1:length(fields)
+        for idx in eachindex(fields)
             println(io)
             print(io, indent, "  ")
             is_mut && isconst(x, idx) && print(io, "const ")
@@ -3326,8 +3331,8 @@ bitstring(B::BitArray) = sprint(bitshow, B)
 function show(io::IO, oc::Core.OpaqueClosure)
     A, R = typeof(oc).parameters
     show_tuple_as_call(io, Symbol(""), A; hasfirst=false)
-    print(io, "::", R)
     print(io, "->◌")
+    print(io, "::", R)
 end
 
 function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R}
diff --git a/base/some.jl b/base/some.jl
index 46b912243f859..7d7089bf76655 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -166,3 +166,8 @@ macro something(args...)
     end
     return expr
 end
+
+==(a::Some, b::Some) = a.value == b.value
+isequal(a::Some, b::Some)::Bool = isequal(a.value, b.value)
+const hash_some_seed = UInt == UInt64 ? 0xde5c997007a4ca3a : 0x78c29c09
+hash(s::Some, h::UInt) = hash(s.value, hash_some_seed + h)
diff --git a/base/sort.jl b/base/sort.jl
index eb57da376f4ab..ef0f208209fc8 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -62,7 +62,7 @@ function issorted(itr, order::Ordering)
 end
 
 """
-    issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    issorted(v, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Test whether a collection is in sorted order. The keywords modify what
 order is considered sorted, as described in the [`sort!`](@ref) documentation.
@@ -85,9 +85,16 @@ julia> issorted([1, 2, -2, 3], by=abs)
 true
 ```
 """
-issorted(itr;
-    lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) =
-    issorted(itr, ord(lt,by,rev,order))
+function issorted(itr;
+        lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward)
+    # Explicit branching because the compiler can't optimize away the
+    # type instability of the `ord` call with Bool `rev` parameter.
+    if rev === true
+        issorted(itr, ord(lt, by, true, order))
+    else
+        issorted(itr, ord(lt, by, nothing, order))
+    end
+end
 
 function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering)
     # TODO move k from `alg` to `kw`
@@ -514,7 +521,7 @@ end
 ## sorting algorithm components ##
 
 """
-    _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset)
+    _sort!(v::AbstractVector, a::Base.Sort.Algorithm, o::Base.Order.Ordering, kw; t, offset)
 
 An internal function that sorts `v` using the algorithm `a` under the ordering `o`,
 subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only
@@ -528,7 +535,7 @@ no scratch space is present.
 
 A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There
 are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space
-may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`.
+may be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`.
 
 `t` is an appropriate scratch space for the algorithm at hand, to be accessed as
 `t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in
@@ -539,7 +546,7 @@ function _sort! end
 # TODO: delete this optimization when views have no overhead.
 const UnwrappableSubArray = SubArray{T, 1, <:AbstractArray{T}, <:Tuple{AbstractUnitRange, Vararg{Number}}, true} where T
 """
-    SubArrayOptimization(next) <: Algorithm
+    SubArrayOptimization(next) isa Base.Sort.Algorithm
 
 Unwrap certain known SubArrays because views have a performance overhead 😢
 
@@ -566,7 +573,7 @@ function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw
 end
 
 """
-    MissingOptimization(next) <: Algorithm
+    MissingOptimization(next) isa Base.Sort.Algorithm
 
 Filter out missing values.
 
@@ -625,7 +632,7 @@ function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)
     i - 1
 end
 """
-    send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi)
+    send_to_end!(f::Function, v::AbstractVector, o::Base.Order.DirectOrdering[, end_stable]; lo, hi)
 
 Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end.
 
@@ -679,7 +686,7 @@ end
 
 
 """
-    IEEEFloatOptimization(next) <: Algorithm
+    IEEEFloatOptimization(next) isa Base.Sort.Algorithm
 
 Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers.
 
@@ -724,7 +731,7 @@ end
 
 
 """
-    BoolOptimization(next) <: Algorithm
+    BoolOptimization(next) isa Base.Sort.Algorithm
 
 Sort `AbstractVector{Bool}`s using a specialized version of counting sort.
 
@@ -751,7 +758,7 @@ end
 
 
 """
-    IsUIntMappable(yes, no) <: Algorithm
+    IsUIntMappable(yes, no) isa Base.Sort.Algorithm
 
 Determines if the elements of a vector can be mapped to unsigned integers while preserving
 their order under the specified ordering.
@@ -773,7 +780,7 @@ end
 
 
 """
-    Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm
+    Small{N}(small=SMALL_ALGORITHM, big) isa Base.Sort.Algorithm
 
 Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big`
 algorithm.
@@ -812,6 +819,16 @@ Characteristics:
 it is well-suited to small collections but should not be used for large ones.
 """
 const InsertionSort = InsertionSortAlg()
+
+"""
+    SMALL_ALGORITHM
+
+Default sorting algorithm for small arrays.
+
+This is an alias for a simple low-overhead algorithm that does not scale well
+to large arrays, unlike high-overhead recursive algorithms used for larger arrays.
+`SMALL_ALGORITHM` is a good choice for the base case of a recursive algorithm.
+"""
 const SMALL_ALGORITHM = InsertionSortAlg()
 
 function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw)
@@ -835,7 +852,7 @@ end
 
 
 """
-    CheckSorted(next) <: Algorithm
+    CheckSorted(next) isa Base.Sort.Algorithm
 
 Check if the input is already sorted and for large inputs, also check if it is
 reverse-sorted. The reverse-sorted check is unstable.
@@ -862,7 +879,7 @@ end
 
 
 """
-    ComputeExtrema(next) <: Algorithm
+    ComputeExtrema(next) isa Base.Sort.Algorithm
 
 Compute the extrema of the input under the provided order.
 
@@ -888,7 +905,7 @@ end
 
 
 """
-    ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm
+    ConsiderCountingSort(counting=CountingSort(), next) isa Base.Sort.Algorithm
 
 If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to
 the `next` algorithm.
@@ -916,7 +933,7 @@ _sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v,
 
 
 """
-    CountingSort <: Algorithm
+    CountingSort() isa Base.Sort.Algorithm
 
 Use the counting sort algorithm.
 
@@ -952,7 +969,7 @@ end
 
 
 """
-    ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm
+    ConsiderRadixSort(radix=RadixSort(), next) isa Base.Sort.Algorithm
 
 If the number of bits in the input's range is small enough and the input supports efficient
 bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm.
@@ -975,7 +992,7 @@ end
 
 
 """
-    RadixSort <: Algorithm
+    RadixSort() isa Base.Sort.Algorithm
 
 Use the radix sort algorithm.
 
@@ -1030,8 +1047,8 @@ end
 
 
 """
-    ScratchQuickSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm
-    ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm
+    ScratchQuickSort(next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm
+    ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm
 
 Use the `ScratchQuickSort` algorithm with the `next` algorithm as a base case.
 
@@ -1147,7 +1164,7 @@ end
 
 
 """
-    BracketedSort(target[, next::Algorithm]) <: Algorithm
+    BracketedSort(target[, next::Algorithm]) isa Base.Sort.Algorithm
 
 Perform a partialsort for the elements that fall into the indices specified by the `target`
 using BracketedSort with the `next` algorithm for subproblems.
@@ -1336,7 +1353,7 @@ end
 
 
 """
-    StableCheckSorted(next) <: Algorithm
+    StableCheckSorted(next) isa Base.Sort.Algorithm
 
 Check if an input is sorted and/or reverse-sorted.
 
@@ -1436,7 +1453,7 @@ end
 ## default sorting policy ##
 
 """
-    InitialOptimizations(next) <: Algorithm
+    InitialOptimizations(next) isa Base.Sort.Algorithm
 
 Attempt to apply a suite of low-cost optimizations to the input vector before sorting. These
 optimizations may be automatically applied by the `sort!` family of functions when
@@ -1583,7 +1600,7 @@ defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
 defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
 
 """
-    sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort!(v; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Sort the vector `v` in place. A stable algorithm is used by default: the
 ordering of elements that compare equal is preserved. A specific algorithm can
@@ -1696,7 +1713,7 @@ function sort!(v::AbstractVector{T};
 end
 
 """
-    sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort(v; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified.
 
@@ -1722,7 +1739,7 @@ sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...)
 ## partialsortperm: the permutation to sort the first k elements of an array ##
 
 """
-    partialsortperm(v, k; by=ientity, lt=isless, rev=false)
+    partialsortperm(v, k; by=identity, lt=isless, rev=false)
 
 Return a partial permutation `I` of the vector `v`, so that `v[I]` returns values of a fully
 sorted version of `v` at index `k`. If `k` is a range, a vector of indices is returned; if
@@ -1819,7 +1836,7 @@ end
 ## sortperm: the permutation to sort an array ##
 
 """
-    sortperm(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer])
+    sortperm(A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer])
 
 Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension.
 If `A` has more than one dimension, then the `dims` keyword argument must be specified. The order is specified
@@ -1897,7 +1914,7 @@ end
 
 
 """
-    sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer])
+    sortperm!(ix, A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer])
 
 Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`.
 `ix` is initialized to contain the values `LinearIndices(A)`.
@@ -1985,7 +2002,7 @@ end
 ## sorting multi-dimensional arrays ##
 
 """
-    sort(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Sort a multidimensional array `A` along the given dimension.
 See [`sort!`](@ref) for a description of possible
@@ -2057,7 +2074,7 @@ end
 
 
 """
-    sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort!(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Sort the multidimensional array `A` along dimension `dims`.
 See the one-dimensional version of [`sort!`](@ref) for a description of
@@ -2129,7 +2146,7 @@ get_value(::Val{x}) where x = x
 ## uint mapping to allow radix sorting primitives other than UInts ##
 
 """
-    UIntMappable(T::Type, order::Ordering)
+    UIntMappable(T::Type, order::Base.Order.Ordering)
 
 Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and
 [`uint_unmap`](@ref) are implemented.
@@ -2139,7 +2156,7 @@ If either is not implemented, return `nothing`.
 UIntMappable(T::Type, order::Ordering) = nothing
 
 """
-    uint_map(x, order::Ordering)::Unsigned
+    uint_map(x, order::Base.Order.Ordering)::Unsigned
 
 Map `x` to an un unsigned integer, maintaining sort order.
 
@@ -2153,7 +2170,7 @@ See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref)
 function uint_map end
 
 """
-    uint_unmap(T::Type, u::Unsigned, order::Ordering)
+    uint_unmap(T::Type, u::Unsigned, order::Base.Order.Ordering)
 
 Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies
 `x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`.
@@ -2461,7 +2478,7 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
     @getkw lo hi scratch legacy_dispatch_entry
     if legacy_dispatch_entry === a
         # This error prevents infinite recursion for unknown algorithms
-        throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined"))
+        throw(ArgumentError(LazyString("Base.Sort._sort!(::", typeof(v), ", ::", typeof(a), ", ::", typeof(o), ", ::Any) is not defined")))
     else
         sort!(v, lo, hi, a, o)
         scratch
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 8e940a4d85ad9..312197339a086 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -250,7 +250,7 @@ end
             twopk = (k + UInt64(53)) << 52
             return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53
         end
-        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+        k == 1024 && return (small_part * 2.0) * 2.0^1023
     end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
diff --git a/base/special/trig.jl b/base/special/trig.jl
index 15ecb0ca0492f..66e4b46d7d489 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -165,7 +165,7 @@ end
 @noinline sincos_domain_error(x) = throw(DomainError(x, "sincos(x) is only defined for finite x."))
 
 """
-    sincos(x::T) where T -> float(T)
+    sincos(x::T) where T -> Tuple{float(T),float(T)}
 
 Simultaneously compute the sine and cosine of `x`, where `x` is in radians, returning
 a tuple `(sine, cosine)`.
@@ -850,7 +850,7 @@ function cospi(x::T) where T<:IEEEFloat
     end
 end
 """
-    sincospi(x::T) where T -> float(T)
+    sincospi(x::T) where T -> Tuple{float(T),float(T)}
 
 Simultaneously compute [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) (the sine and cosine of `π*x`,
 where `x` is in radians), returning a tuple `(sine, cosine)`.
@@ -1266,9 +1266,10 @@ end
 tand(x::Real) = sind(x) / cosd(x)
 
 """
-    sincosd(x::T) where T -> float(T)
+    sincosd(x::T) where T -> Tuple{float(T),float(T)}
 
-Simultaneously compute the sine and cosine of `x`, where `x` is in degrees.
+Simultaneously compute the sine and cosine of `x`, where `x` is in degrees, returning
+a tuple `(sine, cosine)`.
 
 Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` tuple if `isnan(x)`.
 
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index e82fd8118b2d7..102e415a22de2 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -8,6 +8,7 @@ module StackTraces
 
 import Base: hash, ==, show
 import Core: CodeInfo, MethodInstance
+using Base.IRShow: normalize_method_name, append_scopes!, LineInfoNode
 
 export StackTrace, StackFrame, stacktrace
 
@@ -20,10 +21,10 @@ Stack information representing execution context, with the following fields:
 
   The name of the function containing the execution context.
 
-- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}`
+- `linfo::Union{Method, Core.MethodInstance, Core.CodeInfo, Nothing}`
 
-  The MethodInstance or CodeInfo containing the execution context (if it could be found), \
-     or Module (for macro expansions)"
+  The Method, MethodInstance, or CodeInfo containing the execution context (if it could be found), \
+     or nothing (for example, if the inlining was a result of macro expansion).
 
 - `file::Symbol`
 
@@ -54,8 +55,8 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles
     "the line number in the file containing the execution context"
     line::Int
     "the MethodInstance or CodeInfo containing the execution context (if it could be found), \
-     or Module (for macro expansions)"
-    linfo::Union{MethodInstance, Method, Module, CodeInfo, Nothing}
+     or nothing (for example, if the inlining was a result of macro expansion)."
+    linfo::Union{MethodInstance, Method, CodeInfo, Nothing}
     "true if the code is from C"
     from_c::Bool
     "true if the code is from an inlined frame"
@@ -112,7 +113,11 @@ Base.@constprop :none function lookup(pointer::Ptr{Cvoid})
     for i in 1:length(infos)
         info = infos[i]::Core.SimpleVector
         @assert(length(info) == 6)
-        res[i] = StackFrame(info[1]::Symbol, info[2]::Symbol, info[3]::Int, info[4], info[5]::Bool, info[6]::Bool, pointer)
+        func = info[1]::Symbol
+        file = info[2]::Symbol
+        linenum = info[3]::Int
+        linfo = info[4]
+        res[i] = StackFrame(func, file, linenum, linfo, info[5]::Bool, info[6]::Bool, pointer)
     end
     return res
 end
@@ -125,28 +130,40 @@ function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP})
         # interpreted top-level expression with no CodeInfo
         return [StackFrame(top_level_scope_sym, empty_sym, 0, nothing, false, false, 0)]
     end
-    codeinfo = (code isa MethodInstance ? code.uninferred : code)::CodeInfo
     # prepare approximate code info
     if code isa MethodInstance && (meth = code.def; meth isa Method)
         func = meth.name
         file = meth.file
         line = meth.line
+        codeinfo = meth.source
     else
+        if code isa Core.CodeInstance
+            codeinfo = code.inferred::CodeInfo
+        else
+            codeinfo = code::CodeInfo
+        end
         func = top_level_scope_sym
         file = empty_sym
         line = Int32(0)
     end
-    i = max(ip.stmt+1, 1)  # ip.stmt is 0-indexed
-    if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0
+    def = (code isa MethodInstance ? code : StackTraces) # Module just used as a token for top-level code
+    pc::Int = max(ip.stmt + 1, 0) # n.b. ip.stmt is 0-indexed
+    scopes = LineInfoNode[]
+    append_scopes!(scopes, pc, codeinfo.debuginfo, def)
+    if isempty(scopes)
         return [StackFrame(func, file, line, code, false, false, 0)]
     end
-    lineinfo = codeinfo.linetable[codeinfo.codelocs[i]]::Core.LineInfoNode
-    scopes = StackFrame[]
-    while true
-        inlined = lineinfo.inlined_at != 0
-        push!(scopes, StackFrame(Base.IRShow.method_name(lineinfo)::Symbol, lineinfo.file, lineinfo.line, inlined ? nothing : code, false, inlined, 0))
-        inlined || break
-        lineinfo = codeinfo.linetable[lineinfo.inlined_at]::Core.LineInfoNode
+    inlined = false
+    scopes = map(scopes) do lno
+        if inlined
+            def = lno.method
+            def isa Union{Method,MethodInstance} || (def = nothing)
+        else
+            def = codeinfo
+        end
+        sf = StackFrame(normalize_method_name(lno.method), lno.file, lno.line, def, false, inlined, 0)
+        inlined = true
+        return sf
     end
     return scopes
 end
diff --git a/base/stat.jl b/base/stat.jl
index 3931b9d1f3583..506b5644dccbc 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -66,7 +66,7 @@ struct StatStruct
 end
 
 @eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
-  $(let ex = true
+    $(let ex = true
         for fld in fieldnames(StatStruct)[2:end]
             ex = :(getfield(x, $(QuoteNode(fld))) === getfield(y, $(QuoteNode(fld))) && $ex)
         end
@@ -74,7 +74,7 @@ end
     end)
 end
 @eval function Base.hash(obj::StatStruct, h::UInt)
-  $(quote
+    $(quote
         $(Any[:(h = hash(getfield(obj, $(QuoteNode(fld))), h)) for fld in fieldnames(StatStruct)[2:end]]...)
         return h
     end)
@@ -184,15 +184,21 @@ macro stat_call(sym, arg1type, arg)
 end
 
 stat(fd::OS_HANDLE)         = @stat_call jl_fstat OS_HANDLE fd
-stat(path::AbstractString)  = @stat_call jl_stat  Cstring path
-lstat(path::AbstractString) = @stat_call jl_lstat Cstring path
+function stat(path::AbstractString)
+    # @info "stat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_stat  Cstring path
+end
+function lstat(path::AbstractString)
+    # @info "lstat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_lstat Cstring path
+end
 if RawFD !== OS_HANDLE
     global stat(fd::RawFD)  = stat(Libc._get_osfhandle(fd))
 end
-stat(fd::Integer)           = stat(RawFD(fd))
 
 """
     stat(file)
+    stat(joinpath...)
 
 Return a structure whose fields contain information about the file.
 The fields of the structure are:
@@ -213,16 +219,19 @@ The fields of the structure are:
 | mtime   | `Float64`                       | Unix timestamp of when the file was last modified                  |
 | ctime   | `Float64`                       | Unix timestamp of when the file's metadata was changed             |
 """
+stat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("stat not implemented for $(typeof(path))") : stat(path2))
 stat(path...) = stat(joinpath(path...))
 
 """
     lstat(file)
+    lstat(joinpath...)
 
 Like [`stat`](@ref), but for symbolic links gets the info for the link
 itself rather than the file it refers to.
 This function must be called on a file path rather than a file object or a file
 descriptor.
 """
+lstat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("lstat not implemented for $(typeof(path))") : lstat(path2))
 lstat(path...) = lstat(joinpath(path...))
 
 # some convenience functions
diff --git a/base/stream.jl b/base/stream.jl
index feb5ad777fc11..93aeead79eb9c 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -122,7 +122,7 @@ const DEFAULT_READ_BUFFER_SZ = 10485760 # 10 MB
 if Sys.iswindows()
     const MAX_OS_WRITE = UInt(0x1FF0_0000) # 511 MB (determined semi-empirically, limited to 31 MB on XP)
 else
-    const MAX_OS_WRITE = UInt(typemax(Csize_t))
+    const MAX_OS_WRITE = UInt(0x7FFF_0000) # almost 2 GB (both macOS and linux have this kernel restriction, although only macOS documents it)
 end
 
 
@@ -453,14 +453,16 @@ function closewrite(s::LibuvStream)
     sigatomic_begin()
     uv_req_set_data(req, ct)
     iolock_end()
-    status = try
+    local status
+    try
         sigatomic_end()
-        wait()::Cint
+        status = wait()::Cint
+        sigatomic_begin()
     finally
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we won't get spurious notifications later
@@ -1062,17 +1064,19 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
     sigatomic_begin()
     uv_req_set_data(uvw, ct)
     iolock_end()
-    status = try
+    local status
+    try
         sigatomic_end()
         # wait for the last chunk to complete (or error)
         # assume that any errors would be sticky,
         # (so we don't need to monitor the error status of the intermediate writes)
-        wait()::Cint
+        status = wait()::Cint
+        sigatomic_begin()
     finally
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
@@ -1601,7 +1605,7 @@ end
 
 skip(s::BufferStream, n) = skip(s.buffer, n)
 
-function reseteof(x::BufferStream)
+function reseteof(s::BufferStream)
     lock(s.cond) do
         s.status = StatusOpen
         nothing
diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl
index a85cdf1b08bbb..be4c6887d4a6d 100644
--- a/base/strings/annotated.jl
+++ b/base/strings/annotated.jl
@@ -25,6 +25,17 @@ and a value (`Any`), paired together as a `Pair{Symbol, <:Any}`.
 Labels do not need to be unique, the same region can hold multiple annotations
 with the same label.
 
+Code written for `AnnotatedString`s in general should conserve the following
+properties:
+- Which characters an annotation is applied to
+- The order in which annotations are applied to each character
+
+Additional semantics may be introduced by specific uses of `AnnotatedString`s.
+
+A corollary of these rules is that adjacent, consecutively placed, annotations
+with identical labels and values are equivalent to a single annotation spanning
+the combined range.
+
 See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref),
 [`annotations`](@ref), and [`annotate!`](@ref).
 
@@ -153,12 +164,28 @@ lastindex(s::AnnotatedString) = lastindex(s.string)
 function getindex(s::AnnotatedString, i::Integer)
     @boundscheck checkbounds(s, i)
     @inbounds if isvalid(s, i)
-        AnnotatedChar(s.string[i], map(last, annotations(s, i)))
+        AnnotatedChar(s.string[i], Pair{Symbol, Any}[last(x) for x in annotations(s, i)])
     else
         string_index_err(s, i)
     end
 end
 
+# To make `AnnotatedString`s repr-evaluable, we need to override
+# the generic `AbstractString` 2-arg show method.
+
+function show(io::IO, s::A) where {A <: AnnotatedString}
+    show(io, A)
+    print(io, '(')
+    show(io, s.string)
+    print(io, ", ")
+    show(IOContext(io, :typeinfo => typeof(annotations(s))), annotations(s))
+    print(io, ')')
+end
+
+# But still use the generic `AbstractString` fallback for the 3-arg show.
+show(io::IO, ::MIME"text/plain", s::AnnotatedString) =
+    invoke(show, Tuple{IO, AbstractString}, io, s)
+
 ## AbstractChar interface ##
 
 ncodeunits(c::AnnotatedChar) = ncodeunits(c.char)
@@ -176,6 +203,24 @@ cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string)
 ==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b
 ==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string
 
+# To prevent substring equality from hitting the generic fallback
+
+function ==(a::SubString{<:AnnotatedString}, b::SubString{<:AnnotatedString})
+    SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) ==
+        SubString(b.string.string, b.offset, b.ncodeunits, Val(:noshift)) &&
+        annotations(a) == annotations(b)
+end
+
+==(a::SubString{<:AnnotatedString}, b::AnnotatedString) =
+    annotations(a) == annotations(b) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b.string
+
+==(a::SubString{<:AnnotatedString}, b::AbstractString) =
+    isempty(annotations(a)) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b
+
+==(a::AbstractString, b::SubString{<:AnnotatedString}) = b == a
+
+==(a::AnnotatedString, b::SubString{<:AnnotatedString}) = b == a
+
 """
     annotatedstring(values...)
 
@@ -239,36 +284,6 @@ annotatedstring(c::AnnotatedChar) =
 
 AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s)
 
-"""
-    annotatedstring_optimize!(str::AnnotatedString)
-
-Merge contiguous identical annotations in `str`.
-"""
-function annotatedstring_optimize!(s::AnnotatedString)
-    last_seen = Dict{Pair{Symbol, Any}, Int}()
-    i = 1
-    while i <= length(s.annotations)
-        region, keyval = s.annotations[i]
-        prev = get(last_seen, keyval, 0)
-        if prev > 0
-            lregion, _ = s.annotations[prev]
-            if last(lregion) + 1 == first(region)
-                s.annotations[prev] =
-                    setindex(s.annotations[prev],
-                             first(lregion):last(region),
-                             1)
-                deleteat!(s.annotations, i)
-            else
-                delete!(last_seen, keyval)
-            end
-        else
-            last_seen[keyval] = i
-            i += 1
-        end
-    end
-    s
-end
-
 function repeat(str::AnnotatedString, r::Integer)
     r == 0 && return one(AnnotatedString)
     r == 1 && return str
@@ -276,19 +291,19 @@ function repeat(str::AnnotatedString, r::Integer)
     annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()
     len = ncodeunits(str)
     fullregion = firstindex(str):lastindex(str)
-    for (region, annot) in str.annotations
-        if region == fullregion
-            push!(annotations, (firstindex(unannot):lastindex(unannot), annot))
+    if allequal(first, str.annotations) && first(first(str.annotations)) == fullregion
+        newfullregion = firstindex(unannot):lastindex(unannot)
+        for (_, annot) in str.annotations
+            push!(annotations, (newfullregion, annot))
         end
-    end
-    for offset in 0:len:(r-1)*len
-        for (region, annot) in str.annotations
-            if region != fullregion
+    else
+        for offset in 0:len:(r-1)*len
+            for (region, annot) in str.annotations
                 push!(annotations, (region .+ offset, annot))
             end
         end
     end
-    AnnotatedString(unannot, annotations) |> annotatedstring_optimize!
+    AnnotatedString(unannot, annotations)
 end
 
 repeat(str::SubString{<:AnnotatedString}, r::Integer) =
@@ -319,14 +334,9 @@ reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s))
 function _annotate!(annlist::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any}))
     label, val = labelval
     if val === nothing
-        indices = searchsorted(annlist, (range,), by=first)
-        labelindex = filter(i -> first(annlist[i][2]) === label, indices)
-        for index in Iterators.reverse(labelindex)
-            deleteat!(annlist, index)
-        end
+        deleteat!(annlist, findall(ann -> ann[1] == range && first(ann[2]) === label, annlist))
     else
-        sortedindex = searchsortedlast(annlist, (range,), by=first) + 1
-        insert!(annlist, sortedindex, (range, Pair{Symbol, Any}(label, val)))
+        push!(annlist, (range, Pair{Symbol, Any}(label, val)))
     end
 end
 
@@ -336,6 +346,9 @@ end
 
 Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`).
 To remove existing `label` annotations, use a value of `nothing`.
+
+The order in which annotations are applied to `str` is semantically meaningful,
+as described in [`AnnotatedString`](@ref).
 """
 annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) =
     (_annotate!(s.annotations, range, labelval); s)
@@ -368,7 +381,10 @@ annotations that overlap with `position` will be returned.
 Annotations are provided together with the regions they apply to, in the form of
 a vector of region–annotation tuples.
 
-See also: `annotate!`.
+In accordance with the semantics documented in [`AnnotatedString`](@ref), the
+order of annotations returned matches the order in which they were applied.
+
+See also: [`annotate!`](@ref).
 """
 annotations(s::AnnotatedString) = s.annotations
 
@@ -399,6 +415,51 @@ Get all annotations of `chr`, in the form of a vector of annotation pairs.
 """
 annotations(c::AnnotatedChar) = c.annotations
 
+## Character transformation helper function, c.f. `unicode.jl`.
+
+"""
+    annotated_chartransform(f::Function, str::AnnotatedString, state=nothing)
+
+Transform every character in `str` with `f`, adjusting annotation regions as
+appropriate. `f` must take one of two forms, either:
+- `f(c::Char) -> Char`, or
+- `f(c::Char, state) -> (Char, state)`.
+
+This works by comparing the number of code units of each character before and
+after transforming with `f`, recording and aggregating any differences, then
+applying them to the annotation regions.
+
+Returns an `AnnotatedString{String}` (regardless of the original underling
+string type of `str`).
+"""
+function annotated_chartransform(f::Function, str::AnnotatedString, state=nothing)
+    outstr = IOBuffer()
+    annots = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
+    bytepos = firstindex(str) - 1
+    offsets = [bytepos => 0]
+    for c in str.string
+        oldnb = ncodeunits(c)
+        bytepos += oldnb
+        if isnothing(state)
+            c = f(c)
+        else
+            c, state = f(c, state)
+        end
+        nb = write(outstr, c)
+        if nb != oldnb
+            push!(offsets, bytepos => last(last(offsets)) + nb - oldnb)
+        end
+    end
+    for annot in str.annotations
+        region, value = annot
+        start, stop = first(region), last(region)
+        start_offset = last(offsets[findlast(<=(start) ∘ first, offsets)::Int])
+        stop_offset  = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int])
+        push!(annots, ((start + start_offset):(stop + stop_offset), value))
+    end
+    AnnotatedString(String(take!(outstr)), annots)
+end
+
 ## AnnotatedIOBuffer
 
 struct AnnotatedIOBuffer <: AbstractPipe
@@ -439,7 +500,8 @@ function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:A
     write(io.io, String(astr))
 end
 
-write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, AnnotatedString(c))
+write(io::AnnotatedIOBuffer, c::AnnotatedChar) =
+    write(io, AnnotatedString(string(c), map(a -> (1:ncodeunits(c), a), annotations(c))))
 write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x)
 write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s)
 write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b)
@@ -456,10 +518,37 @@ function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
     nb
 end
 
+# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers)
+# work as expected.
+function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}})
+    if pipe_writer(io) isa AnnotatedIOBuffer
+        write(pipe_writer(io), s)
+    else
+        invoke(write, Tuple{IO, typeof(s)}, io, s)
+    end::Int
+end
+# Can't be part of the `Union` above because it introduces method ambiguities
+function write(io::AbstractPipe, c::AnnotatedChar)
+    if pipe_writer(io) isa AnnotatedIOBuffer
+        write(pipe_writer(io), c)
+    else
+        invoke(write, Tuple{IO, typeof(c)}, io, c)
+    end::Int
+end
+
+"""
+    _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
+
+Erase the presence of `annotations` within a certain `span`.
+
+This operates by removing all elements of `annotations` that are entirely
+contained in `span`, truncating ranges that partially overlap, and splitting
+annotations that subsume `span` to just exist either side of `span`.
+"""
 function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
     # Clear out any overlapping pre-existing annotations.
     filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations)
-    extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
+    extras = Tuple{Int, Tuple{UnitRange{Int}, Pair{Symbol, Any}}}[]
     for i in eachindex(annotations)
         region, annot = annotations[i]
         # Test for partial overlap
@@ -470,31 +559,69 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int},
             # If `span` fits exactly within `region`, then we've only copied over
             # the beginning overhang, but also need to conserve the end overhang.
             if first(region) < first(span) && last(span) < last(region)
-                push!(extras, (last(span)+1:last(region), annot))
+                push!(extras, (i, (last(span)+1:last(region), annot)))
             end
         end
-        # Insert any extra entries in the appropriate position
-        for entry in extras
-            sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1
-            insert!(annotations, sortedindex, entry)
-        end
+    end
+    # Insert any extra entries in the appropriate position
+    for (offset, (i, entry)) in enumerate(extras)
+        insert!(annotations, i + offset, entry)
     end
     annotations
 end
 
+"""
+    _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
+
+Register new `annotations` in `io`, applying an `offset` to their regions.
+
+The largely consists of simply shifting the regions of `annotations` by `offset`
+and pushing them onto `io`'s annotations. However, when it is possible to merge
+the new annotations with recent annotations in accordance with the semantics
+outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there
+is a run of the most recent annotations that are also present as the first
+`annotations`, with the same value and adjacent regions, the new annotations are
+merged into the existing recent annotations by simply extending their range.
+
+This is implemented so that one can say write an `AnnotatedString` to an
+`AnnotatedIOBuffer` one character at a time without needlessly producing a
+new annotation for each character.
+"""
 function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
-    if !eof(io)
-        for (region, annot) in annotations
-            region = first(region)+offset:last(region)+offset
-            sortedindex = searchsortedlast(io.annotations, (region,), by=first) + 1
-            insert!(io.annotations, sortedindex, (region, annot))
-        end
-    else
-        for (region, annot) in annotations
-            region = first(region)+offset:last(region)+offset
-            push!(io.annotations, (region, annot))
+    run = 0
+    if !isempty(io.annotations) && last(first(last(io.annotations))) == offset
+        for i in reverse(axes(annotations, 1))
+            annot = annotations[i]
+            first(first(annot)) == 1 || continue
+            i <= length(io.annotations) || continue
+            if last(annot) == last(last(io.annotations))
+                valid_run = true
+                for runlen in 1:i
+                    new_range, new_annot = annotations[begin+runlen-1]
+                    old_range, old_annot = io.annotations[end-i+runlen]
+                    if last(old_range) != offset || first(new_range) != 1 || old_annot != new_annot
+                        valid_run = false
+                        break
+                    end
+                end
+                if valid_run
+                    run = i
+                    break
+                end
+            end
         end
     end
+    for runindex in 0:run-1
+        old_index = lastindex(io.annotations) - run + 1 + runindex
+        old_region, annot = io.annotations[old_index]
+        new_region, _ = annotations[begin+runindex]
+        io.annotations[old_index] = (first(old_region):last(new_region)+offset, annot)
+    end
+    for index in run+1:lastindex(annotations)
+        region, annot = annotations[index]
+        start, stop = first(region), last(region)
+        push!(io.annotations, (start+offset:stop+offset, annot))
+    end
 end
 
 function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
@@ -512,7 +639,7 @@ read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{
 function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
     pos = position(io)
     char = read(io.io, T)
-    annots = [annot for (range, annot) in io.annotations if pos+1 in range]
+    annots = Pair{Symbol, Any}[annot for (range, annot) in io.annotations if pos+1 in range]
     AnnotatedChar(char, annots)
 end
 read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char})
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 2d5f0cea26e36..bf11199143c1e 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -146,9 +146,8 @@ Stacktrace:
 
 Return a tuple of the character in `s` at index `i` with the index of the start
 of the following character in `s`. This is the key method that allows strings to
-be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
-then a bounds error is raised. The `iterate` function, as part of the iteration
-protocol may assume that `i` is the start of a character in `s`.
+be iterated, yielding a sequences of characters. The `iterate` function, as part
+of the iteration protocol may assume that `i` is the start of a character in `s`.
 
 See also [`getindex`](@ref), [`checkbounds`](@ref).
 """
diff --git a/base/strings/io.jl b/base/strings/io.jl
index 94b5a1e4e4614..754e058cd2f54 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -10,10 +10,10 @@ if `io` is not given) a canonical (un-decorated) text representation.
 The representation used by `print` includes minimal formatting and tries to
 avoid Julia-specific details.
 
-`print` falls back to calling `show`, so most types should just define
-`show`. Define `print` if your type has a separate "plain" representation.
-For example, `show` displays strings with quotes, and `print` displays strings
-without quotes.
+`print` falls back to calling the 2-argument `show(io, x)` for each argument `x` in `xs`,
+so most types should just define `show`. Define `print` if your type has a separate
+"plain" representation.  For example, `show` displays strings with quotes, and `print`
+displays strings without quotes.
 
 See also [`println`](@ref), [`string`](@ref), [`printstyled`](@ref).
 
@@ -246,14 +246,14 @@ end
 
 # optimized methods to avoid iterating over chars
 write(io::IO, s::Union{String,SubString{String}}) =
-    GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int
+    GC.@preserve s (unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))) % Int)::Int
 print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing)
 
 """
     repr(x; context=nothing)
 
-Create a string from any value using the [`show`](@ref) function.
-You should not add methods to `repr`; define a `show` method instead.
+Create a string from any value using the 2-argument `show(io, x)` function.
+You should not add methods to `repr`; define a [`show`](@ref) method instead.
 
 The optional keyword argument `context` can be set to a `:key=>value` pair, a
 tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
@@ -262,7 +262,7 @@ attributes are used for the I/O stream passed to `show`.
 Note that `repr(x)` is usually similar to how the value of `x` would
 be entered in Julia.  See also [`repr(MIME("text/plain"), x)`](@ref) to instead
 return a "pretty-printed" version of `x` designed more for human consumption,
-equivalent to the REPL display of `x`.
+equivalent to the REPL display of `x`, using the 3-argument `show(io, mime, x)`.
 
 !!! compat "Julia 1.7"
     Passing a tuple to keyword `context` requires Julia 1.7 or later.
@@ -354,12 +354,22 @@ function join(io::IO, iterator, delim="")
 end
 
 function _join_preserve_annotations(iterator, args...)
-    if _isannotated(eltype(iterator)) || any(_isannotated, args)
+    et = @default_eltype(iterator)
+    if isconcretetype(et) && !_isannotated(et) && !any(_isannotated, args)
+        sprint(join, iterator, args...)
+    else
         io = AnnotatedIOBuffer()
         join(io, iterator, args...)
-        read(seekstart(io), AnnotatedString{String})
-    else
-        sprint(join, iterator, args...)
+        # If we know (from compile time information, or dynamically in the case
+        # of iterators with a non-concrete eltype), that the result is annotated
+        # in nature, we extract an `AnnotatedString`, otherwise we just extract
+        # a plain `String` from `io`.
+        if isconcretetype(et) || !isempty(io.annotations)
+            seekstart(io)
+            read(io, AnnotatedString{String})
+        else
+            String(take!(io.io))
+        end
     end
 end
 
@@ -374,8 +384,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) =
     (c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0"
 
 """
-    escape_string(str::AbstractString[, esc]; keep = ())::AbstractString
-    escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing
+    escape_string(str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::AbstractString
+    escape_string(io, str::AbstractString[, esc]; keep=())::Nothing
 
 General escaping of traditional C and Unicode escape sequences. The first form returns the
 escaped string, the second prints the result to `io`.
@@ -390,11 +400,23 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first
 The argument `keep` specifies a collection of characters which are to be kept as
 they are. Notice that `esc` has precedence here.
 
+The argument `ascii` can be set to `true` to escape all non-ASCII characters,
+whereas the default `ascii=false` outputs printable Unicode characters as-is.
+(`keep` takes precedence over `ascii`.)
+
+The argument `fullhex` can be set to `true` to require all `\\u` escapes to be
+printed with 4 hex digits, and `\\U` escapes to be printed with 8 hex digits,
+whereas by default (`fullhex=false`) they are printed with fewer digits if
+possible (omitting leading zeros).
+
 See also [`unescape_string`](@ref) for the reverse operation.
 
 !!! compat "Julia 1.7"
     The `keep` argument is available as of Julia 1.7.
 
+!!! compat "Julia 1.12"
+    The `ascii` and `fullhex` arguments require Julia 1.12.
+
 # Examples
 ```jldoctest
 julia> escape_string("aaa\\nbbb")
@@ -413,7 +435,7 @@ julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
 "ℵ\\\\x000"
 ```
 """
-function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
+function escape_string(io::IO, s::AbstractString, esc=""; keep = (), ascii::Bool=false, fullhex::Bool=false)
     a = Iterators.Stateful(s)
     for c::AbstractChar in a
         if c in esc
@@ -428,10 +450,10 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
             isprint(c)         ? print(io, c) :
                                  print(io, "\\x", string(UInt32(c), base = 16, pad = 2))
         elseif !isoverlong(c) && !ismalformed(c)
-            isprint(c)         ? print(io, c) :
-            c <= '\x7f'        ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
-            c <= '\uffff'      ? print(io, "\\u", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
-                                 print(io, "\\U", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
+            !ascii && isprint(c) ? print(io, c) :
+            c <= '\x7f'          ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
+            c <= '\uffff'        ? print(io, "\\u", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
+                                   print(io, "\\U", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
         else # malformed or overlong
             u = bswap(reinterpret(UInt32, c)::UInt32)
             while true
@@ -442,8 +464,8 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
     end
 end
 
-escape_string(s::AbstractString, esc=('\"',); keep = ()) =
-    sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s))
+escape_string(s::AbstractString, esc=('\"',); keep = (), ascii::Bool=false, fullhex::Bool=false) =
+    sprint((io)->escape_string(io, s, esc; keep, ascii, fullhex), sizehint=lastindex(s))
 
 function print_quoted(io, s::AbstractString)
     print(io, '"')
@@ -615,7 +637,7 @@ string literals. (It also happens to be the escaping convention
 expected by the Microsoft C/C++ compiler runtime when it parses a
 command-line string into the argv[] array.)
 
-See also [`escape_string`](@ref).
+See also [`Base.escape_string()`](@ref).
 """
 function escape_raw_string(io::IO, str::AbstractString, delim::Char='"')
     total = 0
diff --git a/base/strings/search.jl b/base/strings/search.jl
index e2b3dc96b98cf..a481b3af775e0 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -10,7 +10,39 @@ match strings with [`match`](@ref).
 """
 abstract type AbstractPattern end
 
-nothing_sentinel(i) = i == 0 ? nothing : i
+# TODO: These unions represent bytes in memory that can be accessed via a pointer.
+# this property is used throughout Julia, e.g. also in IO code.
+# This deserves a better solution - see #53178.
+# If such a better solution comes in place, these unions should be replaced.
+const DenseInt8 = Union{
+    DenseArray{Int8},
+    FastContiguousSubArray{Int8,N,<:DenseArray} where N
+}
+
+# Note: This union is different from that above in that it includes CodeUnits.
+# Currently, this is redundant as CodeUnits <: DenseVector, but this subtyping
+# is buggy and may be removed in the future, see #54002
+const DenseUInt8 = Union{
+    DenseArray{UInt8},
+    FastContiguousSubArray{UInt8,N,<:DenseArray} where N,
+    CodeUnits{UInt8, <:Union{String, SubString{String}}},
+    FastContiguousSubArray{UInt8,N,<:CodeUnits{UInt8, <:Union{String, SubString{String}}}} where N,
+}
+
+const DenseUInt8OrInt8 = Union{DenseUInt8, DenseInt8}
+
+last_byteindex(x::Union{String, SubString{String}}) = ncodeunits(x)
+last_byteindex(x::DenseUInt8OrInt8) = lastindex(x)
+
+function last_utf8_byte(c::Char)
+    u = reinterpret(UInt32, c)
+    shift = ((4 - ncodeunits(c)) * 8) & 31
+    (u >> shift) % UInt8
+end
+
+# Whether the given byte is guaranteed to be the only byte in a Char
+# This holds even in the presence of invalid UTF8
+is_standalone_byte(x::UInt8) = (x < 0x80) | (x > 0xf7)
 
 function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
                   s::Union{String, SubString{String}}, i::Integer)
@@ -20,38 +52,58 @@ function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}
     end
     @inbounds isvalid(s, i) || string_index_err(s, i)
     c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_search(s, c % UInt8, i))
+    c ≤ '\x7f' && return _search(s, first_utf8_byte(c), i)
     while true
         i = _search(s, first_utf8_byte(c), i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
+        i === nothing && return nothing
+        isvalid(s, i) && pred(s[i]) && return i
         i = nextind(s, i)
     end
 end
 
-findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_search(a, pred.x))
+# Note: Currently, CodeUnits <: DenseVector, which makes this union redundant w.r.t
+# DenseArrayType{UInt8}, but this is a bug, and may be removed in future versions
+# of Julia. See #54002
+const DenseBytes = Union{
+    <:DenseArrayType{UInt8},
+    CodeUnits{UInt8, <:Union{String, SubString{String}}},
+}
+
+function findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{UInt8, Int8}}, a::Union{DenseInt8, DenseUInt8})
+    findnext(pred, a, firstindex(a))
+end
 
-findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_search(a, pred.x, i))
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    _search(a, pred.x, i)
+end
 
-findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8)))
-findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i))
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    _search(a, pred.x, i)
+end
 
-function _search(a::Union{String,SubString{String},ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1)
-    if i < 1
+# iszero is special, in that the bitpattern for zero for Int8 and UInt8 is the same,
+# so we can use memchr even if we search for an Int8 in an UInt8 array or vice versa
+findfirst(::typeof(iszero), a::DenseUInt8OrInt8) = _search(a, zero(UInt8))
+findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _search(a, zero(UInt8), i)
+
+function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a))
+    fst = firstindex(a)
+    lst = last_byteindex(a)
+    if i < fst
         throw(BoundsError(a, i))
     end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    n_bytes = lst - i + 1
+    if i > lst
+        return i == lst+1 ? nothing : throw(BoundsError(a, i))
+    end
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-fst, b, n_bytes)
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1)
-    return q == C_NULL ? 0 : Int(q-p+1)
+    return q == C_NULL ? nothing : (q-p+fst) % Int
 end
 
-function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
+function _search(a::DenseUInt8, b::AbstractChar, i::Integer = firstindex(a))
     if isascii(b)
         _search(a,UInt8(b),i)
     else
@@ -60,41 +112,51 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
 end
 
 function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
-                  s::String, i::Integer)
+                  s::Union{String, SubString{String}}, i::Integer)
     c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_rsearch(s, c % UInt8, i))
+    c ≤ '\x7f' && return _rsearch(s, first_utf8_byte(c), i)
     b = first_utf8_byte(c)
     while true
         i = _rsearch(s, b, i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
+        i == nothing && return nothing
+        isvalid(s, i) && pred(s[i]) && return i
         i = prevind(s, i)
     end
 end
 
-findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_rsearch(a, pred.x))
+function findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::DenseUInt8OrInt8)
+    findprev(pred, a, lastindex(a))
+end
 
-findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_rsearch(a, pred.x, i))
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    _rsearch(a, pred.x, i)
+end
 
-findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8)))
-findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i))
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    _rsearch(a, pred.x, i)
+end
+
+# See comments above for findfirst(::typeof(iszero)) methods
+findlast(::typeof(iszero), a::DenseUInt8OrInt8) = _rsearch(a, zero(UInt8))
+findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _rsearch(a, zero(UInt8), i)
 
-function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a))
-    if i < 1
-        return i == 0 ? 0 : throw(BoundsError(a, i))
+function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a))
+    fst = firstindex(a)
+    lst = last_byteindex(a)
+    if i < fst
+        return i == fst - 1 ? nothing : throw(BoundsError(a, i))
+    end
+    if i > lst
+        return i == lst+1 ? nothing : throw(BoundsError(a, i))
     end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i-fst+1)
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i)
-    return q == C_NULL ? 0 : Int(q-p+1)
+    return q == C_NULL ? nothing : (q-p+fst) % Int
 end
 
-function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a))
+function _rsearch(a::DenseUInt8, b::AbstractChar, i::Integer = length(a))
     if isascii(b)
         _rsearch(a,UInt8(b),i)
     else
@@ -102,6 +164,35 @@ function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a))
     end
 end
 
+function findall(
+    pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
+    s::Union{String, SubString{String}}
+)
+    c = Char(pred.x)::Char
+    byte = last_utf8_byte(c)
+    ncu = ncodeunits(c)
+
+    # If only one byte, and can't be part of another Char: Forward to memchr.
+    is_standalone_byte(byte) && return findall(==(byte), codeunits(s))
+    result = Int[]
+    i = firstindex(s)
+    while true
+        i = _search(s, byte, i)
+        isnothing(i) && return result
+        i += 1
+        index = i - ncu
+        # If the char is invalid, it's possible that its first byte is
+        # inside another char. If so, indexing into the string will throw an
+        # error, so we need to check for valid indices.
+        isvalid(s, index) || continue
+        # We use iterate here instead of indexing, because indexing wastefully
+        # checks for valid index. It would be better if there was something like
+        # try_getindex(::String, ::Int) we could use.
+        char = first(something(iterate(s, index)))
+        pred(char) && push!(result, index)
+    end
+end
+
 """
     findfirst(pattern::AbstractString, string::AbstractString)
     findfirst(pattern::AbstractPattern, string::String)
@@ -175,18 +266,19 @@ end
 
 in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing)
 
-function _searchindex(s::Union{AbstractString,ByteArray},
+function _searchindex(s::Union{AbstractString,DenseUInt8OrInt8},
                       t::Union{AbstractString,AbstractChar,Int8,UInt8},
                       i::Integer)
+    sentinel = firstindex(s) - 1
     x = Iterators.peel(t)
     if isnothing(x)
-        return 1 <= i <= nextind(s,lastindex(s))::Int ? i :
+        return firstindex(s) <= i <= nextind(s,lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
     t1, trest = x
     while true
         i = findnext(isequal(t1),s,i)
-        if i === nothing return 0 end
+        if i === nothing return sentinel end
         ii = nextind(s, i)::Int
         a = Iterators.Stateful(trest)
         matched = all(splat(==), zip(SubString(s, ii), a))
@@ -460,9 +552,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2])
 !!! compat "Julia 1.3"
      This method requires at least Julia 1.3.
 """
-
-function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
-                 s::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
+function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
+                 s::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
                  ; overlap::Bool=false)
     found = UnitRange{Int}[]
     i, e = firstindex(s), lastindex(s)
@@ -515,7 +606,7 @@ function _rsearchindex(s::AbstractString,
     end
 end
 
-function _rsearchindex(s::String, t::String, i::Integer)
+function _rsearchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer)
     # Check for fast case of a single byte
     if lastindex(t) == 1
         return something(findprev(isequal(t[1]), s, i), 0)
diff --git a/base/strings/string.jl b/base/strings/string.jl
index b2afce897a937..90d6e5b26ccd3 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -27,8 +27,6 @@ function Base.showerror(io::IO, exc::StringIndexError)
     end
 end
 
-const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}}
-
 @inline between(b::T, lo::T, hi::T) where {T<:Integer} = (lo ≤ b) & (b ≤ hi)
 
 """
@@ -81,7 +79,7 @@ function String(v::Vector{UInt8})
     end
     # optimized empty!(v); sizehint!(v, 0) calls
     setfield!(v, :size, (0,))
-    setfield!(v, :ref, MemoryRef(Memory{UInt8}()))
+    setfield!(v, :ref, memoryref(Memory{UInt8}()))
     return str
 end
 
@@ -104,9 +102,11 @@ function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
     ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
 end
 
-# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String},
+# This is `@assume_effects :total !:consistent @ccall jl_alloc_string(n::Csize_t)::Ref{String}`,
 # but the macro is not available at this time in bootstrap, so we write it manually.
-@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0x000e)), :(convert(Csize_t, n))))
+const _string_n_override = 0x04ee
+@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String},
+    :(Core.svec(Csize_t)), 1, QuoteNode((:ccall, _string_n_override)), :(convert(Csize_t, n))))
 
 """
     String(s::AbstractString)
@@ -208,7 +208,7 @@ end
             i = i′
             @inbounds l = codeunit(s, i)
             (l < 0x80) | (0xf8 ≤ l) && return i+1
-            @assert l >= 0xc0
+            @assert l >= 0xc0 "invalid codeunit"
         end
         # first continuation byte
         (i += 1) > n && return i
@@ -431,7 +431,7 @@ is_valid_continuation(c) = c & 0xc0 == 0x80
     b = @inbounds codeunit(s, i)
     u = UInt32(b) << 24
     between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1
-    return iterate_continued(s, i, u)
+    return @noinline iterate_continued(s, i, u)
 end
 
 # duck-type s so that external UTF-8 string packages like StringViews can hook in
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 2e04633b87487..ad047514c85a6 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -4,7 +4,9 @@
 module Unicode
 
 import Base: show, ==, hash, string, Symbol, isless, length, eltype,
-             convert, isvalid, ismalformed, isoverlong, iterate
+             convert, isvalid, ismalformed, isoverlong, iterate,
+             AnnotatedString, AnnotatedChar, annotated_chartransform,
+             @assume_effects, annotations
 
 # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
 
@@ -155,15 +157,15 @@ function utf8proc_decompose(str, options, buffer, nwords, chartransform::typeof(
     ret < 0 && utf8proc_error(ret)
     return ret
 end
-function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T
-    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}),
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::F) where F
+    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{F}),
                 str, sizeof(str), buffer, nwords, options,
-                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform)
+                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{F})), chartransform)
     ret < 0 && utf8proc_error(ret)
     return ret
 end
 
-function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity)
+function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F
     nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
     buffer = Base.StringVector(nwords*4)
     nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
@@ -182,7 +184,7 @@ const _julia_charmap = Dict{UInt32,UInt32}(
     0x210F => 0x0127,
 )
 
-utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform)
+utf8proc_map(s::AbstractString, flags::Integer, chartransform::F = identity) where F = utf8proc_map(String(s), flags, chartransform)
 
 # Documented in Unicode module
 function normalize(
@@ -254,6 +256,15 @@ julia> textwidth('⛵')
 ```
 """
 function textwidth(c::AbstractChar)
+    ismalformed(c) && return 1
+    i = codepoint(c)
+    i < 0x7f && return Int(i >= 0x20) # ASCII fast path
+    Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
+end
+
+function textwidth(c::Char)
+    b = bswap(reinterpret(UInt32, c)) # from isascii(c)
+    b < 0x7f && return Int(b >= 0x20) # ASCII fast path
     ismalformed(c) && return 1
     Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
 end
@@ -271,6 +282,8 @@ julia> textwidth("March")
 """
 textwidth(s::AbstractString) = mapreduce(textwidth, +, s; init=0)
 
+textwidth(s::AnnotatedString) = textwidth(s.string)
+
 """
     lowercase(c::AbstractChar)
 
@@ -290,6 +303,8 @@ julia> lowercase('Ö')
 lowercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('A' <= c <= 'Z' ? c + 0x20 : c) :
     T(ccall(:utf8proc_tolower, UInt32, (UInt32,), c))
 
+lowercase(c::AnnotatedChar) = AnnotatedChar(lowercase(c.char), annotations(c))
+
 """
     uppercase(c::AbstractChar)
 
@@ -309,6 +324,8 @@ julia> uppercase('ê')
 uppercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_toupper, UInt32, (UInt32,), c))
 
+uppercase(c::AnnotatedChar) = AnnotatedChar(uppercase(c.char), annotations(c))
+
 """
     titlecase(c::AbstractChar)
 
@@ -332,6 +349,8 @@ julia> uppercase('ǆ')
 titlecase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_totitle, UInt32, (UInt32,), c))
 
+titlecase(c::AnnotatedChar) = AnnotatedChar(titlecase(c.char), annotations(c))
+
 ############################################################################
 
 # returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category
@@ -340,7 +359,7 @@ function category_code(c::AbstractChar)
 end
 
 function category_code(x::Integer)
-    x ≤ 0x10ffff ? ccall(:utf8proc_category, Cint, (UInt32,), x) : Cint(30)
+    x ≤ 0x10ffff ? (@assume_effects :foldable @ccall utf8proc_category(UInt32(x)::UInt32)::Cint) : Cint(30)
 end
 
 # more human-readable representations of the category code
@@ -376,7 +395,8 @@ julia> islowercase('❤')
 false
 ```
 """
-islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
+islowercase(c::AbstractChar) = ismalformed(c) ? false :
+    Bool(@assume_effects :foldable @ccall utf8proc_islower(UInt32(c)::UInt32)::Cint)
 
 # true for Unicode upper and mixed case
 
@@ -400,7 +420,8 @@ julia> isuppercase('❤')
 false
 ```
 """
-isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
+isuppercase(c::AbstractChar) = ismalformed(c) ? false :
+    Bool(@assume_effects :foldable @ccall utf8proc_isupper(UInt32(c)::UInt32)::Cint)
 
 """
     iscased(c::AbstractChar) -> Bool
@@ -420,7 +441,7 @@ end
 """
     isdigit(c::AbstractChar) -> Bool
 
-Tests whether a character is a decimal digit (0-9).
+Tests whether a character is an ASCII decimal digit (`0`-`9`).
 
 See also: [`isletter`](@ref).
 
@@ -606,6 +627,7 @@ julia> uppercase("Julia")
 ```
 """
 uppercase(s::AbstractString) = map(uppercase, s)
+uppercase(s::AnnotatedString) = annotated_chartransform(uppercase, s)
 
 """
     lowercase(s::AbstractString)
@@ -621,6 +643,7 @@ julia> lowercase("STRINGS AND THINGS")
 ```
 """
 lowercase(s::AbstractString) = map(lowercase, s)
+lowercase(s::AnnotatedString) = annotated_chartransform(lowercase, s)
 
 """
     titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
@@ -669,6 +692,23 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo
     return String(take!(b))
 end
 
+# TODO: improve performance characteristics, room for a ~10x improvement.
+function titlecase(s::AnnotatedString; wordsep::Function = !isletter, strict::Bool=true)
+    initial_state = (; startword = true, state = Ref{Int32}(0),
+             c0 = eltype(s)(zero(UInt32)), wordsep, strict)
+    annotated_chartransform(s, initial_state) do c, state
+        if isgraphemebreak!(state.state, state.c0, c) && state.wordsep(c)
+            state = Base.setindex(state, true, :startword)
+            cnew = c
+        else
+            cnew = state.startword ? titlecase(c) : state.strict ? lowercase(c) : c
+            state = Base.setindex(state, false, :startword)
+        end
+        state = Base.setindex(state, c, :c0)
+        cnew, state
+    end
+end
+
 """
     uppercasefirst(s::AbstractString) -> String
 
@@ -693,6 +733,17 @@ function uppercasefirst(s::AbstractString)
     string(c′, SubString(s, nextind(s, 1)))
 end
 
+# TODO: improve performance characteristics, room for a ~5x improvement.
+function uppercasefirst(s::AnnotatedString)
+    annotated_chartransform(s, true) do c, state
+        if state
+            (titlecase(c), false)
+        else
+            (c, state)
+        end
+    end
+end
+
 """
     lowercasefirst(s::AbstractString)
 
@@ -715,6 +766,17 @@ function lowercasefirst(s::AbstractString)
     string(c′, SubString(s, nextind(s, 1)))
 end
 
+# TODO: improve performance characteristics, room for a ~5x improvement.
+function lowercasefirst(s::AnnotatedString)
+    annotated_chartransform(s, true) do c, state
+        if state
+            (lowercase(c), false)
+        else
+            (c, state)
+        end
+    end
+end
+
 ############################################################################
 # iterators for grapheme segmentation
 
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 219b329d7924d..0ba76e1c76fa0 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -513,6 +513,154 @@ function rpad(
     r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r))
 end
 
+"""
+    rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the last characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = rtruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 I lo…"
+
+julia> textwidth(s)
+10
+
+julia> rtruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:right))
+    if isnothing(ret)
+        return string(str)
+    else
+        left, _ = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement
+    end
+end
+
+"""
+    ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the first characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = ltruncate("🍕🍕 I love 🍕", 10)
+"…I love 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ltruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`rtruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:left))
+    if isnothing(ret)
+        return string(str)
+    else
+        _, right = ret::Tuple{Int,Int}
+        @views return replacement * str[right:end]
+    end
+end
+
+"""
+    ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the middle characters
+with `replacement` if necessary. The default replacement string is "…". By default, the truncation
+prefers keeping chars on the left, but this can be changed by setting `prefer_left` to `false`.
+
+# Examples
+```jldoctest
+julia> s = ctruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 …e 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ctruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`rtruncate`](@ref).
+"""
+function ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:center), prefer_left)
+    if isnothing(ret)
+        return string(str)
+    else
+        left, right = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement * str[right:end]
+    end
+end
+
+function string_truncate_boundaries(
+            str::AbstractString,
+            maxwidth::Integer,
+            replacement::Union{AbstractString,AbstractChar},
+            ::Val{mode},
+            prefer_left::Bool = true) where {mode}
+
+    maxwidth >= 0 || throw(ArgumentError("maxwidth $maxwidth should be non-negative"))
+
+    # check efficiently for early return if str is less wide than maxwidth
+    total_width = 0
+    for c in str
+        total_width += textwidth(c)
+        total_width > maxwidth && break
+    end
+    total_width <= maxwidth && return nothing
+
+    l0, _ = left, right = firstindex(str), lastindex(str)
+    width = textwidth(replacement)
+    # used to balance the truncated width on either side
+    rm_width_left, rm_width_right, force_other = 0, 0, false
+    @inbounds while true
+        if mode === :left || (mode === :center && (!prefer_left || left > l0))
+            rm_width = textwidth(str[right])
+            if mode === :left || (rm_width_right <= rm_width_left || force_other)
+                force_other = false
+                (width += rm_width) <= maxwidth || break
+                rm_width_right += rm_width
+                right = prevind(str, right)
+            else
+                force_other = true
+            end
+        end
+        if mode ∈ (:right, :center)
+            rm_width = textwidth(str[left])
+            if mode === :left || (rm_width_left <= rm_width_right || force_other)
+                force_other = false
+                (width += textwidth(str[left])) <= maxwidth || break
+                rm_width_left += rm_width
+                left = nextind(str, left)
+            else
+                force_other = true
+            end
+        end
+    end
+    return prevind(str, left), nextind(str, right)
+end
+
 """
     eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
     eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -1061,7 +1209,7 @@ function bytes2hex end
 
 function bytes2hex(itr)
     eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
-    b = Base.StringVector(2*length(itr))
+    b = Base.StringMemory(2*length(itr))
     @inbounds for (i, x) in enumerate(itr)
         b[2i - 1] = hex_chars[1 + x >> 4]
         b[2i    ] = hex_chars[1 + x & 0xf]
diff --git a/base/subarray.jl b/base/subarray.jl
index 2b8545c2cc226..47b4fa0584dba 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -115,11 +115,14 @@ function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Varar
     trimmedpind = _trimmedpind(V.indices...)
     vdest = trimmedpind isa Tuple{Vararg{Union{Slice,Colon}}} ? dest : view(dest, trimmedpind...)
     copyto!(vdest, view(V, _trimmedvind(V.indices...)...))
-    SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD))
+    indices = map(_trimmedindex, V.indices)
+    stride1 = LD ? compute_stride1(dest, indices) : 0
+    offset1 = LD ? compute_offset1(dest, stride1, indices) : 0
+    SubArray{T,N,A,I,LD}(dest, indices, offset1, stride1)
 end
 # Get the proper trimmed shape
 _trimmedshape(::ScalarIndex, rest...) = (1, _trimmedshape(rest...)...)
-_trimmedshape(i::AbstractRange, rest...) = (maximum(i), _trimmedshape(rest...)...)
+_trimmedshape(i::AbstractRange, rest...) = (isempty(i) ? zero(eltype(i)) : maximum(i), _trimmedshape(rest...)...)
 _trimmedshape(i::Union{UnitRange,StepRange,OneTo}, rest...) = (length(i), _trimmedshape(rest...)...)
 _trimmedshape(i::AbstractArray{<:ScalarIndex}, rest...) = (length(i), _trimmedshape(rest...)...)
 _trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedshape(rest...)
@@ -214,10 +217,6 @@ function view(A::AbstractArray, I::Vararg{Any,M}) where {M}
 end
 
 # Ranges implement getindex to return recomputed ranges; use that for views, too (when possible)
-function view(r1::OneTo, r2::OneTo)
-    @_propagate_inbounds_meta
-    getindex(r1, r2)
-end
 function view(r1::AbstractUnitRange, r2::AbstractUnitRange{<:Integer})
     @_propagate_inbounds_meta
     getindex(r1, r2)
@@ -349,11 +348,29 @@ FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Var
 @inline _reindexlinear(V::FastContiguousSubArray, i::Int) = V.offset1 + i
 @inline _reindexlinear(V::FastContiguousSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ i
 
+"""
+An internal type representing arrays stored contiguously in memory.
+"""
+const DenseArrayType{T,N} = Union{
+    DenseArray{T,N},
+    <:FastContiguousSubArray{T,N,<:DenseArray},
+}
+
+"""
+An internal type representing mutable arrays stored contiguously in memory.
+"""
+const MutableDenseArrayType{T,N} = Union{
+    Array{T, N},
+    Memory{T},
+    FastContiguousSubArray{T,N,<:Array},
+    FastContiguousSubArray{T,N,<:Memory}
+}
+
 # parents of FastContiguousSubArrays may support fast indexing with AbstractUnitRanges,
 # so we may just forward the indexing to the parent
 # This may only be done for non-offset ranges, as the result would otherwise have offset axes
-const OneBasedRanges = Union{OneTo{Int}, UnitRange{Int}, Slice{OneTo{Int}}, IdentityUnitRange{OneTo{Int}}}
-function getindex(V::FastContiguousSubArray, i::OneBasedRanges)
+const _OneBasedRanges = Union{OneTo{Int}, UnitRange{Int}, Slice{OneTo{Int}}, IdentityUnitRange{OneTo{Int}}}
+function getindex(V::FastContiguousSubArray, i::_OneBasedRanges)
     @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[_reindexlinear(V, i)]
@@ -410,27 +427,7 @@ function isassigned(V::FastSubArray{<:Any, 1}, i::Int)
     r
 end
 
-function _unsetindex!(V::FastSubArray, i::Int)
-    @inline
-    @boundscheck checkbounds(V, i)
-    @inbounds _unsetindex!(V.parent, _reindexlinear(V, i))
-    return V
-end
-function _unsetindex!(V::FastSubArray{<:Any,1}, i::Int)
-    @inline
-    @boundscheck checkbounds(V, i)
-    @inbounds _unsetindex!(V.parent, _reindexlinear(V, i))
-    return V
-end
-function _unsetindex!(V::SubArray{T,N}, i::Vararg{Int,N}) where {T,N}
-    @inline
-    @boundscheck checkbounds(V, i...)
-    @inbounds _unsetindex!(V.parent, reindex(V.indices, i)...)
-    return V
-end
-
 IndexStyle(::Type{<:FastSubArray}) = IndexLinear()
-IndexStyle(::Type{<:SubArray}) = IndexCartesian()
 
 # Strides are the distance in memory between adjacent elements in a given dimension
 # which we determine from the strides of the parent
@@ -440,7 +437,8 @@ substrides(strds::Tuple{}, ::Tuple{}) = ()
 substrides(strds::NTuple{N,Int}, I::Tuple{ScalarIndex, Vararg{Any}}) where N = (substrides(tail(strds), tail(I))...,)
 substrides(strds::NTuple{N,Int}, I::Tuple{Slice, Vararg{Any}}) where N = (first(strds), substrides(tail(strds), tail(I))...)
 substrides(strds::NTuple{N,Int}, I::Tuple{AbstractRange, Vararg{Any}}) where N = (first(strds)*step(I[1]), substrides(tail(strds), tail(I))...)
-substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is invalid for SubArrays with indices of type $(typeof(I[1]))"))
+substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError(
+    LazyString("strides is invalid for SubArrays with indices of type ", typeof(I[1]))))
 
 stride(V::SubArray, d::Integer) = d <= ndims(V) ? strides(V)[d] : strides(V)[end] * size(V)[end]
 
@@ -452,7 +450,7 @@ compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
     (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
-compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
+compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError(LazyString("invalid strided index type ", typeof(I[1]))))
 
 elsize(::Type{<:SubArray{<:Any,<:Any,P}}) where {P} = elsize(P)
 
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 2505824768099..4f2646c7641b7 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -8,6 +8,9 @@ struct SummarySize
     chargeall::Any
 end
 
+nth_pointer_isdefined(obj, i::Int) = ccall(:jl_nth_pointer_isdefined, Cint, (Any, Csize_t), obj, i-1) != 0
+get_nth_pointer(obj, i::Int) = ccall(:jl_get_nth_pointer, Any, (Any, Csize_t), obj, i-1)
+
 """
     Base.summarysize(obj; exclude=Union{...}, chargeall=Union{...}) -> Int
 
@@ -26,7 +29,7 @@ julia> Base.summarysize(1.0)
 8
 
 julia> Base.summarysize(Ref(rand(100)))
-864
+848
 
 julia> sizeof(Ref(rand(100)))
 8
@@ -50,15 +53,28 @@ function summarysize(obj;
                 val = x[i]
             end
         elseif isa(x, GenericMemory)
-            nf = length(x)
-            if @inbounds @inline isassigned(x, i)
-                val = x[i]
+            T = eltype(x)
+            if Base.allocatedinline(T)
+                np = datatype_npointers(T)
+                nf = length(x) * np
+                idx = (i-1) ÷ np + 1
+                if @inbounds @inline isassigned(x, idx)
+                    elt = x[idx]
+                    p = (i-1) % np + 1
+                    if nth_pointer_isdefined(elt, p)
+                        val = get_nth_pointer(elt, p)
+                    end
+                end
+            else
+                nf = length(x)
+                if @inbounds @inline isassigned(x, i)
+                    val = x[i]
+                end
             end
         else
-            nf = nfields(x)
-            ft = typeof(x).types
-            if !isbitstype(ft[i]) && isdefined(x, i)
-                val = getfield(x, i)
+            nf = datatype_npointers(typeof(x))
+            if nth_pointer_isdefined(x, i)
+                val = get_nth_pointer(x, i)
             end
         end
         if nf > i
@@ -82,7 +98,7 @@ end
     # and so is somewhat approximate.
     key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    if nfields(obj) > 0
+    if datatype_npointers(typeof(obj)) > 0
         push!(ss.frontier_x, obj)
         push!(ss.frontier_i, 1)
     end
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 62241de9ffd4a..966ed76751f28 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -68,22 +68,29 @@ let
 
     # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl
     # Run with the `--exclude-jlls` option to filter out all JLL packages
-    stdlibs = [
-        # No dependencies
-        :FileWatching, # used by loading.jl -- implicit assumption that init runs
-        :Libdl, # Transitive through LinAlg
-        :Artifacts, # Transitive through LinAlg
-        :SHA, # transitive through Random
-        :Sockets, # used by stream.jl
-
-        # Transitive through LingAlg
-        # OpenBLAS_jll
-        # libblastrampoline_jll
-
-        # 1-depth packages
-        :LinearAlgebra, # Commits type-piracy and GEMM
-        :Random, # Can't be removed due to rand being exported by Base
-    ]
+    if isdefined(Base.BuildSettings, :INCLUDE_STDLIBS)
+        # e.g. INCLUDE_STDLIBS = "FileWatching,Libdl,Artifacts,SHA,Sockets,LinearAlgebra,Random"
+        stdlibs = Symbol.(split(Base.BuildSettings.INCLUDE_STDLIBS, ","))
+    else
+        # TODO: this is included for compatibility with PackageCompiler, which looks for it.
+        # This should eventually be removed so we only use `BuildSettings`.
+        stdlibs = [
+            # No dependencies
+            :FileWatching, # used by loading.jl -- implicit assumption that init runs
+            :Libdl, # Transitive through LinAlg
+            :Artifacts, # Transitive through LinAlg
+            :SHA, # transitive through Random
+            :Sockets, # used by stream.jl
+
+            # Transitive through LingAlg
+            # OpenBLAS_jll
+            # libblastrampoline_jll
+
+            # 1-depth packages
+            :LinearAlgebra, # Commits type-piracy and GEMM
+            :Random, # Can't be removed due to rand being exported by Base
+        ]
+    end
     # PackageCompiler can filter out stdlibs so it can be empty
     maxlen = maximum(textwidth.(string.(stdlibs)); init=0)
 
@@ -139,6 +146,7 @@ end
 
 empty!(Base.TOML_CACHE.d)
 Base.TOML.reinit!(Base.TOML_CACHE.p, "")
+@eval Base BUILDROOT = ""
 @eval Sys begin
     BINDIR = ""
     STDLIB = ""
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index c5744873312d6..7dab313cf4f57 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -34,7 +34,7 @@ export BINDIR,
        isjsvm,
        isexecutable,
        isreadable,
-       iswriteable,
+       iswritable,
        username,
        which
 
@@ -56,6 +56,8 @@ global STDLIB::String = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSI
 # In case STDLIB change after julia is built, the variable below can be used
 # to update cached method locations to updated ones.
 const BUILD_STDLIB_PATH = STDLIB
+# Similarly, this is the root of the julia repo directory that julia was built from
+const BUILD_ROOT_PATH = "$BINDIR/../.."
 
 # helper to avoid triggering precompile warnings
 
@@ -147,7 +149,7 @@ function __init__()
     end
     global CPU_THREADS = if env_threads !== nothing
         env_threads = tryparse(Int, env_threads)
-        if !(env_threads isa Int && env_threads > 0)
+        if env_threads === nothing || env_threads <= 0
             env_threads = Int(ccall(:jl_cpu_threads, Int32, ()))
             Core.print(Core.stderr, "WARNING: couldn't parse `JULIA_CPU_THREADS` environment variable. Defaulting Sys.CPU_THREADS to $env_threads.\n")
         end
@@ -165,7 +167,7 @@ end
 # without pulling in anything unnecessary like `CPU_NAME`
 function __init_build()
     global BINDIR = ccall(:jl_get_julia_bindir, Any, ())::String
-    vers = "v$(VERSION.major).$(VERSION.minor)"
+    vers = "v$(string(VERSION.major)).$(string(VERSION.minor))"
     global STDLIB = abspath(BINDIR, "..", "share", "julia", "stdlib", vers)
     nothing
 end
@@ -399,7 +401,7 @@ end
 
 Get the maximum resident set size utilized in bytes.
 See also:
-    - man page of `getrusage`(2) on Linux and FreeBSD.
+    - man page of `getrusage`(2) on Linux and BSD.
     - Windows API `GetProcessMemoryInfo`.
 """
 maxrss() = ccall(:jl_maxrss, Csize_t, ())
@@ -553,85 +555,9 @@ windows_version
 
 const WINDOWS_VISTA_VER = v"6.0"
 
-"""
-    Sys.isexecutable(path::String)
-
-Return `true` if the given `path` has executable permissions.
-
-!!! note
-    This permission may change before the user executes `path`,
-    so it is recommended to execute the file and handle the error if that fails,
-    rather than calling `isexecutable` first.
-
-!!! note
-    Prior to Julia 1.6, this did not correctly interrogate filesystem
-    ACLs on Windows, therefore it would return `true` for any
-    file.  From Julia 1.6 on, it correctly determines whether the
-    file is marked as executable or not.
-
-See also [`ispath`](@ref), [`isreadable`](@ref), [`iswriteable`](@ref).
-"""
-function isexecutable(path::String)
-    # We use `access()` and `X_OK` to determine if a given path is
-    # executable by the current user.  `X_OK` comes from `unistd.h`.
-    X_OK = 0x01
-    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, X_OK) == 0
-end
-isexecutable(path::AbstractString) = isexecutable(String(path))
-
-"""
-    Sys.isreadable(path::String)
-
-Return `true` if the access permissions for the given `path` permitted reading by the current user.
-
-!!! note
-    This permission may change before the user calls `open`,
-    so it is recommended to just call `open` alone and handle the error if that fails,
-    rather than calling `isreadable` first.
-
-!!! note
-    Currently this function does not correctly interrogate filesystem
-    ACLs on Windows, therefore it can return wrong results.
-
-!!! compat "Julia 1.11"
-    This function requires at least Julia 1.11.
-
-See also [`ispath`](@ref), [`isexecutable`](@ref), [`iswriteable`](@ref).
-"""
-function isreadable(path::String)
-    # We use `access()` and `R_OK` to determine if a given path is
-    # readable by the current user.  `R_OK` comes from `unistd.h`.
-    R_OK = 0x04
-    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, R_OK) == 0
-end
-isreadable(path::AbstractString) = isreadable(String(path))
-
-"""
-    Sys.iswriteable(path::String)
-
-Return `true` if the access permissions for the given `path` permitted writing by the current user.
-
-!!! note
-    This permission may change before the user calls `open`,
-    so it is recommended to just call `open` alone and handle the error if that fails,
-    rather than calling `iswriteable` first.
-
-!!! note
-    Currently this function does not correctly interrogate filesystem
-    ACLs on Windows, therefore it can return wrong results.
-
-!!! compat "Julia 1.11"
-    This function requires at least Julia 1.11.
-
-See also [`ispath`](@ref), [`isexecutable`](@ref), [`isreadable`](@ref).
-"""
-function iswriteable(path::String)
-    # We use `access()` and `W_OK` to determine if a given path is
-    # writeable by the current user.  `W_OK` comes from `unistd.h`.
-    W_OK = 0x02
-    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, W_OK) == 0
-end
-iswriteable(path::AbstractString) = iswriteable(String(path))
+const isexecutable = Base.isexecutable
+const isreadable   = Base.isreadable
+const iswritable   = Base.iswritable
 
 """
     Sys.which(program_name::String)
diff --git a/base/task.jl b/base/task.jl
index ba96d7bca5095..6cb1ff785eeee 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -53,7 +53,6 @@ push!(c::CompositeException, ex) = push!(c.exceptions, ex)
 pushfirst!(c::CompositeException, ex) = pushfirst!(c.exceptions, ex)
 isempty(c::CompositeException) = isempty(c.exceptions)
 iterate(c::CompositeException, state...) = iterate(c.exceptions, state...)
-eltype(::Type{CompositeException}) = Any
 
 function showerror(io::IO, ex::CompositeException)
     if !isempty(ex)
@@ -115,6 +114,13 @@ end
 Wrap an expression in a [`Task`](@ref) without executing it, and return the [`Task`](@ref). This only
 creates a task, and does not run it.
 
+!!! warning
+    By default tasks will have the sticky bit set to true `t.sticky`. This models the
+    historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    bit manually to `false`.
+
 # Examples
 ```jldoctest
 julia> a1() = sum(i for i in 1:1000);
@@ -150,21 +156,10 @@ const task_state_runnable = UInt8(0)
 const task_state_done     = UInt8(1)
 const task_state_failed   = UInt8(2)
 
-const _state_index = findfirst(==(:_state), fieldnames(Task))
-@eval function load_state_acquire(t)
-    # TODO: Replace this by proper atomic operations when available
-    @GC.preserve t llvmcall($("""
-        %ptr = inttoptr i$(Sys.WORD_SIZE) %0 to i8*
-        %rv = load atomic i8, i8* %ptr acquire, align 8
-        ret i8 %rv
-        """), UInt8, Tuple{Ptr{UInt8}},
-        Ptr{UInt8}(pointer_from_objref(t) + fieldoffset(Task, _state_index)))
-end
-
 @inline function getproperty(t::Task, field::Symbol)
     if field === :state
         # TODO: this field name should be deprecated in 2.0
-        st = load_state_acquire(t)
+        st = @atomic :acquire t._state
         if st === task_state_runnable
             return :runnable
         elseif st === task_state_done
@@ -181,7 +176,9 @@ end
         # TODO: this field name should be deprecated in 2.0
         return t._isexception ? t.result : nothing
     elseif field === :scope
-        error("Querying `scope` is disallowed. Use `current_scope` instead.")
+        error("""
+            Querying a Task's `scope` field is disallowed.
+            The private `Core.current_scope()` function is better, though still an implementation detail.""")
     else
         return getfield(t, field)
     end
@@ -216,7 +213,7 @@ julia> istaskdone(b)
 true
 ```
 """
-istaskdone(t::Task) = load_state_acquire(t) !== task_state_runnable
+istaskdone(t::Task) = (@atomic :acquire t._state) !== task_state_runnable
 
 """
     istaskstarted(t::Task) -> Bool
@@ -260,7 +257,7 @@ true
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed)
+istaskfailed(t::Task) = ((@atomic :acquire t._state) === task_state_failed)
 
 Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1)
 function Threads.threadpool(t::Task)
@@ -313,6 +310,7 @@ end
 
 # just wait for a task to be done, no error propagation
 function _wait(t::Task)
+    t === current_task() && Core.throw(ConcurrencyViolationError("deadlock detected: cannot wait on current task"))
     if !istaskdone(t)
         donenotify = t.donenotify::ThreadSynchronizer
         lock(donenotify)
@@ -356,15 +354,166 @@ function _wait2(t::Task, waiter::Task)
     nothing
 end
 
-function wait(t::Task)
-    t === current_task() && error("deadlock detected: cannot wait on current task")
+"""
+    wait(t::Task; throw=true)
+
+Wait for a `Task` to finish.
+
+The keyword `throw` (defaults to `true`) controls whether a failed task results
+in an error, thrown as a [`TaskFailedException`](@ref) which wraps the failed task.
+
+Throws a `ConcurrencyViolationError` if `t` is the currently running task, to prevent deadlocks.
+"""
+function wait(t::Task; throw=true)
     _wait(t)
-    if istaskfailed(t)
-        throw(TaskFailedException(t))
+    if throw && istaskfailed(t)
+        Core.throw(TaskFailedException(t))
     end
     nothing
 end
 
+# Wait multiple tasks
+
+"""
+    waitany(tasks; throw=true) -> (done_tasks, remaining_tasks)
+
+Wait until at least one of the given tasks have been completed.
+
+If `throw` is `true`, throw `CompositeException` when one of the
+completed tasks completes with an exception.
+
+The return value consists of two task vectors. The first one consists of
+completed tasks, and the other consists of uncompleted tasks.
+
+!!! warning
+    This may scale poorly compared to writing code that uses multiple individual tasks that
+    each runs serially, since this needs to scan the list of `tasks` each time and
+    synchronize with each one every time this is called. Or consider using
+    [`waitall(tasks; failfast=true)`](@ref waitall) instead.
+"""
+waitany(tasks; throw=true) = _wait_multiple(tasks, throw)
+
+"""
+    waitall(tasks; failfast=true, throw=true) -> (done_tasks, remaining_tasks)
+
+Wait until all the given tasks have been completed.
+
+If `failfast` is `true`, the function will return when at least one of the
+given tasks is finished by exception. If `throw` is `true`, throw
+`CompositeException` when one of the completed tasks has failed.
+
+`failfast` and `throw` keyword arguments work independently; when only
+`throw=true` is specified, this function waits for all the tasks to complete.
+
+The return value consists of two task vectors. The first one consists of
+completed tasks, and the other consists of uncompleted tasks.
+"""
+waitall(tasks; failfast=true, throw=true) = _wait_multiple(tasks, throw, true, failfast)
+
+function _wait_multiple(waiting_tasks, throwexc=false, all=false, failfast=false)
+    tasks = Task[]
+
+    for t in waiting_tasks
+        t isa Task || error("Expected an iterator of `Task` object")
+        push!(tasks, t)
+    end
+
+    if (all && !failfast) || length(tasks) <= 1
+        exception = false
+        # Force everything to finish synchronously for the case of waitall
+        # with failfast=false
+        for t in tasks
+            _wait(t)
+            exception |= istaskfailed(t)
+        end
+        if exception && throwexc
+            exceptions = [TaskFailedException(t) for t in tasks if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            return tasks, Task[]
+        end
+    end
+
+    exception = false
+    nremaining::Int = length(tasks)
+    done_mask = falses(nremaining)
+    for (i, t) in enumerate(tasks)
+        if istaskdone(t)
+            done_mask[i] = true
+            exception |= istaskfailed(t)
+            nremaining -= 1
+        else
+            done_mask[i] = false
+        end
+    end
+
+    if nremaining == 0
+        return tasks, Task[]
+    elseif any(done_mask) && (!all || (failfast && exception))
+        if throwexc && (!all || failfast) && exception
+            exceptions = [TaskFailedException(t) for t in tasks[done_mask] if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            return tasks[done_mask], tasks[.~done_mask]
+        end
+    end
+
+    chan = Channel{Int}(Inf)
+    sentinel = current_task()
+    waiter_tasks = fill(sentinel, length(tasks))
+
+    for (i, done) in enumerate(done_mask)
+        done && continue
+        t = tasks[i]
+        if istaskdone(t)
+            done_mask[i] = true
+            exception |= istaskfailed(t)
+            nremaining -= 1
+            exception && failfast && break
+        else
+            waiter = @task put!(chan, i)
+            waiter.sticky = false
+            _wait2(t, waiter)
+            waiter_tasks[i] = waiter
+        end
+    end
+
+    while nremaining > 0
+        i = take!(chan)
+        t = tasks[i]
+        waiter_tasks[i] = sentinel
+        done_mask[i] = true
+        exception |= istaskfailed(t)
+        nremaining -= 1
+
+        # stop early if requested, unless there is something immediately
+        # ready to consume from the channel (using a race-y check)
+        if (!all || (failfast && exception)) && !isready(chan)
+            break
+        end
+    end
+
+    close(chan)
+
+    if nremaining == 0
+        return tasks, Task[]
+    else
+        remaining_mask = .~done_mask
+        for i in findall(remaining_mask)
+            waiter = waiter_tasks[i]
+            donenotify = tasks[i].donenotify::ThreadSynchronizer
+            @lock donenotify Base.list_deletefirst!(donenotify.waitq, waiter)
+        end
+        done_tasks = tasks[done_mask]
+        if throwexc && exception
+            exceptions = [TaskFailedException(t) for t in done_tasks if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            return done_tasks, tasks[remaining_mask]
+        end
+    end
+end
+
 """
     fetch(x::Any)
 
@@ -576,7 +725,7 @@ Print an error log to `stderr` if task `t` fails.
 
 # Examples
 ```julia-repl
-julia> Base._wait(errormonitor(Threads.@spawn error("task failed")))
+julia> wait(errormonitor(Threads.@spawn error("task failed")); throw = false)
 Unhandled Task ERROR: task failed
 Stacktrace:
 [...]
@@ -654,6 +803,17 @@ macro sync_add(expr)
     end
 end
 
+function repl_backend_task()
+    @isdefined(active_repl_backend) || return
+    backend = active_repl_backend
+    isdefined(backend, :backend_task) || return
+    backend_task = getfield(active_repl_backend, :backend_task)::Task
+    if backend_task._state === task_state_runnable && getfield(backend, :in_eval)
+        return backend_task
+    end
+    return
+end
+
 # runtime system hook called when a task finishes
 function task_done_hook(t::Task)
     # `finish_task` sets `sigatomic` before entering this function
@@ -675,10 +835,9 @@ function task_done_hook(t::Task)
     end
 
     if err && !handled && Threads.threadid() == 1
-        if isa(result, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, result) # this terminates the task
+        if isa(result, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, result)
         end
     end
     # Clear sigatomic before waiting
@@ -689,14 +848,11 @@ function task_done_hook(t::Task)
         # If an InterruptException happens while blocked in the event loop, try handing
         # the exception to the REPL task since the current task is done.
         # issue #19467
-        if Threads.threadid() == 1 &&
-            isa(e, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, e)
-        else
-            rethrow()
+        if Threads.threadid() == 1 && isa(e, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, e)
         end
+        rethrow() # this will terminate the program
     end
 end
 
@@ -839,6 +995,13 @@ the woken task.
     It is incorrect to use `schedule` on an arbitrary `Task` that has already been started.
     See [the API reference](@ref low-level-schedule-wait) for more information.
 
+!!! warning
+    By default tasks will have the sticky bit set to true `t.sticky`. This models the
+    historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    bit manually to `false`.
+
 # Examples
 ```jldoctest
 julia> a5() = sum(i for i in 1:1000);
@@ -863,7 +1026,7 @@ function schedule(t::Task, @nospecialize(arg); error=false)
     # schedule a task to be (re)started with the given value or exception
     t._state === task_state_runnable || Base.error("schedule: Task not runnable")
     if error
-        t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t)
+        q = t.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, t)
         setfield!(t, :result, arg)
         setfield!(t, :_isexception, true)
     else
@@ -887,7 +1050,7 @@ function yield()
     try
         wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     end
 end
@@ -899,11 +1062,15 @@ end
 
 A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
 immediately yields to `t` before calling the scheduler.
+
+Throws a `ConcurrencyViolationError` if `t` is the currently running task.
 """
 function yield(t::Task, @nospecialize(x=nothing))
-    (t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
+    current = current_task()
+    t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
+    (t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
     t.result = x
-    enq_work(current_task())
+    enq_work(current)
     set_next_task(t)
     return try_yieldto(ensure_rescheduled)
 end
diff --git a/base/terminfo.jl b/base/terminfo.jl
index 6f1d1ca8015f0..79713f4a86aa3 100644
--- a/base/terminfo.jl
+++ b/base/terminfo.jl
@@ -245,7 +245,8 @@ end
 Locate the terminfo file for `term`, return `nothing` if none could be found.
 
 The lookup policy is described in `terminfo(5)` "Fetching Compiled
-Descriptions".
+Descriptions". A terminfo database is included by default with Julia and is
+taken to be the first entry of `@TERMINFO_DIRS@`.
 """
 function find_terminfo_file(term::String)
     isempty(term) && return
@@ -261,6 +262,7 @@ function find_terminfo_file(term::String)
         append!(terminfo_dirs,
                 replace(split(ENV["TERMINFO_DIRS"], ':'),
                         "" => "/usr/share/terminfo"))
+    push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "terminfo"))
     Sys.isunix() &&
         push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo")
     for dir in terminfo_dirs
@@ -268,8 +270,15 @@ function find_terminfo_file(term::String)
             return joinpath(dir, chr, term)
         elseif isfile(joinpath(dir, chrcode, term))
             return joinpath(dir, chrcode, term)
+        elseif isfile(joinpath(dir, lowercase(chr), lowercase(term)))
+            # The vendored terminfo database is fully lowercase to avoid issues on
+            # case-sensitive filesystems. On Unix-like systems, terminfo files with
+            # different cases are hard links to one another, so this is still
+            # correct for non-vendored terminfo, just redundant.
+            return joinpath(dir, lowercase(chr), lowercase(term))
         end
     end
+    return nothing
 end
 
 """
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index 56d596958bc7e..a21d708b4a077 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -4,10 +4,10 @@ export threadid, nthreads, @threads, @spawn,
        threadpool, nthreadpools
 
 """
-    Threads.threadid() -> Int
+    Threads.threadid([t::Task]) -> Int
 
-Get the ID number of the current thread of execution. The master thread has
-ID `1`.
+Get the ID number of the current thread of execution, or the thread of task
+`t`. The master thread has ID `1`.
 
 # Examples
 ```julia-repl
@@ -21,12 +21,15 @@ julia> Threads.@threads for i in 1:4
 2
 5
 4
+
+julia> Threads.threadid(Threads.@spawn "foo")
+2
 ```
 
 !!! note
     The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration).
-    For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects.
-
+    For this reason in most cases it is not safe to use `threadid([task])` to index into, say, a vector of buffers or stateful
+    objects.
 """
 threadid() = Int(ccall(:jl_threadid, Int16, ())+1)
 
@@ -67,7 +70,7 @@ function _tpid_to_sym(tpid::Int8)
     elseif tpid == -1
         return :foreign
     else
-        throw(ArgumentError("Unrecognized threadpool id $tpid"))
+        throw(ArgumentError(LazyString("Unrecognized threadpool id ", tpid)))
     end
 end
 
@@ -79,7 +82,7 @@ function _sym_to_tpid(tp::Symbol)
     elseif tp == :foreign
         return Int8(-1)
     else
-        throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`"))
+        throw(ArgumentError(LazyString("Unrecognized threadpool name `", tp, "`")))
     end
 end
 
@@ -93,6 +96,24 @@ function threadpool(tid = threadid())
     return _tpid_to_sym(tpid)
 end
 
+"""
+    Threads.threadpooldescription(tid = threadid()) -> String
+
+Returns the specified thread's threadpool name with extended description where appropriate.
+"""
+function threadpooldescription(tid = threadid())
+    threadpool_name = threadpool(tid)
+    if threadpool_name == :foreign
+        # TODO: extend tls to include a field to add a description to a foreign thread and make this more general
+        n_others = nthreads(:interactive) + nthreads(:default)
+        # Assumes GC threads come first in the foreign thread pool
+        if tid > n_others && tid <= n_others + ngcthreads()
+            return "foreign: gc"
+        end
+    end
+    return string(threadpool_name)
+end
+
 """
     Threads.nthreadpools() -> Int
 
@@ -158,7 +179,8 @@ function threading_run(fun, static)
         else
             # TODO: this should be the current pool (except interactive) if there
             # are ever more than two pools.
-            @assert ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, _sym_to_tpid(:default)) == 1
+            _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, _sym_to_tpid(:default))
+            @assert _result == 1
         end
         tasks[i] = t
         schedule(t)
@@ -410,7 +432,8 @@ function _spawn_set_thrpool(t::Task, tp::Symbol)
     if tpid == -1 || _nthreads_in_pool(tpid) == 0
         tpid = _sym_to_tpid(:default)
     end
-    @assert ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid) == 1
+    _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid)
+    @assert _result == 1
     nothing
 end
 
@@ -462,7 +485,7 @@ macro spawn(args...)
         if ttype isa QuoteNode
             ttype = ttype.value
             if ttype !== :interactive && ttype !== :default
-                throw(ArgumentError("unsupported threadpool in @spawn: $ttype"))
+                throw(ArgumentError(LazyString("unsupported threadpool in @spawn: ", ttype)))
             end
             tp = QuoteNode(ttype)
         else
diff --git a/base/timing.jl b/base/timing.jl
index ecb67a2375d92..80ebb74abee26 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# This type must be kept in sync with the C struct in src/gc.h
+# This type must be kept in sync with the C struct in src/gc-interface.h
 struct GC_Num
     allocd          ::Int64 # GC internal
     deferred_alloc  ::Int64 # GC internal
@@ -29,7 +29,6 @@ struct GC_Num
 end
 
 gc_num() = ccall(:jl_gc_num, GC_Num, ())
-reset_gc_stats() = ccall(:jl_gc_reset_stats, Cvoid, ())
 
 # This type is to represent differences in the counters, so fields may be negative
 struct GC_Diff
@@ -48,7 +47,7 @@ gc_total_bytes(gc_num::GC_Num) =
     gc_num.allocd + gc_num.deferred_alloc + gc_num.total_allocd
 
 function GC_Diff(new::GC_Num, old::GC_Num)
-    # logic from `src/gc.c:jl_gc_total_bytes`
+    # logic from `jl_gc_total_bytes`
     old_allocd = gc_total_bytes(old)
     new_allocd = gc_total_bytes(new)
     return GC_Diff(new_allocd       - old_allocd,
@@ -316,7 +315,8 @@ macro time(msg, ex)
     quote
         local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg)
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
         ret.value
     end
 end
@@ -388,7 +388,8 @@ macro timev(msg, ex)
     quote
         local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg)
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg_str)
         ret.value
     end
 end
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index 23fc14a894c06..4d07cfed05d8a 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -9,8 +9,8 @@ module TOML
 
 using Base: IdSet
 
-# In case we do not have the Dates stdlib available
 # we parse DateTime into these internal structs,
+# unless a different DateTime library is passed to the Parser constructor
 # note that these do not do any argument checking
 struct Date
     year::Int
@@ -38,7 +38,7 @@ const TOMLDict  = Dict{String, Any}
 # Parser #
 ##########
 
-mutable struct Parser
+mutable struct Parser{Dates}
     str::String
     # 1 character look ahead
     current_char::Char
@@ -84,16 +84,11 @@ mutable struct Parser
 
     # Filled in in case we are parsing a file to improve error messages
     filepath::Union{String, Nothing}
-
-    # Gets populated with the Dates stdlib if it exists
-    Dates::Union{Module, Nothing}
 end
 
-const DATES_PKGID = Base.PkgId(Base.UUID("ade2ca70-3891-5945-98fb-dc099432e06a"), "Dates")
-
-function Parser(str::String; filepath=nothing)
+function Parser{Dates}(str::String; filepath=nothing) where {Dates}
     root = TOMLDict()
-    l = Parser(
+    l = Parser{Dates}(
             str,                  # str
             EOF_CHAR,             # current_char
             firstindex(str),      # pos
@@ -108,12 +103,12 @@ function Parser(str::String; filepath=nothing)
             IdSet{Any}(),         # static_arrays
             IdSet{TOMLDict}(),    # defined_tables
             root,
-            filepath,
-            isdefined(Base, :maybe_root_module) ? Base.maybe_root_module(DATES_PKGID) : nothing,
+            filepath
         )
     startup(l)
     return l
 end
+
 function startup(l::Parser)
     # Populate our one character look-ahead
     c = eat_char(l)
@@ -124,8 +119,10 @@ function startup(l::Parser)
     end
 end
 
-Parser() = Parser("")
-Parser(io::IO) = Parser(read(io, String))
+Parser{Dates}() where {Dates} = Parser{Dates}("")
+Parser{Dates}(io::IO) where {Dates} = Parser{Dates}(read(io, String))
+
+# Parser(...) will be defined by TOML stdlib
 
 function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
     p.str = str
@@ -151,8 +148,6 @@ end
 # Errors #
 ##########
 
-throw_internal_error(msg) = error("internal TOML parser error: $msg")
-
 # Many functions return a ParserError. We want this to bubble up
 # all the way and have this error be returned to the user
 # if the parse is called with `raise=false`. This macro
@@ -496,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String},
         d = d::TOMLDict
         key = dotted_keys[i]
         d = get!(TOMLDict, d, key)
-        if d isa Vector
+        if d isa Vector{Any}
             d = d[end]
+        elseif d isa Vector
+            return ParserError(ErrKeyAlreadyHasValue)
         end
         check && @try check_allowed_add_key(l, d, i == length(dotted_keys))
     end
@@ -538,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
     end
     d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false)
     k = table_key[end]
-    old = get!(() -> [], d, k)
+    old = get!(() -> Any[], d, k)
     if old isa Vector
         if old in l.static_arrays
             return ParserError(ErrAddArrayToStaticArray)
@@ -547,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
         return ParserError(ErrArrayTreatedAsDictionary)
     end
     d_new = TOMLDict()
-    push!(old, d_new)
+    push!(old::Vector{Any}, d_new)
     push!(l.defined_tables, d_new)
     l.active_table = d_new
 
@@ -669,41 +666,20 @@ end
 # Array #
 #########
 
-function push!!(v::Vector, el)
-    # Since these types are typically non-inferable, they are a big invalidation risk,
-    # and since it's used by the package-loading infrastructure the cost of invalidation
-    # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
-    # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
-    # is no ambiguity about what types of objects will be created.
-    T = eltype(v)
-    t = typeof(el)
-    if el isa T || t === T
-        push!(v, el::T)
-        return v
-    elseif T === Union{}
-        out = Vector{t}(undef, 1)
-        out[1] = el
-        return out
-    else
-        if T isa Union
-            newT = Any
-        else
-            newT = Union{T, typeof(el)}
-        end
-        new = Array{newT}(undef, length(v))
-        copy!(new, v)
-        return push!(new, el)
+function copyto_typed!(a::Vector{T}, b::Vector) where T
+    for i in 1:length(b)
+        a[i] = b[i]::T
     end
+    return nothing
 end
 
-function parse_array(l::Parser)::Err{Vector}
+function parse_array(l::Parser{Dates})::Err{Vector} where Dates
     skip_ws_nl(l)
-    array = Vector{Union{}}()
+    array = Vector{Any}()
     empty_array = accept(l, ']')
     while !empty_array
         v = @try parse_value(l)
-        # TODO: Worth to function barrier this?
-        array = push!!(array, v)
+        array = push!(array, v)
         # There can be an arbitrary number of newlines and comments before a value and before the closing bracket.
         skip_ws_nl(l)
         comma = accept(l, ',')
@@ -713,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector}
             return ParserError(ErrExpectedCommaBetweenItemsArray)
         end
     end
-    push!(l.static_arrays, array)
-    return array
+    # check for static type throughout array
+    T = !isempty(array) ? typeof(array[1]) : Union{}
+    for el in array
+        if typeof(el) != T
+            T = Any
+            break
+        end
+    end
+    if T === Any
+        new = array
+    elseif T === String
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Bool
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Int64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === UInt64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Float64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Union{}
+        new = Any[]
+    elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) ||
+        (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime)
+        # do nothing, leave as Vector{Any}
+        new = array
+    else @assert false end
+    push!(l.static_arrays, new)
+    return new
 end
 
 
@@ -854,7 +862,7 @@ function parse_number_or_date_start(l::Parser)
     ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero)
     read_underscore |= contains_underscore
     if (read_digit || ate) && ok_end_value(peek(l))
-        return parse_int(l, contains_underscore)
+        return parse_integer(l, contains_underscore)
     end
     # Done with integers here
 
@@ -900,11 +908,22 @@ end
 function parse_float(l::Parser, contains_underscore)::Err{Float64}
     s = take_string_or_substring(l, contains_underscore)
     v = Base.tryparse(Float64, s)
-    v === nothing && return(ParserError(ErrGenericValueError))
+    v === nothing && return ParserError(ErrGenericValueError)
     return v
 end
 
-for (name, T1, T2, n1, n2) in (("int", Int64,  Int128,  17,  33),
+function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64}
+    s = take_string_or_substring(l, contains_underscore)
+    v = try
+        Base.parse(Int64, s; base=base)
+    catch e
+        e isa Base.OverflowError && return ParserError(ErrOverflowError)
+        rethrow()
+    end
+    return v
+end
+
+for (name, T1, T2, n1, n2) in (("integer", Int64,  Int128,  17,  33),
                                ("hex", UInt64, UInt128, 18,  34),
                                ("oct", UInt64, UInt128, 24,  45),
                                ("bin", UInt64, UInt128, 66, 130),
@@ -921,8 +940,8 @@ for (name, T1, T2, n1, n2) in (("int", Int64,  Int128,  17,  33),
                 Base.parse(BigInt, s; base)
             end
         catch e
-            e isa Base.OverflowError && return(ParserError(ErrOverflowError))
-            error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+            e isa Base.OverflowError && return ParserError(ErrOverflowError)
+            rethrow()
         end
         return v
     end
@@ -1014,26 +1033,26 @@ function parse_datetime(l)
     return try_return_datetime(l, year, month, day, h, m, s, ms)
 end
 
-function try_return_datetime(p, year, month, day, h, m, s, ms)
-    Dates = p.Dates
+function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.DateTime(year, month, day, h, m, s, ms)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return DateTime(year, month, day, h, m, s, ms)
     end
 end
 
-function try_return_date(p, year, month, day)
-    Dates = p.Dates
+function try_return_date(p::Parser{Dates}, year, month, day) where Dates
     if Dates !== nothing
         try
             return Dates.Date(year, month, day)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return Date(year, month, day)
@@ -1049,13 +1068,13 @@ function parse_local_time(l::Parser)
     return try_return_time(l, h, m, s, ms)
 end
 
-function try_return_time(p, h, m, s, ms)
-    Dates = p.Dates
+function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.Time(h, m, s, ms)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return Time(h, m, s, ms)
@@ -1103,7 +1122,7 @@ function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int64}}
         end
         # DateTime in base only manages 3 significant digits in fractional
         # second
-        fractional_second = parse_int(l, false)
+        fractional_second = parse_int(l, false)::Int64
         # Truncate off the rest eventual digits
         accept_batch(l, isdigit)
     end
diff --git a/base/tuple.jl b/base/tuple.jl
index a3fadb5c86e78..fc213410cfd7c 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -68,6 +68,7 @@ end
 
 function iterate(@nospecialize(t::Tuple), i::Int=1)
     @inline
+    @_nothrow_meta
     return (1 <= i <= length(t)) ? (t[i], i + 1) : nothing
 end
 
@@ -76,8 +77,8 @@ keys(@nospecialize t::Tuple) = OneTo(length(t))
 """
     prevind(A, i)
 
-Return the index before `i` in `A`. The returned index is often equivalent to `i
-- 1` for an integer `i`. This function can be useful for generic code.
+Return the index before `i` in `A`. The returned index is often equivalent to
+`i - 1` for an integer `i`. This function can be useful for generic code.
 
 !!! warning
     The returned index might be out of bounds. Consider using
@@ -110,8 +111,8 @@ function prevind end
 """
     nextind(A, i)
 
-Return the index after `i` in `A`. The returned index is often equivalent to `i
-+ 1` for an integer `i`. This function can be useful for generic code.
+Return the index after `i` in `A`. The returned index is often equivalent to
+`i + 1` for an integer `i`. This function can be useful for generic code.
 
 !!! warning
     The returned index might be out of bounds. Consider using
@@ -458,7 +459,7 @@ _totuple(::Type{Tuple{}}, itr, s...) = ()
 
 function _totuple_err(@nospecialize T)
     @noinline
-    throw(ArgumentError("too few elements for tuple type $T"))
+    throw(ArgumentError(LazyString("too few elements for tuple type ", T)))
 end
 
 function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
@@ -502,7 +503,7 @@ end
 _findfirst_rec(f, i::Int, ::Tuple{}) = nothing
 _findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t)))
 function _findfirst_loop(f::Function, t)
-    for i in 1:length(t)
+    for i in eachindex(t)
         f(t[i]) && return i
     end
     return nothing
@@ -536,7 +537,7 @@ function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}})
     return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
 end
 function _isequal(t1::Any32, t2::Any32)
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         if !isequal(t1[i], t2[i])
             return false
         end
@@ -567,7 +568,7 @@ function _eq_missing(t1::Tuple, t2::Tuple)
 end
 function _eq(t1::Any32, t2::Any32)
     anymissing = false
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         eq = (t1[i] == t2[i])
         if ismissing(eq)
             anymissing = true
@@ -692,8 +693,11 @@ empty(@nospecialize x::Tuple) = ()
 foreach(f, itr::Tuple) = foldl((_, x) -> (f(x); nothing), itr, init=nothing)
 foreach(f, itr::Tuple, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itr, itrs...), init=nothing)
 
-function circshift(x::Tuple, shift::Integer)
+circshift((@nospecialize t::Union{Tuple{},Tuple{Any}}), @nospecialize _::Integer) = t
+circshift(t::Tuple{Any,Any}, shift::Integer) = iseven(shift) ? t : reverse(t)
+function circshift(x::Tuple{Any,Any,Any,Vararg{Any,N}}, shift::Integer) where {N}
     @inline
-    j = mod1(shift, length(x))
-    ntuple(k -> getindex(x, k-j+ifelse(k>j,0,length(x))), Val(length(x)))
+    len = N + 3
+    j = mod1(shift, len)
+    ntuple(k -> getindex(x, k-j+ifelse(k>j,0,len)), Val(len))::Tuple
 end
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 0de6270cafb2d..6928d420a3860 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -278,6 +278,7 @@ big(x::TwicePrecision) = big(x.hi) + big(x.lo)
 
 -(x::TwicePrecision) = TwicePrecision(-x.hi, -x.lo)
 
+zero(x::TwicePrecision) = zero(typeof(x))
 function zero(::Type{TwicePrecision{T}}) where {T}
     z = zero(T)
     TwicePrecision{T}(z, z)
diff --git a/base/util.jl b/base/util.jl
index c91f04e8baf96..95d62c4a16e1d 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -271,15 +271,29 @@ function securezero! end
 unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len))
 
 """
-    Base.getpass(message::AbstractString) -> Base.SecretBuffer
+    Base.getpass(message::AbstractString; with_suffix::Bool=true) -> Base.SecretBuffer
 
 Display a message and wait for the user to input a secret, returning an `IO`
-object containing the secret.
+object containing the secret. If `with_suffix` is `true` (the default), the
+suffix `": "` will be appended to `message`.
 
 !!! info "Windows"
     Note that on Windows, the secret might be displayed as it is typed; see
     `Base.winprompt` for securely retrieving username/password pairs from a
     graphical interface.
+
+!!! compat "Julia 1.12"
+    The `with_suffix` keyword argument requires at least Julia 1.12.
+
+# Examples
+
+```julia-repl
+julia> Base.getpass("Secret")
+Secret: SecretBuffer("*******")
+
+julia> Base.getpass("Secret> "; with_suffix=false)
+Secret> SecretBuffer("*******")
+```
 """
 function getpass end
 
@@ -339,11 +353,13 @@ function with_raw_tty(f::Function, input::TTY)
     end
 end
 
-function getpass(input::TTY, output::IO, prompt::AbstractString)
+function getpass(input::TTY, output::IO, prompt::AbstractString; with_suffix::Bool=true)
     input === stdin || throw(ArgumentError("getpass only works for stdin"))
     with_raw_tty(stdin) do
-        print(output, prompt, ": ")
+        print(output, prompt)
+        with_suffix && print(output, ": ")
         flush(output)
+
         s = SecretBuffer()
         plen = 0
         while true
@@ -364,7 +380,7 @@ end
 
 # allow new getpass methods to be defined if stdin has been
 # redirected to some custom stream, e.g. in IJulia.
-getpass(prompt::AbstractString) = getpass(stdin, stdout, prompt)
+getpass(prompt::AbstractString; with_suffix::Bool=true) = getpass(stdin, stdout, prompt; with_suffix)
 
 """
     prompt(message; default="") -> Union{String, Nothing}
@@ -491,8 +507,10 @@ unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_
 
 _crc32c(a::NTuple{<:Any, UInt8}, crc::UInt32=0x00000000) =
     unsafe_crc32c(Ref(a), length(a) % Csize_t, crc)
-_crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
+
+function _crc32c(a::DenseBytes, crc::UInt32=0x00000000)
     unsafe_crc32c(a, length(a) % Csize_t, crc)
+end
 
 function _crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000)
     unsafe_crc32c(s, sizeof(s) % Csize_t, crc)
@@ -687,6 +705,7 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2),
     pathsep = Sys.iswindows() ? ";" : ":"
     ENV2["JULIA_DEPOT_PATH"] = string(mktempdir(; cleanup = true), pathsep) # make sure the default depots can be loaded
     ENV2["JULIA_LOAD_PATH"] = string("@", pathsep, "@stdlib")
+    ENV2["JULIA_TESTS"] = "true"
     delete!(ENV2, "JULIA_PROJECT")
     try
         run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR,
diff --git a/base/uuid.jl b/base/uuid.jl
index 1ed0b81b3f178..9b2da3c6409db 100644
--- a/base/uuid.jl
+++ b/base/uuid.jl
@@ -90,7 +90,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1]
     global string
     function string(u::UUID)
         u = u.value
-        a = Base.StringVector(36)
+        a = Base.StringMemory(36)
         for i in groupings
             @inbounds a[i] = hex_chars[1 + u & 0xf]
             u >>= 4
diff --git a/base/version.jl b/base/version.jl
index 90c34b78c868f..b362daa78f04f 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -9,12 +9,12 @@ const VInt = UInt32
     VersionNumber
 
 Version number type which follows the specifications of
-[semantic versioning (semver)](https://semver.org/), composed of major, minor
+[semantic versioning (semver)](https://semver.org/spec/v2.0.0-rc.2.html), composed of major, minor
 and patch numeric values, followed by pre-release and build
 alphanumeric annotations.
 
 `VersionNumber` objects can be compared with all of the standard comparison
-operators (`==`, `<`, `<=`, etc.), with the result following semver rules.
+operators (`==`, `<`, `<=`, etc.), with the result following semver v2.0.0-rc.2 rules.
 
 `VersionNumber` has the following public fields:
 - `v.major::Integer`
@@ -51,8 +51,7 @@ struct VersionNumber
     build::VerTuple
 
     function VersionNumber(major::VInt, minor::VInt, patch::VInt,
-            pre::VerTuple,
-            bld::VerTuple)
+                           @nospecialize(pre::VerTuple), @nospecialize(bld::VerTuple))
         major >= 0 || throw(ArgumentError("invalid negative major version: $major"))
         minor >= 0 || throw(ArgumentError("invalid negative minor version: $minor"))
         patch >= 0 || throw(ArgumentError("invalid negative patch version: $patch"))
@@ -179,7 +178,7 @@ ident_cmp(a::Integer, b::String ) = isempty(b) ? +1 : -1
 ident_cmp(a::String,  b::Integer) = isempty(a) ? -1 : +1
 ident_cmp(a::String,  b::String ) = cmp(a, b)
 
-function ident_cmp(A::VerTuple, B::VerTuple)
+function ident_cmp(@nospecialize(A::VerTuple), @nospecialize(B::VerTuple))
     for (a, b) in Iterators.Zip{Tuple{VerTuple,VerTuple}}((A, B))
         c = ident_cmp(a, b)
         (c != 0) && return c
diff --git a/cli/Makefile b/cli/Makefile
index 4e32c53b9a6f0..3cc0af1a76afd 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -17,15 +17,14 @@ LOADER_CFLAGS += -DGLIBCXX_LEAST_VERSION_SYMBOL=\"$(shell echo "$(CSL_NEXT_GLIBC
 endif
 
 ifeq ($(OS),WINNT)
-LOADER_LDFLAGS += -municode -mconsole -nostdlib --disable-auto-import \
-                  --disable-runtime-pseudo-reloc -lntdll -lkernel32 -lpsapi
+LOADER_LDFLAGS += -municode -mconsole -nostdlib -lntdll -lkernel32 -lpsapi
 else ifeq ($(OS),Linux)
 # textoff and notext are aliases to the same option which suppress the TEXTREL warning for i686
 LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed -Wl,-z,notext
 else ifeq ($(OS),FreeBSD)
 LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
-else ifeq ($(OS),Darwin)
-LOADER_LDFLAGS += -lSystem
+else ifeq ($(OS),OpenBSD)
+LOADER_LDFLAGS += -Wl,--no-as-needed -lpthread -rdynamic -lc -Wl,--as-needed
 endif
 
 # Build list of dependent libraries that must be opened
@@ -107,7 +106,7 @@ julia-debug: $(build_bindir)/julia-debug$(EXE)
 libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT)
 libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT)
 
-ifneq (,$(filter $(OS), Linux FreeBSD))
+ifneq (,$(filter $(OS), Linux FreeBSD OpenBSD))
 VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap
 endif
 
@@ -117,7 +116,7 @@ STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_
 endif
 
 $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) $(RPATH_LIB) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
@@ -128,7 +127,7 @@ ifeq ($(OS), WINNT)
 endif
 
 $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) $(RPATH_LIB) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
@@ -151,7 +150,7 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT)
 $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir)
 	@$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug)
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION
 	sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/'
 
 clean: | $(CLEAN_TARGETS)
diff --git a/cli/README.md b/cli/README.md
index 4021aceb7d839..5a4ecc0a6fc2b 100644
--- a/cli/README.md
+++ b/cli/README.md
@@ -4,9 +4,9 @@ This directory contains the code used by the Julia loader, implementing the piec
 This loader comprises the `julia` executable and the `libjulia` library, which are responsible for setting things up such that `libjulia-internal` and any other internal dependencies can be reliably loaded.
 The code is organized in three pieces:
 
-* `loader_exe.c` gets built into the main `julia` executable.  It immediately loads `libjulia`.
-* `loader_lib.c` gets built into the main `libjulia` shared library.  This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
-* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines.  These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases.
+* `loader_exe.c` gets built into the main `julia` executable. It immediately loads `libjulia`.
+* `loader_lib.c` gets built into the main `libjulia` shared library. This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
+* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines. These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases.
 
 The main requirements of the loader are as follows:
 
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 02030cf2717a5..af2a36cfce8ab 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -125,6 +125,32 @@ static void * lookup_symbol(const void * lib_handle, const char * symbol_name) {
 #endif
 }
 
+#if defined(_OS_WINDOWS_)
+void win32_formatmessage(DWORD code, char *reason, int len) {
+    DWORD res;
+    LPWSTR errmsg;
+    res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                         FORMAT_MESSAGE_FROM_SYSTEM |
+                         FORMAT_MESSAGE_IGNORE_INSERTS |
+                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                         NULL, code,
+                         MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
+                         (LPWSTR)&errmsg, 0, NULL);
+    if (!res && (GetLastError() == ERROR_MUI_FILE_NOT_FOUND ||
+                 GetLastError() == ERROR_RESOURCE_TYPE_NOT_FOUND)) {
+      res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                           FORMAT_MESSAGE_FROM_SYSTEM |
+                           FORMAT_MESSAGE_IGNORE_INSERTS |
+                           FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                           NULL, code,
+                           0, (LPWSTR)&errmsg, 0, NULL);
+    }
+    res = WideCharToMultiByte(CP_UTF8, 0, errmsg, -1, reason, len, NULL, NULL);
+    reason[len - 1] = '\0';
+    LocalFree(errmsg);
+}
+#endif
+
 // Find the location of libjulia.
 char *lib_dir = NULL;
 JL_DLLEXPORT const char * jl_get_libdir()
@@ -135,21 +161,21 @@ JL_DLLEXPORT const char * jl_get_libdir()
     }
 #if defined(_OS_WINDOWS_)
     // On Windows, we use GetModuleFileNameW
-    wchar_t *libjulia_path = utf8_to_wchar(LIBJULIA_NAME);
     HMODULE libjulia = NULL;
 
-    // Get a handle to libjulia.
-    if (!libjulia_path) {
-        jl_loader_print_stderr3("ERROR: Unable to convert path ", LIBJULIA_NAME, " to wide string!\n");
-        exit(1);
-    }
-    libjulia = LoadLibraryW(libjulia_path);
-    if (libjulia == NULL) {
-        jl_loader_print_stderr3("ERROR: Unable to load ", LIBJULIA_NAME, "!\n");
+    // Get a handle to libjulia
+    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                            (LPCWSTR)jl_get_libdir, &libjulia)) {
+        DWORD err = GetLastError();
+        jl_loader_print_stderr3("ERROR: could not locate library \"", LIBJULIA_NAME, "\"\n");
+
+        char msg[2048];
+        win32_formatmessage(err, msg, sizeof(msg));
+        jl_loader_print_stderr(msg);
         exit(1);
     }
-    free(libjulia_path);
-    libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length
+
+    wchar_t *libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length
     if (!GetModuleFileNameW(libjulia, libjulia_path, 32768)) {
         jl_loader_print_stderr("ERROR: GetModuleFileName() failed\n");
         exit(1);
@@ -520,7 +546,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
         (*jl_codegen_exported_func_addrs[symbol_idx]) = addr;
     }
     // Next, if we're on Linux/FreeBSD, set up fast TLS.
-#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_)
+#if !defined(_OS_WINDOWS_) && !defined(_OS_OPENBSD_)
     void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey");
     if (jl_pgcstack_setkey == NULL) {
         jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n");
diff --git a/cli/trampolines/trampolines_aarch64.S b/cli/trampolines/trampolines_aarch64.S
index 2d87ae6dcdb1c..ccb9a647ac6c3 100644
--- a/cli/trampolines/trampolines_aarch64.S
+++ b/cli/trampolines/trampolines_aarch64.S
@@ -5,9 +5,9 @@
 
 #define XX(name) \
 .global CNAME(name) SEP \
+CNAME(name)##: SEP \
 .cfi_startproc SEP \
 .p2align    2 SEP \
-CNAME(name)##: SEP \
     adrp x16, PAGE(CNAME(name##_addr)) SEP \
     ldr x16, [x16, PAGEOFF(CNAME(name##_addr))] SEP \
     br x16 SEP \
diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan
index 96ed13b54e0f9..025cfad82214b 100644
--- a/contrib/asan/Make.user.asan
+++ b/contrib/asan/Make.user.asan
@@ -6,6 +6,7 @@ TOOLDIR=$(TOOLCHAIN)/usr/tools
 USECLANG=1
 override CC=$(TOOLDIR)/clang
 override CXX=$(TOOLDIR)/clang++
+override PATCHELF=$(TOOLDIR)/patchelf
 export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer
 
 USE_BINARYBUILDER_LLVM=1
@@ -16,9 +17,6 @@ override SANITIZE_ADDRESS=1
 # make the GC use regular malloc/frees, which are hooked by ASAN
 override WITH_GC_DEBUG_ENV=1
 
-# default to a debug build for better line number reporting
-override JULIA_BUILD_MODE=debug
-
 # Enable Julia assertions and LLVM assertions
 FORCE_ASSERTIONS=1
 LLVM_ASSERTIONS=1
diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh
index 77f3078b35c42..2e7f243772c81 100755
--- a/contrib/asan/build.sh
+++ b/contrib/asan/build.sh
@@ -40,7 +40,7 @@ if [ ! -d "$TOOLCHAIN" ]; then
     cp "$HERE/Make.user.tools"  "$TOOLCHAIN/Make.user"
 fi
 
-make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools install-patchelf
 
 echo
 echo "Building Julia..."
diff --git a/contrib/bolt/.gitignore b/contrib/bolt/.gitignore
new file mode 100644
index 0000000000000..921d429130268
--- /dev/null
+++ b/contrib/bolt/.gitignore
@@ -0,0 +1,10 @@
+profiles-bolt*
+optimized.build
+toolchain
+
+bolt
+bolt_instrument
+merge_data
+copy_originals
+stage0
+stage1
diff --git a/contrib/bolt/Makefile b/contrib/bolt/Makefile
new file mode 100644
index 0000000000000..ea92ba9ff936a
--- /dev/null
+++ b/contrib/bolt/Makefile
@@ -0,0 +1,134 @@
+.PHONY: clean clean_profiles restore_originals
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+
+PROFILE_DIR:=$(CURDIR)/profiles-bolt
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage1` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage1`. $\
+	You should end up with some data in $(PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+$(STAGE0_BUILD) $(STAGE1_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: | $(STAGE0_BUILD)
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-BOLT && \
+	touch $@
+
+# Build with our custom flags, binary builder doesn't use them so we need to build LLVM for now.
+# We manually skip package image creation so that we can profile it
+$(STAGE1_BUILD): stage0
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage1
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(PROFILE_DIR)/$$file-prof"); \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+# We don't want to rebuild julia-src as then we lose the bolt instrumentation
+# So we have to manually build the sysimage and package image
+finish_stage1: stage1
+	$(MAKE) -C $(STAGE1_BUILD) julia-base-cache && \
+	$(MAKE) -C $(STAGE1_BUILD) -f sysimage.mk sysimg-release && \
+	$(MAKE) -C $(STAGE1_BUILD) -f pkgimage.mk release
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE1_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
diff --git a/contrib/bolt/README.md b/contrib/bolt/README.md
new file mode 100644
index 0000000000000..8680939ef6276
--- /dev/null
+++ b/contrib/bolt/README.md
@@ -0,0 +1,17 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a BOLT-optimized version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make copy_originals
+make bolt_instrument
+make finish_stage1
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl
index e178ec8a02a38..fd3106587fb0d 100755
--- a/contrib/check-whitespace.jl
+++ b/contrib/check-whitespace.jl
@@ -18,6 +18,8 @@ const patterns = split("""
     *Makefile
 """)
 
+const is_gha = something(tryparse(Bool, get(ENV, "GITHUB_ACTIONS", "false")), false)
+
 # Note: `git ls-files` gives `/` as a path separator on Windows,
 #   so we just use `/` for all platforms.
 allow_tabs(path) =
@@ -63,8 +65,14 @@ function check_whitespace()
         for (path, lineno, msg) in sort!(collect(errors))
             if lineno == 0
                 println(stderr, "$path -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, "::", msg)
+                end
             else
                 println(stderr, "$path:$lineno -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, ",line=", lineno, "::", msg)
+                end
             end
         end
         exit(1)
diff --git a/contrib/download_cmake.sh b/contrib/download_cmake.sh
index 1deeb08ddded2..5cf3c579ed052 100755
--- a/contrib/download_cmake.sh
+++ b/contrib/download_cmake.sh
@@ -8,17 +8,17 @@ mkdir -p "$(dirname "$0")"/../deps/scratch
 cd "$(dirname "$0")"/../deps/scratch
 
 CMAKE_VERSION_MAJOR=3
-CMAKE_VERSION_MINOR=19
-CMAKE_VERSION_PATCH=3
+CMAKE_VERSION_MINOR=30
+CMAKE_VERSION_PATCH=1
 CMAKE_VERSION_MAJMIN=$CMAKE_VERSION_MAJOR.$CMAKE_VERSION_MINOR
 CMAKE_VERSION=$CMAKE_VERSION_MAJMIN.$CMAKE_VERSION_PATCH
 
 # listed at https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/cmake-$CMAKE_VERSION-SHA-256.txt
 # for the files cmake-$CMAKE_VERSION-macos-universal.tar.gz
 # cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz and cmake-$CMAKE_VERSION-Linux-aarch64.tar.gz
-CMAKE_SHA256_DARWIN=a6b79ad05f89241a05797510e650354d74ff72cc988981cdd1eb2b3b2bda66ac
-CMAKE_SHA256_LINUX_X86_64=c18b65697e9679e5c88dccede08c323cd3d3730648e59048047bba82097e0ffc
-CMAKE_SHA256_LINUX_AARCH64=66e507c97ffb586d7ca6567890808b792c8eb004b645706df6fbf27826a395a2
+CMAKE_SHA256_DARWIN=51e12618829b811bba6f033ee8f39f6192da1b6abb20d82a7899d5134e879a4c
+CMAKE_SHA256_LINUX_X86_64=ac31f077ef3378641fa25a3cb980d21b2f083982d3149a8f2eb9154f2b53696b
+CMAKE_SHA256_LINUX_AARCH64=ad234996f8750f11d7bd0d17b03f55c434816adf1f1671aab9e8bab21a43286a
 
 PLATFORM="$(uname)-$(uname -m)"
 case $PLATFORM in
@@ -28,12 +28,12 @@ case $PLATFORM in
     echo "$CMAKE_SHA256_DARWIN  $FULLNAME.tar.gz" | shasum -a 256 -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/CMake.app/Contents/bin/cmake;;
   Linux-x86_64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-x86_64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_X86_64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
   Linux-aarch64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-aarch64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_AARCH64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index 57af6c1f32679..d3e73a1b1865a 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -12,8 +12,6 @@ Sys.__init_build()
 if !isdefined(Base, :uv_eventloop)
     Base.reinit_stdio()
 end
-Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl"))
-import .FakePTYs: open_fake_pty
 using Base.Meta
 
 ## Debugging options
@@ -39,6 +37,26 @@ precompile(Base.unsafe_string, (Ptr{Int8},))
 # loading.jl
 precompile(Base.__require_prelocked, (Base.PkgId, Nothing))
 precompile(Base._require, (Base.PkgId, Nothing))
+precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
+precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
+precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int})
+precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int})
+
+# LazyArtifacts (but more generally helpful)
+precompile(Tuple{Type{Base.Val{x} where x}, Module})
+precompile(Tuple{Type{NamedTuple{(:honor_overrides,), T} where T<:Tuple}, Tuple{Bool}})
+precompile(Tuple{typeof(Base.unique!), Array{String, 1}})
+precompile(Tuple{typeof(Base.invokelatest), Any})
+precompile(Tuple{typeof(Base.vcat), Array{String, 1}, Array{String, 1}})
+
+# Pkg loading
+precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}})
+precompile(Tuple{typeof(Base.append!), Array{String, 1}, Array{String, 1}})
+precompile(Tuple{typeof(Base.join), Array{String, 1}, Char})
+precompile(Tuple{typeof(Base.getindex), Base.Dict{Any, Any}, Char})
+precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, Char})
+precompile(Tuple{typeof(Base.convert), Type{Base.Dict{String, Base.Dict{String, String}}}, Base.Dict{String, Any}})
+precompile(Tuple{typeof(Base.convert), Type{Base.Dict{String, Array{String, 1}}}, Base.Dict{String, Any}})
 
 # REPL
 precompile(isequal, (String, String))
@@ -67,6 +85,10 @@ precompile(Tuple{typeof(haskey), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
 precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
 precompile(Tuple{typeof(push!), Vector{Function}, Function})
 
+# preferences
+precompile(Base.get_preferences, (Base.UUID,))
+precompile(Base.record_compiletime_preference, (Base.UUID, String))
+
 # miscellaneous
 precompile(Tuple{typeof(Base.exit)})
 precompile(Tuple{typeof(Base.require), Base.PkgId})
@@ -84,6 +106,23 @@ precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel,
 precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module))
 precompile(Base.StackTraces.lookup, (Ptr{Nothing},))
 precompile(Tuple{typeof(Base.run_module_init), Module, Int})
+
+# precompilepkgs
+precompile(Tuple{typeof(Base.get), Type{Array{String, 1}}, Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.get), Type{Base.Dict{String, Any}}, Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.haskey), Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int, Int})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int})
+precompile(Tuple{typeof(Base.open), Base.CmdRedirect, String, Base.TTY})
+precompile(Tuple{typeof(Base.Precompilation.precompilepkgs)})
+precompile(Tuple{typeof(Base.Precompilation.printpkgstyle), Base.TTY, Symbol, String})
+precompile(Tuple{typeof(Base.rawhandle), Base.TTY})
+precompile(Tuple{typeof(Base.setindex!), Base.Dict{String, Array{String, 1}}, Array{String, 1}, String})
+precompile(Tuple{typeof(Base.setindex!), GenericMemory{:not_atomic, Union{Base.Libc.RawFD, Base.SyncCloseFD, IO}, Core.AddrSpace{Core}(0x00)}, Base.TTY, Int})
+precompile(Tuple{typeof(Base.setup_stdio), Base.TTY, Bool})
+precompile(Tuple{typeof(Base.spawn_opts_inherit), Base.DevNull, Base.TTY, Base.TTY})
+precompile(Tuple{typeof(Core.kwcall), NamedTuple{(:context,), Tuple{Base.TTY}}, typeof(Base.sprint), Function})
+precompile(Tuple{Type{Base.UUID}, Base.UUID})
 """
 
 for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY})
@@ -91,39 +130,66 @@ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base
     hardcoded_precompile_statements *= "precompile(Tuple{typeof(show), $IO, $T})\n"
 end
 
+# Precompiles for Revise and other packages
 precompile_script = """
-# NOTE: these were moved to the end of Base.jl. TODO: move back here.
-# # Used by Revise & its dependencies
-# while true  # force inference
-# delete!(push!(Set{Module}(), Base), Main)
-# m = first(methods(+))
-# delete!(push!(Set{Method}(), m), m)
-# empty!(Set())
-# push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
-# (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
-# (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
-# (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
-# (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
-# (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
-# Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
-# Dict(Base => [:(1+1)])[Base]
-# Dict(:one => [1])[:one]
-# Dict("abc" => Set())["abc"]
-# pushfirst!([], sum)
-# get(Base.pkgorigins, Base.PkgId(Base), nothing)
-# sort!([1,2,3])
-# unique!([1,2,3])
-# cumsum([1,2,3])
-# append!(Int[], BitSet())
-# isempty(BitSet())
-# delete!(BitSet([1,2]), 3)
-# deleteat!(Int32[1,2,3], [1,3])
-# deleteat!(Any[1,2,3], [1,3])
-# Core.svec(1, 2) == Core.svec(3, 4)
-# # copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(which(+, (Int, Int)), [Int, Int], Core.svec())))
-# any(t->t[1].line > 1, [(LineNumberNode(2,:none),:(1+1))])
-# break   # end force inference
-# end
+for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter())
+    m = match.method
+    delete!(push!(Set{Method}(), m), m)
+    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt)))
+
+    empty!(Set())
+    push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
+    (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
+    (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
+    (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
+    (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
+    (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
+    Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
+    Dict(Base => [:(1+1)])[Base]
+    Dict(:one => [1])[:one]
+    Dict("abc" => Set())["abc"]
+    pushfirst!([], sum)
+    get(Base.pkgorigins, Base.PkgId(Base), nothing)
+    sort!([1,2,3])
+    unique!([1,2,3])
+    cumsum([1,2,3])
+    append!(Int[], BitSet())
+    isempty(BitSet())
+    delete!(BitSet([1,2]), 3)
+    deleteat!(Int32[1,2,3], [1,3])
+    deleteat!(Any[1,2,3], [1,3])
+    Core.svec(1, 2) == Core.svec(3, 4)
+    any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
+
+    # Code loading uses this
+    sortperm(mtime.(readdir(".")), rev=true)
+    # JLLWrappers uses these
+    Dict{Base.UUID,Set{String}}()[Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}()
+    get!(Set{String}, Dict{Base.UUID,Set{String}}(), Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210"))
+    eachindex(IndexLinear(), Expr[])
+    push!(Expr[], Expr(:return, false))
+    vcat(String[], String[])
+    k, v = (:hello => nothing)
+    Base.print_time_imports_report(Base)
+    Base.print_time_imports_report_init(Base)
+
+    # Preferences uses these
+    get(Dict{String,Any}(), "missing", nothing)
+    delete!(Dict{String,Any}(), "missing")
+    for (k, v) in Dict{String,Any}()
+        println(k)
+    end
+
+    # interactive startup uses this
+    write(IOBuffer(), "")
+
+    # not critical, but helps hide unrelated compilation from @time when using --trace-compile
+    foo() = rand(2,2) * rand(2,2)
+    @time foo()
+    @time foo()
+
+    break   # only actually need to do this once
+end
 """
 
 julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename())
@@ -203,10 +269,10 @@ ansi_disablecursor = "\e[?25l"
 blackhole = Sys.isunix() ? "/dev/null" : "nul"
 procenv = Dict{String,Any}(
         "JULIA_HISTORY" => blackhole,
-        "JULIA_PROJECT" => nothing, # remove from environment
-        "JULIA_LOAD_PATH" => "@stdlib",
+        "JULIA_LOAD_PATH" => "@$(Sys.iswindows() ? ";" : ":")@stdlib",
         "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
         "TERM" => "",
+        # "JULIA_DEBUG" => "precompilation",
         "JULIA_FALLBACK_REPL" => "true")
 
 generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed
@@ -243,24 +309,32 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         print_state("step1" => "R")
         # Also precompile a package here
         pkgname = "__PackagePrecompilationStatementModule"
-        mkpath(joinpath(prec_path, pkgname, "src"))
-        path = joinpath(prec_path, pkgname, "src", "$pkgname.jl")
-        write(path,
-              """
-              module $pkgname
-              end
-              """)
+        pkguuid = "824efdaf-a0e9-431c-8ee7-3d356b2531c2"
+        pkgpath = joinpath(prec_path, pkgname)
+        mkpath(joinpath(pkgpath, "src"))
+        write(joinpath(pkgpath, "src", "$pkgname.jl"),
+            """
+            module $pkgname
+            println("Precompiling $pkgname")
+            end
+            """)
+        write(joinpath(pkgpath, "Project.toml"),
+            """
+            name = "$pkgname"
+            uuid = "$pkguuid"
+            """)
+        touch(joinpath(pkgpath, "Manifest.toml"))
         tmp_prec = tempname(prec_path)
         tmp_proc = tempname(prec_path)
         s = """
-            pushfirst!(DEPOT_PATH, $(repr(prec_path)));
+            pushfirst!(DEPOT_PATH, $(repr(joinpath(prec_path,"depot"))));
             Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
-            Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
+            Base.Precompilation.precompilepkgs(;fancyprint=true);
             $precompile_script
             """
         p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg
-                --cpu-target=native --startup-file=no --color=yes`, procenv),
-                 stdin=IOBuffer(s), stdout=debug_output))
+                --cpu-target=native --startup-file=no --color=yes --project=$(pkgpath)`, procenv),
+                 stdin=IOBuffer(s), stderr=debug_output, stdout=debug_output))
         n_step1 = 0
         for f in (tmp_prec, tmp_proc)
             isfile(f) || continue
@@ -314,9 +388,8 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
             yield() # Make clock spinning
             print_state("step3" => string("R$n_succeeded", failed > 0 ? " ($failed failed)" : ""))
         catch ex
-            @show backtrace()
             # See #28808
-            @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0
+            @warn "Failed to precompile expression" form=statement exception=(ex,catch_backtrace()) _module=nothing _file=nothing _line=0
         end
     end
     wait(clock) # Stop asynchronous printing
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index df17b967c1ed7..c692b3f522fb2 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -67,9 +67,7 @@ function ldlibs(doframework)
         "julia"
     end
     if Sys.isunix()
-        return "-Wl,-rpath,$(shell_escape(libDir())) " *
-            (Sys.isapple() ? string() : "-Wl,-rpath,$(shell_escape(private_libDir())) ") *
-            "-l$libname"
+        return "-Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname"
     else
         return "-l$libname -lopenlibm"
     end
diff --git a/contrib/mac/frameworkapp/README.md b/contrib/mac/frameworkapp/README.md
index 94c344d16564f..953ad115c94e1 100644
--- a/contrib/mac/frameworkapp/README.md
+++ b/contrib/mac/frameworkapp/README.md
@@ -4,23 +4,23 @@ New Julia Launcher App
 This builds the Julia framework and a launcher app and packages them in a
 product archive for the macOS Installer.
 
-Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive.  The
+Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive. The
 resulting archive may be installed to the home directory with
 `installer -pkg~/Documents/pkgs/Julia-1.1.0.pkg -target CurrentUserHomeDirectory`.
-To just build the app, build the `appexport` make target.  Read the comments at
+To just build the app, build the `appexport` make target. Read the comments at
 the top of the `Makefile` to set appropriate code signing parameters.
 
 The framework is installed in `/Library/Frameworks` and the app in
-`/Applications`.  Installation may be system-wide (i.e., relative to `/`) or
+`/Applications`. Installation may be system-wide (i.e., relative to `/`) or
 local to the user's home directory (i.e., `$Home/Applications/Julia.app`).
 
 The `julia` binary is embedded in the framework at
 `Julia.framework/Helpers/julia`.
 
-Multiple versions of Julia may be installed at once.  Each version is placed in
-the `Julia.framework/Versions` directory.  By default, the version is
+Multiple versions of Julia may be installed at once. Each version is placed in
+the `Julia.framework/Versions` directory. By default, the version is
 identified by the Major.Minor version number but may be customized by setting
-the `FRAMEWORK_VERSION` make variable.  The resulting product archive will not
-overwrite other versions but will upgrade a version if it exists.  Thus, the
+the `FRAMEWORK_VERSION` make variable. The resulting product archive will not
+overwrite other versions but will upgrade a version if it exists. Thus, the
 `1.1` framework version that is actually the 3rd patch (1.1.3) will overwrite
 any existing `1.1` framework version.
diff --git a/contrib/normalize_triplet.py b/contrib/normalize_triplet.py
index 77c047b360b76..b1bab29487b8f 100755
--- a/contrib/normalize_triplet.py
+++ b/contrib/normalize_triplet.py
@@ -19,6 +19,7 @@
 platform_mapping = {
     'darwin': "-apple-darwin[\\d\\.]*",
     'freebsd': "-(.*-)?freebsd[\\d\\.]*",
+    'openbsd': "-(.*-)?openbsd[\\d\\.]*",
     'windows': "-w64-mingw32",
     'linux': "-(.*-)?linux",
 }
@@ -96,6 +97,7 @@ def p(x):
         'darwin': 'apple-darwin',
         'windows': 'w64-mingw32',
         'freebsd': 'unknown-freebsd',
+        'openbsd': 'unknown-openbsd',
     }
     x = r(x)
     if x:
diff --git a/contrib/pgo-lto-bolt/.gitignore b/contrib/pgo-lto-bolt/.gitignore
new file mode 100644
index 0000000000000..1b29279acc0da
--- /dev/null
+++ b/contrib/pgo-lto-bolt/.gitignore
@@ -0,0 +1,14 @@
+stage0*
+stage1*
+stage2*
+bolt
+bolt_instrument
+merge_data
+copy_originals
+
+profiles
+profiles-bolt
+
+toolchain
+pgo-instrumented.build
+optimized.build
diff --git a/contrib/pgo-lto-bolt/Makefile b/contrib/pgo-lto-bolt/Makefile
new file mode 100644
index 0000000000000..2114b14991184
--- /dev/null
+++ b/contrib/pgo-lto-bolt/Makefile
@@ -0,0 +1,188 @@
+.PHONY: clean clean_profiles restore_originals
+
+# See the makefiles in contrib/bolt and contrib/pgo-lto for more information.
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/pgo-instrumented.build
+STAGE2_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
+
+BOLT_PROFILE_DIR:=$(CURDIR)/profiles-bolt
+PGO_PROFILE_DIR:=$(CURDIR)/profiles
+PGO_PROFILE_FILE:=$(PGO_PROFILE_DIR)/merged.prof
+PGO_PROFRAW_FILES:=$(wildcard $(PGO_PROFILE_DIR)/*.profraw)
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
+LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata
+LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage2` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage2`. $\
+	You should end up with some data in $(BOLT_PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+# When building a single libLLVM.so we need to increase -vp-counters-per-site
+# significantly
+COUNTERS_PER_SITE:=6
+# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
+
+AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
+	with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\
+	generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\
+	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
+	`make top`. Afterwards run `make stage2`.'
+
+TOOLCHAIN_FLAGS = $\
+	"CC=$(STAGE0_TOOLS)clang" $\
+	"CXX=$(STAGE0_TOOLS)clang++" $\
+	"LD=$(STAGE0_TOOLS)ld.lld" $\
+	"AR=$(STAGE0_TOOLS)llvm-ar" $\
+	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
+	"CFLAGS+=$(PGO_CFLAGS)" $\
+	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
+	"LDFLAGS+=-fuse-ld=lld $(PGO_LDFLAGS)"
+
+$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: export USE_BINARYBUILDER_LLVM=1
+stage0: | $(STAGE0_BUILD)
+	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
+	# doesn't do that, and otherwise the profile constructor is not executed
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools install-BOLT && \
+	find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
+	touch $@
+
+$(STAGE1_BUILD): stage0
+stage1: PGO_CFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_LDFLAGS:=-flto=thin -fprofile-generate=$(PGO_PROFILE_DIR)
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo $(AFTER_STAGE1_MESSAGE)
+
+stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+stage2: export USE_BINARYBUILDER_LLVM=0
+stage2: $(PGO_PROFILE_FILE) | $(STAGE2_BUILD)
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage2
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(BOLT_PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(BOLT_PROFILE_DIR)/$$file-prof"); \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+# We don't want to rebuild julia-src as then we lose the bolt instrumentation
+# So we have to manually build the sysimage and package image
+finish_stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+finish_stage2: stage2
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) julia-base-cache && \
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) -f sysimage.mk sysimg-release && \
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) -f pkgimage.mk release
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(BOLT_PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE2_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PGO_PROFILE_DIR) $(BOLT_PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 stage2 $(PGO_PROFILE_FILE) bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
+
+$(PGO_PROFILE_FILE): stage1 $(PGO_PROFRAW_FILES)
+	$(LLVM_PROFDATA) merge -output=$@ $(PGO_PROFRAW_FILES)
+
+# show top 50 functions
+top: $(PGO_PROFILE_FILE)
+	$(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT)
diff --git a/contrib/pgo-lto-bolt/README.md b/contrib/pgo-lto-bolt/README.md
new file mode 100644
index 0000000000000..ab574907c292f
--- /dev/null
+++ b/contrib/pgo-lto-bolt/README.md
@@ -0,0 +1,18 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a PGO+LTO+BOLT version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make stage2
+make copy_originals
+make bolt_instrument
+make finish_stage2
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile
index a73825e182561..ddd86f5d5b39a 100644
--- a/contrib/pgo-lto/Makefile
+++ b/contrib/pgo-lto/Makefile
@@ -8,7 +8,6 @@ STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
 
 PROFILE_DIR:=$(CURDIR)/profiles
 PROFILE_FILE:=$(PROFILE_DIR)/merged.prof
-PROFRAW_FILES:=$(wildcard $(PROFILE_DIR)/*.profraw)
 JULIA_ROOT:=$(CURDIR)/../..
 
 LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
@@ -18,23 +17,24 @@ LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
 # When building a single libLLVM.so we need to increase -vp-counters-per-site
 # significantly
 COUNTERS_PER_SITE:=6
+# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
 
-AFTER_STAGE1_MESSAGE:='Run `make clean-profiles` to start with a clean slate. $\
-    Then run Julia to collect realistic profile data, for example: `$(STAGE1_BUILD)/julia -O3 -e $\
-    '\''using Pkg; Pkg.add("LoopVectorization"); Pkg.test("LoopVectorization")'\''`. This $\
-	should produce about 15MB of data in $(PROFILE_DIR). Note that running extensive $\
-	scripts may result in counter overflows, which can be detected by running $\
+AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
+	with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\
+	generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\
+	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
 	`make top`. Afterwards run `make stage2`.'
 
-TOOLCHAIN_FLAGS = $\
-	"CC=$(STAGE0_TOOLS)clang" $\
-	"CXX=$(STAGE0_TOOLS)clang++" $\
-	"LD=$(STAGE0_TOOLS)ld.lld" $\
-	"AR=$(STAGE0_TOOLS)llvm-ar" $\
-	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
-	"CFLAGS+=$(PGO_CFLAGS)" $\
-	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
-	"LDFLAGS+=$(PGO_LDFLAGS)"
+STAGE1_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" $\
+			CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" $\
+			CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)"
+STAGE2_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" $\
+			CFLAGS="-fprofile-use=$(PROFILE_FILE)" $\
+			CXXFLAGS="-fprofile-use=$(PROFILE_FILE)"
+
+COMMON_FLAGS:=USECLANG=1 USE_BINARYBUILDER_LLVM=0
+
+all: stage2 # Default target as first in file
 
 $(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
 	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
@@ -48,26 +48,20 @@ stage0: | $(STAGE0_BUILD)
 	touch $@
 
 $(STAGE1_BUILD): stage0
-stage1: PGO_CFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
-stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
-stage1: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR)
-stage1: export USE_BINARYBUILDER_LLVM=0
 stage1: | $(STAGE1_BUILD)
-	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo "--- Build Julia Stage 1 - with instrumentation"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) $(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) && touch $@
 	@echo $(AFTER_STAGE1_MESSAGE)
 
-stage2: PGO_CFLAGS:=-fprofile-use=$(PROFILE_FILE)
-stage2: PGO_CXXFLAGS:=-fprofile-use=$(PROFILE_FILE)
-stage2: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe
-stage2: export USE_BINARYBUILDER_LLVM=0
 stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
-	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo "--- Build Julia Stage 2 - PGO + LTO optimised"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) && touch $@
 
-install: stage2
-	$(MAKE) -C $(STAGE2_BUILD) USE_BINARYBUILDER_LLVM=0 install
+.DEFAULT: stage2
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@
 
-$(PROFILE_FILE): stage1 $(PROFRAW_FILES)
-	$(LLVM_PROFDATA) merge -output=$@ $(PROFRAW_FILES)
+$(PROFILE_FILE): stage1 $(wildcard $(PROFILE_DIR)/*.profraw)
+	$(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw
 
 # show top 50 functions
 top: $(PROFILE_FILE)
diff --git a/deps/BOLT.mk b/deps/BOLT.mk
new file mode 100644
index 0000000000000..34391ab10f716
--- /dev/null
+++ b/deps/BOLT.mk
@@ -0,0 +1,118 @@
+## BOLT ##
+include $(SRCDIR)/BOLT.version
+
+ifneq ($(USE_BINARYBUILDER_BOLT), 1)
+BOLT_GIT_URL:=https://github.com/llvm/llvm-project.git
+BOLT_TAR_URL=https://api.github.com/repos/llvm/llvm-project/tarball/$1
+$(eval $(call git-external,BOLT,BOLT,CMakeLists.txt,,$(SRCCACHE)))
+
+BOLT_BUILDDIR := $(BUILDDIR)/$(BOLT_SRC_DIR)/build
+
+LLVM_ENABLE_PROJECTS := bolt
+
+LLVM_CFLAGS :=
+LLVM_CXXFLAGS :=
+LLVM_CPPFLAGS :=
+LLVM_LDFLAGS :=
+LLVM_CMAKE :=
+
+LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
+
+# Otherwise LLVM will translate \\ to / on mingw
+LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
+
+# Allow adding LLVM specific flags
+LLVM_CFLAGS += $(CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CXXFLAGS += $(LLVM_CXXFLAGS)
+LLVM_CPPFLAGS += $(CPPFLAGS)
+LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(LLVM_LDFLAGS)
+LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING=host -DCMAKE_BUILD_TYPE=Release
+LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off
+
+ifeq ($(OS), WINNT)
+LLVM_CPPFLAGS += -D__USING_SJLJ_EXCEPTIONS__ -D__CRT__NO_INLINE
+endif # OS == WINNT
+ifneq ($(HOSTCC),$(CC))
+LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE="-DCMAKE_C_COMPILER=$$(which $(HOSTCC));-DCMAKE_CXX_COMPILER=$$(which $(HOSTCXX))"
+
+# Defaults to off when crosscompiling, starting from LLVM 18
+LLVM_CMAKE += -DBOLT_ENABLE_RUNTIME=ON
+endif
+ifeq ($(OS), emscripten)
+LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF
+endif # OS == emscripten
+
+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
+ifeq (${USECLANG},0)
+LLVM_CXXFLAGS += -mminimal-toc
+endif
+endif
+
+ifeq ($(fPIC),)
+LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
+endif
+
+LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
+	-DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)"
+ifeq ($(OS),Darwin)
+# Explicitly use the default for -mmacosx-version-min=10.9 and later
+LLVM_CMAKE += -DLLVM_ENABLE_LIBCXX=ON
+endif
+
+LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
+	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)"
+
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(BOLT_BUILDDIR)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
+$(BOLT_BUILDDIR)/build-configured: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+		$(CMAKE) $(SRCCACHE)/$(BOLT_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
+		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-compiled: $(BOLT_BUILDDIR)/build-configured
+	cd $(BOLT_BUILDDIR) && \
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build . --target bolt)
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-checked: $(BOLT_BUILDDIR)/build-compiled
+ifeq ($(OS),$(BUILD_OS))
+	cd $(BOLT_BUILDDIR) && \
+		  $(CMAKE) --build . --target check-bolt
+endif
+	echo 1 > $@
+
+BOLT_INSTALL = \
+	cd $1 && mkdir -p $2$$(build_depsbindir) && \
+	$$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P tools/bolt/cmake_install.cmake
+
+$(eval $(call staged-install, \
+	bolt,$$(BOLT_SRC_DIR)/build, \
+	BOLT_INSTALL,,,))
+
+clean-bolt:
+	-rm -f $(BOLT_BUILDDIR)/build-configured $(BOLT_BUILDDIR)/build-compiled
+	-$(MAKE) -C $(BOLT_BUILDDIR) clean
+
+get-bolt: $(BOLT_SRC_FILE)
+extract-bolt: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+configure-bolt: $(BOLT_BUILDDIR)/build-configured
+compile-bolt: $(BOLT_BUILDDIR)/build-compiled
+fastcheck-bolt: #none
+check-bolt: $(BOLT_BUILDDIR)/build-checked
+
+else # USE_BINARYBUILDER_BOLT
+
+$(eval $(call bb-install,BOLT,BOLT,false,true))
+
+endif # USE_BINARYBUILDER_BOLT
diff --git a/deps/BOLT.version b/deps/BOLT.version
new file mode 100644
index 0000000000000..6a785041e163f
--- /dev/null
+++ b/deps/BOLT.version
@@ -0,0 +1,11 @@
+# -*- makefile -*-
+
+BOLT_VER := 18.1.4
+BOLT_JLL_VER := 18.1.4+0
+
+## jll artifact
+BOLT_JLL_NAME := BOLT
+
+## source build
+BOLT_BRANCH=llvmorg-$(BOLT_VER)
+BOLT_SHA1=e6c3289804a67ea0bb6a86fadbe454dd93b8d855
diff --git a/deps/Makefile b/deps/Makefile
index 27f5fdbb693d5..b87a3e1e58609 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -64,13 +64,15 @@ ifeq ($(OS), Linux)
 DEP_LIBS += unwind
 else ifeq ($(OS), FreeBSD)
 DEP_LIBS += unwind
+else ifeq ($(OS), OpenBSD)
+DEP_LIBS += llvmunwind
 else ifeq ($(OS), Darwin)
 DEP_LIBS += llvmunwind
 endif
 endif
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 ifeq ($(USE_SYSTEM_PATCHELF), 0)
 DEP_LIBS += patchelf
 PATCHELF:=$(build_depsbindir)/patchelf
@@ -117,7 +119,6 @@ ifeq ($(USE_SYSTEM_GMP), 0)
 DEP_LIBS += gmp
 endif
 
-ifeq ($(USE_SYSTEM_LIBGIT2), 0)
 ifeq ($(USE_SYSTEM_MBEDTLS), 0)
 DEP_LIBS += mbedtls
 endif
@@ -134,18 +135,19 @@ ifeq ($(USE_SYSTEM_CURL), 0)
 DEP_LIBS += curl
 endif
 
+ifeq ($(USE_SYSTEM_LIBGIT2), 0)
 DEP_LIBS += libgit2
-endif # USE_SYSTEM_LIBGIT2
+endif
 
 ifeq ($(USE_SYSTEM_MPFR), 0)
 DEP_LIBS += mpfr
 endif
 
-ifeq ($(USE_GPL_LIBS), 1)
+# Only some of the modules in SuiteSparse are GPL.
+# xref: `remove-libsuitesparse-gpl-lib` in libsuitesparse.mk
 ifeq ($(USE_SYSTEM_LIBSUITESPARSE), 0)
 DEP_LIBS += libsuitesparse
 endif
-endif
 
 ifeq ($(USE_SYSTEM_UTF8PROC), 0)
 DEP_LIBS += utf8proc
@@ -169,6 +171,13 @@ ifeq ($(WITH_ITTAPI),1)
 DEP_LIBS += ittapi
 endif
 
+ifeq ($(WITH_NVTX),1)
+DEP_LIBS += nvtx
+endif
+
+ifneq ($(WITH_TERMINFO),0)
+DEP_LIBS += terminfo
+endif
 
 # Only compile standalone LAPACK if we are not using OpenBLAS.
 # OpenBLAS otherwise compiles LAPACK as part of its build.
@@ -192,7 +201,8 @@ DEP_LIBS_STAGED := $(DEP_LIBS)
 DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
 	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
 	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
-	sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax
+	sanitizers libsuitesparse lld libtracyclient ittapi nvtx JuliaSyntax \
+	terminfo
 DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
 ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
@@ -226,9 +236,11 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
 	rm -rf $(build_prefix)
 getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
+include $(SRCDIR)/BOLT.mk
 include $(SRCDIR)/csl.mk
 include $(SRCDIR)/sanitizers.mk
 include $(SRCDIR)/ittapi.mk
+include $(SRCDIR)/nvtx.mk
 include $(SRCDIR)/llvm.mk
 include $(SRCDIR)/libuv.mk
 include $(SRCDIR)/pcre.mk
@@ -252,6 +264,7 @@ include $(SRCDIR)/libgit2.mk
 include $(SRCDIR)/libwhich.mk
 include $(SRCDIR)/p7zip.mk
 include $(SRCDIR)/libtracyclient.mk
+include $(SRCDIR)/terminfo.mk
 
 # vendored Julia libs
 include $(SRCDIR)/JuliaSyntax.mk
diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk
index bd1cb65c6ae2d..cfa28a4d8b88f 100644
--- a/deps/blastrampoline.mk
+++ b/deps/blastrampoline.mk
@@ -16,16 +16,16 @@ $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPO
 BLASTRAMPOLINE_BUILD_ROOT := $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src
 $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured
 	cd $(dir $@)/src && $(MAKE) $(BLASTRAMPOLINE_BUILD_OPTS)
-ifeq ($(OS), WINNT)
-	# Windows doesn't like soft link, use hard link
-	cd $(BLASTRAMPOLINE_BUILD_ROOT)/build/ && \
-		cp -f --dereference --link libblastrampoline.dll libblastrampoline.dll
-endif
 	echo 1 > $@
 
 define BLASTRAMPOLINE_INSTALL
 	$(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) install $(BLASTRAMPOLINE_BUILD_OPTS) DESTDIR="$2"
 endef
+ifeq ($(OS), WINNT)
+# Windows doesn't like soft link, use hard link to copy file without version suffix
+BLASTRAMPOLINE_INSTALL += && cd $2$$(build_prefix)/bin && \
+$$(WIN_MAKE_HARD_LINK) libblastrampoline-*.dll libblastrampoline.dll
+endif
 $(eval $(call staged-install, \
 	blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \
 	BLASTRAMPOLINE_INSTALL,, \
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
index 616300377e3e6..fd055e1ae8120 100644
--- a/deps/blastrampoline.version
+++ b/deps/blastrampoline.version
@@ -2,6 +2,6 @@
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 ## source build
-BLASTRAMPOLINE_VER := 5.8.0
-BLASTRAMPOLINE_BRANCH=v5.8.0
-BLASTRAMPOLINE_SHA1=81316155d4838392e8462a92bcac3eebe9acd0c7
+BLASTRAMPOLINE_VER := 5.11.0
+BLASTRAMPOLINE_BRANCH=v5.11.0
+BLASTRAMPOLINE_SHA1=05083d50611b5538df69706f0a952d8e642b0b4b
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
new file mode 100644
index 0000000000000..62e63ff3174d6
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
@@ -0,0 +1 @@
+c12540d5889cef05bc87183a4ce5a54c
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
new file mode 100644
index 0000000000000..0635e180ac9a5
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
@@ -0,0 +1 @@
+61cc7cc42b925f37502eed0d31eafadbfdc24a9ebc892c9b8d96a27b004cbccf2e5da7face5c8d9c9db57fac1b5cf662d890a67337436c5d4aa3373256638ab1
diff --git a/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/md5 b/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/md5
deleted file mode 100644
index f6ae9d2d77aeb..0000000000000
--- a/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-316bc519c49cd54685cfceb76a4dab5d
diff --git a/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/sha512 b/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/sha512
deleted file mode 100644
index 275997e84e6d5..0000000000000
--- a/deps/checksums/Distributed-6a07d9853ab7686df7440a47d1b585c6c9f3be35.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e71c269c5ca92f4b5b709b871dd1bb06f9e23c5b1444a30f780ccb37f4aa19b50668a5f81c85269f7cea5ebe740b453c1b3fe1569629f441d01d8964cd185e54
diff --git a/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/md5 b/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/md5
new file mode 100644
index 0000000000000..9904464c82b3b
--- /dev/null
+++ b/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/md5
@@ -0,0 +1 @@
+390521058a478a131ca49d349c9b9383
diff --git a/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/sha512 b/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/sha512
new file mode 100644
index 0000000000000..a7fbe055c2251
--- /dev/null
+++ b/deps/checksums/Distributed-6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781.tar.gz/sha512
@@ -0,0 +1 @@
+7f0f414d94739a25b7d713c46887e26cd349329828d42297f44928204b36d15ba9163ad6f670aba72ed9229557bb0f35ab4686429975d1f349fe12b1ba2b189f
diff --git a/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/md5 b/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/md5
new file mode 100644
index 0000000000000..f42bbedb6d415
--- /dev/null
+++ b/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/md5
@@ -0,0 +1 @@
+70878dd96911d6960537dfee2a820d98
diff --git a/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/sha512 b/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/sha512
new file mode 100644
index 0000000000000..83164cad9a89d
--- /dev/null
+++ b/deps/checksums/Downloads-1061ecc377a053fce0df94e1a19e5260f7c030f5.tar.gz/sha512
@@ -0,0 +1 @@
+87d2bdc6c85cbbce5302aab8ffe92fc542c9c71a396844fcc04c0416be059b00298b4816ab5e5491dbf865660a3a6152f1c245875a1ec75fb49b4c7ba0d303d8
diff --git a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5 b/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5
deleted file mode 100644
index fc3bce951cafb..0000000000000
--- a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-97bb33510fadec7f4cc4c718c739e9a0
diff --git a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512 b/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512
deleted file mode 100644
index bf2821e8252b0..0000000000000
--- a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a362aaf762f42deebb8632a7a7980cd22b2777e8c4dc629e418580269e24a64217ad846d61acad70438cfdc190e47ba2ff7716edd4e04d8d10c1d765efce604d
diff --git a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5
deleted file mode 100644
index 5e99f7453cfe2..0000000000000
--- a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-08230d0801fda3c81927d558452215e4
diff --git a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512 b/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512
deleted file mode 100644
index 16d15cdef3104..0000000000000
--- a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0386841dcf30ee53f7f95dd3206e1208482507d157aa09739873de2a56e5ca3d7bbf27eccd9f4ed81c1c0fea229673475f6454fe94df0ff960563ca4c29ed36c
diff --git a/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5
new file mode 100644
index 0000000000000..cbcb8097d1673
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5
@@ -0,0 +1 @@
+3dc1387ed88ba3c0df04d05a86d804d0
diff --git a/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512 b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512
new file mode 100644
index 0000000000000..2e58061d16058
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512
@@ -0,0 +1 @@
+fe30ed73b257e6928097cb7baca5b82a9a60b2f9b9f219fbcf570c5ed513447f0fda2a48da06b57e381516a69278f7f8519764d00e9e4fb5683a5411e245ef45
diff --git a/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/md5 b/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/md5
new file mode 100644
index 0000000000000..7f391aac1e64d
--- /dev/null
+++ b/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/md5
@@ -0,0 +1 @@
+d2ccb9b91b0700bfb5ac7c01a03b4322
diff --git a/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/sha512 b/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/sha512
new file mode 100644
index 0000000000000..b7db226225d75
--- /dev/null
+++ b/deps/checksums/NetworkOptions-8eec5cb0acec4591e6db3c017f7499426cd8e352.tar.gz/sha512
@@ -0,0 +1 @@
+051223ab45dce692c0bbea0755cc0057bb3322a40659c092007799932a10cc23e80ee6b88fa0d86f28af5e7fe5a455cc9fb8267c76af0cd2b425cf2718629e91
diff --git a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5 b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5
deleted file mode 100644
index 433b89eaedcc2..0000000000000
--- a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-73b81916dc08382ad34b8110983b61ad
diff --git a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512 b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512
deleted file mode 100644
index 01f34b83dc80b..0000000000000
--- a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a860163ef4fa5c2ff8d8712fff5ad5bb16e7697acc53538a0dda4435b910f4ad94f9c206e0d81864762361fdbcfbc57eb2e1708b33326f4f1c8d95997a495fe3
diff --git a/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/md5 b/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/md5
new file mode 100644
index 0000000000000..3c112b99f88d9
--- /dev/null
+++ b/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/md5
@@ -0,0 +1 @@
+791c9ca37077fdc36b959a17904dd935
diff --git a/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/sha512 b/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/sha512
new file mode 100644
index 0000000000000..c7c212047d2b0
--- /dev/null
+++ b/deps/checksums/Pkg-299a356100f54215388502148979189aff760822.tar.gz/sha512
@@ -0,0 +1 @@
+96520326931685d4300e825a302010f113e942aaa55aa4ff12caf3e9df314309df993c97753ae482c2198db67678423885bf5ea40c743c8e4b6ef96d7b8d4472
diff --git a/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/md5 b/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/md5
deleted file mode 100644
index 341ea80c32a7e..0000000000000
--- a/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fea1786e8202d07744c17457a114e911
diff --git a/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/sha512 b/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/sha512
deleted file mode 100644
index 8f6d09bbb6001..0000000000000
--- a/deps/checksums/Pkg-48eea8dbd7b651cdc932b909c1b718bb9c3f94f4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e79913b68643ec4c448a861f446d0358484b01d16794c9dfe700b1f539d4c01ea259014d37df263b4d7dd3f3c2e1df3966c584b428041dbdf03d4e4794b52a64
diff --git a/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5 b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5
new file mode 100644
index 0000000000000..7182cc71f7b35
--- /dev/null
+++ b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5
@@ -0,0 +1 @@
+2db86c7030acc973d5b46a87f32f7e99
diff --git a/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512 b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512
new file mode 100644
index 0000000000000..a9e18eac9bfaa
--- /dev/null
+++ b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512
@@ -0,0 +1 @@
+0d3f54e7e75b48966e1816608d6ddf62175b92a0c778813a562df20750c6ecef9e4ccc24f9f3fffe4051d4b6765332add8c289fcdc598c320f400cec57a223a3
diff --git a/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/md5 b/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/md5
deleted file mode 100644
index 5234c20cb4ff7..0000000000000
--- a/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-47cb7d9dd6f3d8ae3cb497c202ae6411
diff --git a/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/sha512 b/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/sha512
deleted file mode 100644
index 1aa2d8146d78e..0000000000000
--- a/deps/checksums/SparseArrays-cb602d7b7cf46057ddc87d23cda2bdd168a548ac.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5ff47ea50564375e5e926c3f592a9708b1d9862e4090a53c6b02fc09bc1872578e016a4231564a10dd17be174beed54fd0b8821430828e7148f09556f8034ed9
diff --git a/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/md5 b/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/md5
deleted file mode 100644
index 2ab79799cca0e..0000000000000
--- a/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fc3a846400107c432d20da6cfdd19ccf
diff --git a/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/sha512 b/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/sha512
deleted file mode 100644
index 70b0ef6f5cb3a..0000000000000
--- a/deps/checksums/StyledStrings-e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-22da8964cc4c09f7c7a3da44be14c953f520ce6d395cf0f9ccf9c17777d6d968b0a874b35c072801ef7a1f4eee40f96ea0e2fc5ed5b3a63ad0b6b776a9c14ebb
diff --git a/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5 b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5
new file mode 100644
index 0000000000000..0d39747d275ba
--- /dev/null
+++ b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5
@@ -0,0 +1 @@
+bf7c157df6084942b794fbe5b768a643
diff --git a/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512 b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512
new file mode 100644
index 0000000000000..d0a8d6cec08cf
--- /dev/null
+++ b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512
@@ -0,0 +1 @@
+ba2f6b91494662208842dec580ea9410d8d6ba4e57315c72e872227f5e2f68cc970fcf5dbd9c8a03920f93b6adabdeaab738fff04f9ca7b5da5cd6b89759e7f6
diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5
new file mode 100644
index 0000000000000..3c7510a592760
--- /dev/null
+++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5
@@ -0,0 +1 @@
+1d606dfc60d2af892009213650169129
diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512
new file mode 100644
index 0000000000000..ec49e695cbb3a
--- /dev/null
+++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512
@@ -0,0 +1 @@
+6e60d74d00ffc2e1a5a9c13f59b3e3fc4360e641b9f0e3e4797c8b524288e779397bd56a8e57f47d5a06d1e6f359c86917164ec7f6e0ac3d6e876dfa09d2b0c8
diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5
deleted file mode 100644
index 921ffb0a2561e..0000000000000
--- a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b3d21b3f38cd106e64fa9d058d095651
diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512
deleted file mode 100644
index cbf6ad4952258..0000000000000
--- a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-61bd3555de7a2cec265ae72d58b4635f84ec75b993b9dab2dc5be64375b6057972a2786337f90742ad3b91c57f5008372a3a4f8a5b589e2cf4d5cd1a8056e03c
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index d72a584fd1b0c..edb8cadc74846 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,34 @@
-blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/md5/0478361eac783b99002b1ad985182f05
-blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/sha512/2489ce5770a9861889a2d07e61440ba4f233a92efd4a3544747f83320e0e7a229a8fe01553d99f5f1d98713316f2506daf0adb7d024a46e32b3de1bb2966d637
-libblastrampoline.v5.8.0+1.aarch64-apple-darwin.tar.gz/md5/a28837b9838fef2b3831de3278ec7949
-libblastrampoline.v5.8.0+1.aarch64-apple-darwin.tar.gz/sha512/111ac2fe5f8f8102f2f7c9e9e6aa1d1a12d2db941238c949ff8e64b30335e8b2f6ecce0d5f577879c231eb839c06e259302b709f3d34e94a97047bfa984222f6
-libblastrampoline.v5.8.0+1.aarch64-linux-gnu.tar.gz/md5/9e781a026e03118df81347fb90f10d45
-libblastrampoline.v5.8.0+1.aarch64-linux-gnu.tar.gz/sha512/89469f32a666efd46437351a8fb16758c35e5aecc563d202b480c10ddf9fa5350a5a321076b79b0a1a07ec2cea0b73aa5c28979cc382a198fa96cca0b5899d25
-libblastrampoline.v5.8.0+1.aarch64-linux-musl.tar.gz/md5/b7acda2fdd157bbb183d0dd33643beef
-libblastrampoline.v5.8.0+1.aarch64-linux-musl.tar.gz/sha512/cf4125a47334fe2ec0d5a4b11624b12e1366ec031500218f680ad5a53152b9d752c0c02a0b92d0e07f3eb21f2f8f58d0c587438a4869a72197bbd5e91531369d
-libblastrampoline.v5.8.0+1.armv6l-linux-gnueabihf.tar.gz/md5/eafabd99fb1287d495acb8efb8091fde
-libblastrampoline.v5.8.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/63ff4e6bc400fa8ee713a1c5ae4af0a8e152d49860c6f5e94a17e426ad9f780d41cc0f84d33c75ea5347af1a53f07fc012798d603b6a94ea39f37cfd651a0719
-libblastrampoline.v5.8.0+1.armv6l-linux-musleabihf.tar.gz/md5/9788f74b375ef6b84c16c080f2be5bdd
-libblastrampoline.v5.8.0+1.armv6l-linux-musleabihf.tar.gz/sha512/f00ebf794927404e2294a2fbb759b1e3e57836c7f683525fac0b2ac570da2c75904e43f154cf76fce310a624f9b35fbd40e6c7757882bb6f30db790f4221a543
-libblastrampoline.v5.8.0+1.armv7l-linux-gnueabihf.tar.gz/md5/4492bace63d8274d68ecdaa735e47e99
-libblastrampoline.v5.8.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/8868283e6c5224b80145fdfd17f13f713053ba94e49c170f38f0cbf9f794185d7dec9c107ce65dc76121d3ac5b21d2f3857f619d8279bede86a906230ff59a71
-libblastrampoline.v5.8.0+1.armv7l-linux-musleabihf.tar.gz/md5/d66b6ed1d4e5f6a130f36791063e651d
-libblastrampoline.v5.8.0+1.armv7l-linux-musleabihf.tar.gz/sha512/414ad07574a6e9aa670bbfea13eaea11da13129c9ccb4193cad708014c31493ff10ff427558b90cb16040fa64c8a325c2e375e3310c39fb37bb3e7fdb6a72a5f
-libblastrampoline.v5.8.0+1.i686-linux-gnu.tar.gz/md5/595199a3a01174cfa4d9ce3407bf30dc
-libblastrampoline.v5.8.0+1.i686-linux-gnu.tar.gz/sha512/02c3b0c3c0a411d5090a081f3bbbe38aaae40eaa5fe63d0690e0582e233cd9ce76483922557d4f65dc457e29a4e84d86ee5af20a60b082aec7bec4ca8607c1ca
-libblastrampoline.v5.8.0+1.i686-linux-musl.tar.gz/md5/5832d0044842cb84f4e1e1b0a04b8205
-libblastrampoline.v5.8.0+1.i686-linux-musl.tar.gz/sha512/d28954d0feef6a33fa0bfeb59acb68821222d36a4e353eaf41936ee2c9aace719c2d0f0b0f080eafe2baecc67a29de4cacc0446aac776bbb615c4426d35c9c8f
-libblastrampoline.v5.8.0+1.i686-w64-mingw32.tar.gz/md5/46391ac222980a0ad2c2d6d2b54db26d
-libblastrampoline.v5.8.0+1.i686-w64-mingw32.tar.gz/sha512/6dd3434648a297639ef327efa5827d6aea70df551774e52ba395cdf187bfb603d365eed84780913fda5f3d12512ac54ccf784da3cf6317671ab346211f5984b3
-libblastrampoline.v5.8.0+1.powerpc64le-linux-gnu.tar.gz/md5/5f76f5c6a88c0caaa6419ba212f8cb94
-libblastrampoline.v5.8.0+1.powerpc64le-linux-gnu.tar.gz/sha512/785071e682075b2cebd992394e66169f4ee2db3a8e23affb88dc05d9abf55f49d597b2a7400a13c83ad106ad825b5ee666b01f8625e51aec267132573273991e
-libblastrampoline.v5.8.0+1.x86_64-apple-darwin.tar.gz/md5/21beb51d448bd22e4608a16b3f4fde05
-libblastrampoline.v5.8.0+1.x86_64-apple-darwin.tar.gz/sha512/620ba64d93ef416e483f813617aa313957282d8361f920b5444702fa911ff0051d1f8a8814b5fa0b082fd4dc77d96cb8b763937c786959bbc97cbb6131617152
-libblastrampoline.v5.8.0+1.x86_64-linux-gnu.tar.gz/md5/14c1045ba4d400f490ddea5343a46f04
-libblastrampoline.v5.8.0+1.x86_64-linux-gnu.tar.gz/sha512/0fdae83f4df93b28951521cf426736367f568c1e76fb68eea42b045cc9a288b6836abb3206a6d61e4f88adcf198553e911c45231aecb0f552e06de28eb3bec54
-libblastrampoline.v5.8.0+1.x86_64-linux-musl.tar.gz/md5/59b110676fcb2fcfdcf670a5d435d555
-libblastrampoline.v5.8.0+1.x86_64-linux-musl.tar.gz/sha512/57a5022e9fabc0637a29f3c32f6180cb4f6a90282191232e299df6cea5265b535e4a0af4fde15c8fe80e5a59edea0fae96dd3a510f5720ecd78e85a2a9ffbfe0
-libblastrampoline.v5.8.0+1.x86_64-unknown-freebsd.tar.gz/md5/c22da112cfc7f9fa0f103d08f4b78965
-libblastrampoline.v5.8.0+1.x86_64-unknown-freebsd.tar.gz/sha512/ace02fac0dc6df472456007a081e0aaa85a6b17290321fb214349aac0f2d0f893df602dca28fc26ddfd4ed574fd9063bacff343249e5a1109f5d92dc9cb7a1d3
-libblastrampoline.v5.8.0+1.x86_64-w64-mingw32.tar.gz/md5/34fdc53745245887f968f420b2f02ed9
-libblastrampoline.v5.8.0+1.x86_64-w64-mingw32.tar.gz/sha512/bbf478736b7bd57b340ccd5b6744d526a7a95fc524d30fdf9af6e9d79285641be26fae5f9e5302d71a5be76b05c379e969a829e259d8100ba9c6ce202b632b3d
+blastrampoline-05083d50611b5538df69706f0a952d8e642b0b4b.tar.gz/md5/700b22cb26291736bd1263cd2a7f2d75
+blastrampoline-05083d50611b5538df69706f0a952d8e642b0b4b.tar.gz/sha512/967c16d28834df112916c0904dd4c7231a1c5e4edf279adb26411faa17da28eee4680ce2347b3941520dccbc768944277a8f724b21976960d00f840349b90e36
+libblastrampoline.v5.11.0+0.aarch64-apple-darwin.tar.gz/md5/769458d40e004d6126cae6b34351068f
+libblastrampoline.v5.11.0+0.aarch64-apple-darwin.tar.gz/sha512/75a726b9a4f41b70344ceb9e1f1a7ad370bfa84ce44c70b8a965061d777871e3bf2237ae055da7e6202ddef78932ba8baf2a01a675b1b0cec5338ef16ea2081b
+libblastrampoline.v5.11.0+0.aarch64-linux-gnu.tar.gz/md5/d92cf3f3fa1e977ea3a1a74acc8442d1
+libblastrampoline.v5.11.0+0.aarch64-linux-gnu.tar.gz/sha512/3354f4eec2a410f81cc0546a04ce98ddd416d441c1701a59ec5bebea99af8823b5af10a85cb4e3377548422c6d6a0a870f2e7a05ad0cda52c6143361d59ba4fb
+libblastrampoline.v5.11.0+0.aarch64-linux-musl.tar.gz/md5/41d060c03202b662e47bda5fbf7b1e84
+libblastrampoline.v5.11.0+0.aarch64-linux-musl.tar.gz/sha512/54a05516e12350441c33341fde53bc912aa52dc4b746089c2d21cb75f24f0fb140849a520327db6f52895743eab090b59fa974a2a426a49f8b4e38693340a306
+libblastrampoline.v5.11.0+0.armv6l-linux-gnueabihf.tar.gz/md5/4930dceefac63e7aa5a93e1ba0e00e59
+libblastrampoline.v5.11.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/dafce083c2b409ead61fdbdf4f46b7c93cab00c82a74a181d381c4a93f1e7af035cd6caf407b0199c1f8c2f2f68f93d67938ef092fa4a8d1133f0ea73fb51a9c
+libblastrampoline.v5.11.0+0.armv6l-linux-musleabihf.tar.gz/md5/82346cc4ddeaa29ea7a081edfdfcb08b
+libblastrampoline.v5.11.0+0.armv6l-linux-musleabihf.tar.gz/sha512/72e387bd661096a46077e8c15e12f8a6f18fd6aaf30af0678d00eca0d83af10758874643f5716539dd38269e831e4649d45db739aeb60996bf1b96277cea1d17
+libblastrampoline.v5.11.0+0.armv7l-linux-gnueabihf.tar.gz/md5/7e8f115268e8c62acaa2a53ecd32e2fe
+libblastrampoline.v5.11.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/4210c306ff7ccb53aa6c9f45e134c63b238c563ed753f7536dfc21f6962dfea35d9de62e429e2685b70d0db780ac766b72fd5e76e2d62df74000e3e5d553c30f
+libblastrampoline.v5.11.0+0.armv7l-linux-musleabihf.tar.gz/md5/7f388611c477b528a091f697b0d334d9
+libblastrampoline.v5.11.0+0.armv7l-linux-musleabihf.tar.gz/sha512/e9b017dfa8c19cb940395b253f3b28511a6619469fabff7ab1671ed0936e9e0681d1385c3d1f5d6417ccb65ffbdcf53a0c8519d4ef8e89f9500a05ca00296144
+libblastrampoline.v5.11.0+0.i686-linux-gnu.tar.gz/md5/254948ea87a435251b1e064a77b3d635
+libblastrampoline.v5.11.0+0.i686-linux-gnu.tar.gz/sha512/5a51d3c20c49c497a8f0c2d2e7b38b49ec5e367c7013a7f0efa4fc099639da20ef9c0bfdbdfbdc40b27ce61f189b18f5cf617d7a0ed4bc5300da692f7d6b77a4
+libblastrampoline.v5.11.0+0.i686-linux-musl.tar.gz/md5/a9504870af8db1e247be02c5e188f7a5
+libblastrampoline.v5.11.0+0.i686-linux-musl.tar.gz/sha512/5f0109168a16edb8ca66fcf10c2c10b57fe9c3061c0b08dac4dea936538fa5854aa1b66079f127b5d9902288b61772054013256aa307b682de38e350b1bbb367
+libblastrampoline.v5.11.0+0.i686-w64-mingw32.tar.gz/md5/815822f6bacb42c35b80bc77458c5c49
+libblastrampoline.v5.11.0+0.i686-w64-mingw32.tar.gz/sha512/c82f8c6fe0b7917860e5601c79e35d56297c53b6f7f992841d4f048e7981533e459f9db0805a16d82a9e03d452489760def0d9c57181dcfa5dc363102180eecd
+libblastrampoline.v5.11.0+0.powerpc64le-linux-gnu.tar.gz/md5/ee30c9cb4c51df03026f9e471040e9cc
+libblastrampoline.v5.11.0+0.powerpc64le-linux-gnu.tar.gz/sha512/5055d83a1b0625364ddd97652a4c6fa39c795078123cad33a085283889274f66c9dc053be0591c14be262dc7eef666726afa922c66ae8d05c2791c3d6bd7009e
+libblastrampoline.v5.11.0+0.x86_64-apple-darwin.tar.gz/md5/210cd354c9b4a8aa2a2b55723597e58b
+libblastrampoline.v5.11.0+0.x86_64-apple-darwin.tar.gz/sha512/1ee65d598f9f8a2cf7137135c8c2c431520b1cde319fc33dddfbdae9fe01d986e979a97c24cf85c090cc40064cfe47c376dfeb088ff417d17868c4df84fb2fd4
+libblastrampoline.v5.11.0+0.x86_64-linux-gnu.tar.gz/md5/e2213c42eebee6e45079ef6831077b3f
+libblastrampoline.v5.11.0+0.x86_64-linux-gnu.tar.gz/sha512/ab2c3026d34962a2ca5116d71a4e8eaaca5182d53f21edd3e4be81ce26e74e427c88797308af7fbbf1b9ee615e0383acf0dae1d0eb207ebc64dddaf927f00b48
+libblastrampoline.v5.11.0+0.x86_64-linux-musl.tar.gz/md5/8cde3c618e882ea2b7c8a017a69175c7
+libblastrampoline.v5.11.0+0.x86_64-linux-musl.tar.gz/sha512/8a3aca5691c3936d114c804471b2429b9ae81308f020247765614d2f792f93a012263ce4baa31cf42f4dacc23a7161a4c7f9debfba8d9028879f1ed3fc4e2433
+libblastrampoline.v5.11.0+0.x86_64-unknown-freebsd.tar.gz/md5/b02eb694e1486ef8ffe9534ac2bd5ec6
+libblastrampoline.v5.11.0+0.x86_64-unknown-freebsd.tar.gz/sha512/989273809ae567d7e7193529740423ac1870eae3a0effeecc67f84da914d81649786f393e101f013b7232ef5fe35066d89b3cb776ad0e87394799491ef28a467
+libblastrampoline.v5.11.0+0.x86_64-w64-mingw32.tar.gz/md5/6e7f602ab0bf5a5c28bf4e959a1bbf77
+libblastrampoline.v5.11.0+0.x86_64-w64-mingw32.tar.gz/sha512/556e7ca1a2576c1d7825ac1bc2449ffe2cd40391cf316b10f60681a5c736939c97eb5221c2837640928b5544f89f44cb14ca44ccf54092376390ea1a6012c9e5
diff --git a/deps/checksums/cacert-2023-12-12.pem/md5 b/deps/checksums/cacert-2023-12-12.pem/md5
deleted file mode 100644
index fa4b90b0b2ddd..0000000000000
--- a/deps/checksums/cacert-2023-12-12.pem/md5
+++ /dev/null
@@ -1 +0,0 @@
-1e305b4e910d204ab7b4c18ec0cf0f27
diff --git a/deps/checksums/cacert-2023-12-12.pem/sha512 b/deps/checksums/cacert-2023-12-12.pem/sha512
deleted file mode 100644
index 8ce26f24dffd9..0000000000000
--- a/deps/checksums/cacert-2023-12-12.pem/sha512
+++ /dev/null
@@ -1 +0,0 @@
-dcbfe08d39efdd8de555b31e1050757900d08448c61f8a67c055f14514e5d47e734f6874b5ae628021c35ebc8dabdfdac5808df0b3a073d4f9246cca4b725fe8
diff --git a/deps/checksums/cacert-2024-03-11.pem/md5 b/deps/checksums/cacert-2024-03-11.pem/md5
new file mode 100644
index 0000000000000..618b6c74efdd4
--- /dev/null
+++ b/deps/checksums/cacert-2024-03-11.pem/md5
@@ -0,0 +1 @@
+594084120d27f482b1dc48f558d12d48
diff --git a/deps/checksums/cacert-2024-03-11.pem/sha512 b/deps/checksums/cacert-2024-03-11.pem/sha512
new file mode 100644
index 0000000000000..441b8e84707b0
--- /dev/null
+++ b/deps/checksums/cacert-2024-03-11.pem/sha512
@@ -0,0 +1 @@
+31f03cc19566d007c4cffdad2ada71d99b4734ad7b13bc4f30d73d321f40cbe13b87a801aa61d9788207a851cc1f95a8af8ac732a372d45edb932f204bce3744
diff --git a/deps/checksums/clang b/deps/checksums/clang
index f27067bf6cb85..7dc297db9c05b 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,108 +1,108 @@
-Clang.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/669e6bc49ff1715e259ea4d60dd3f0b5
-Clang.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/e81c11d4d3c5a96a646a535362dc7f18d1be25b11304faf65b002ca68255c57e067f3022d53e3ae19e2319b1267e71da66bbb8afc0be53632c558bf08aa47f52
-Clang.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/md5/22e32a15d23de5a4b5fa01f56c48e8b3
-Clang.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/sha512/4228e5a66f21058d181b67e4b959f84aceb2195e90af0f28908c537e42b72b32cbd3e813e9261476597ba04d76549cedf6b5aa72eddf6cda63a34df00106a8c5
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/2defb4a5181b381201a6d16c6711e89b
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/3386402e8fcfd1c0f6d154cd8b966f308759e13919a38d148a1ad3539e046330982eefaff074f7ab454cd1d396cb92d992dda6589fa73e00fdcb237ec1cfc843
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/f96d1684861a5b78ec1ce22c68659907
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/09327b84e86cf117a0e492d39b531690c700f4dbd3e1469ecd3d269fa5e788c173d62f972d474194999a12a3cf88e3151cc07ac418511207c8d2494c4e493f41
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/dd27b64b1c8a4b66647de4dda29e76bb
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/1fc68f4c2132eca09bb8cf9d51a1d6c223c4c705de181d973dbc5d818222da1a68eb4dc8f944750c0a715ddd73e61e7da6a4aa44c8a682bb549f0bcfe83a4044
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/e343df2dcacf98d7366ea9d3206f8655
-Clang.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/75ad869bd6ff350c37300ed6f0ceb2815023a8132c3025eb6992f33728b3c67556a503a05613a36bb6f3be39d06256bf51ba2b710f68ac2ed18004e2cd4ed133
-Clang.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/71f7ffde41325bf4946fb71429e641c9
-Clang.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/bbeedf07ee304016c23b57b30bb9e3d9de6186805c38ada808c2f742d61b0650a40626333fe452d2491ff758a07e4ff7e438e9bb66b6a93627381ee5a772e2e5
-Clang.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/02a5e1d72a7707c4a8dff484f7b06127
-Clang.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/591265593d59938744b7753ccff22622bcef5dac9779228ec599db0da250e39082a4dcdbf76cab6c4b79e3b9b8dcc458a426d410c70f739678e4abbd02dbcf42
-Clang.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/6e23a222df648d83a871a9e70092be22
-Clang.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/2ab4e861762ebd9c6e269b8cb10b3238cc0535620956ce63da0b34dd2faf4a925622ee879807ca6af6ff808fdd7f8a1d69dcfbdd499da2cb9d35daf4111b5406
-Clang.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/a9a8572068bb9ba3d4fff517e4141158
-Clang.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/addc84d7245b000e13fd128856fe7125c9cd7d66b943867b17623d73f77634638e7a4d5417ddbd441204fe0e7c26c6668d661e7f336d8e83af93f37ea92c7ce9
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/82466ead7526e9f7e676d5281618592b
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/6dc81a93532d7c039e115173de8d6dd28cb1dbc39a41426eb477d3c9172baa5ba0a30e1a5d4db74a9793a32fb19c691daf99794a6339d47b250ab4ea9a042dfb
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/9add4c5f6f4649baecdeb16ce8429c64
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/48276bd860514c27458c2d02ec047c6eabc7364c2b6f429abc081ac78bdd2ad4c9536c501029818676d8d7b1347e900f39ad58a01ba75a1cd8c14ccb78ba995c
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/4a4a38af087d005b713348ac1290fd2f
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/380975fdecfe0a33b71dcadaa652402764f08af0bd5d9d2cffbedc34a9acc7b5a5e2c92169f35bfd7427d9a431002bbcb4381d0662c58c2d46db9e5709d0c8ae
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/602d51818dbe23cc36ddc60083ebcbb0
-Clang.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/4593cc80d9ff046961f7f68c30502429ef7b270556b4153c5e47945e8032100470d74e9c9b62c34dd97fc9c81001615b10c31b171bfef2c67c1cd0574d0d2c95
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/e7aba5ab085a0893c4b46a4a8f0d963b
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/690b7623af2ca8c1be696b80ac42aa99ec006a5c96a2560250425bcc1c41ceec31d1e6396174ef5fa071ea1d6ecc19bfce9f5606cfaba2176ecb0993e2beb901
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/6544aa4614c5f95fa26194b4812a2ab3
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/4b4c5aa1449416d807cbb9546c31bba655f413f6f5cffd26b20ad53d452ada64235342c430aae5ac1faaa52496bcd097d6b7ec6027b9f6dcb6ea5cb6d0c77a0d
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/296309f5d25708e632866c688b5abce6
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/29082d6a4b1c9f0d2ab650fe0fd3e9d2085ad7225ea7c4cb7dc38ee8dfc03189a5c7b060af75022f81cca17aa71e3ff96b67255398d06db77a429b2cd0bc2017
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/de7837fe7342a448586a5a176bd98549
-Clang.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/194df7bbf67fa70bf78baffecfe5438307804743a4093a5bc123cafc8085d61be9f48c5c1bda47d5b9ca65e7614119c3a235c2ddb1d311623ac92135ebcc3f0d
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/71036efacbec706ea6bb4124ac42aa14
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/f4f98648e8708b5053b7086d823651a1fd8df5333a0ea571f9a39f7cc4449ad19abf0556bf756d4e12ab5cbadca771f36bdece6087cf45e4377da9f572e4b4ed
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/52374b994418675552d0004c4c304329
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/51385ef025b1c3d06ed4db078d6458a92d3826b733051e1513c94908e9475b2a27d0a9d5df8ee3f442ac009d1bac4597c0338ed0cb06e2ec5e9d6267ad9c1be3
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/4fbd519b4be6fc20180bfc770b38e1a8
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/7807b20b344186aa0fa54cebcab4371995f8d3a4e6fb1ae43fcceffd7d20be227f43346f6408375ecbfc40244be7d0cca690ba2000a8715deab19b729e2cf28f
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/3017cea85ad3f416ad0ad1131562ca38
-Clang.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/0ad5f18a3d33d849af2055ea833764c2d77d81fc5a33d41987fb4c89ac9eaa647da10d32b3a3c86317e60bfc4af431e855ba0bc2e5ccd0800950ccb77bc0e03e
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/e06e17cd995cb424628d02e0a40b7110
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/e0466a2cbf651684ad51ee9f2a34f10b12c0f114ab795b0b16c8f18d944d956101a913e7762ddca36bfb146c5930d1e8b6a6a8508517e44276fd4e21663118d9
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/3a3357bc382a3e32f98ac557f47ee626
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/bcc34b8ed6f3876c5f11b87f4779cc6106aa54055886c55d2f8d0ba420ddd5e498907b44b2fafcec1efd4cc9b513f26e09163cddefad60a1d07493b6a3c3d0ef
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/2a938d18681235edbfb6ebb1868246f4
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/6df520c1443b1750d5cb397389bde9478510042cdacead6420c08db9e80d69957a93e17c9918a83bf6c3b46d5b7fab71ba7efd52da641619ae3308ae510e9a54
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/304f130084074e3e4ac5d37f1c1bea57
-Clang.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/bf893e5ab3f572e1db081b484ffcddd7dc2f6c15060099a76ef00685885d8f71b9196272c5b973014f2492cd7f94cf1e3c32b9222e97d4b580baae9235539a29
-Clang.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/c1f5add69bea21e906485c0267345084
-Clang.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/72b08d5001e713505a3185dc179bf1d2614e77518ad6439f92e6df69b6b328e48ba449865fdda20f5cd4a10482dd8c07398e28ffcec7ca29216c12170e34825b
-Clang.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/c58327d44901fb4c51bd98639be97dcd
-Clang.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/21d92e946dd96e32000ab114b29db233d41e3731a6ebac9d9c876cc8e7b225be570d3751f74c98a0747121638fc6d6e63c3c3e8734cfd53a3c056dc4aec9f96a
-Clang.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/d8c025b9240ed4c8742ea8381b975d2c
-Clang.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/2631a139162c4acdf990054eef55e68fcd83608ee708a85589a82bb4b4a2e0a0e91dfca6f4b7c2aad9b4fed61fedca9237c6f55cd1cb91a9d9bfa1c279b3cabb
-Clang.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/62f5a0d34e3e39ff0832f5566e77b43e
-Clang.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/911c9e5a545500dac325faddbdbf33983862ebf2d2b55baf07f8003ce045710d15dc386f65c6a1c01a1c3500b831030ea686953b86631d1870ea4c6016784928
-Clang.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/aca138da6dbb96b923c3b444760901be
-Clang.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/21768ce38c29c012b36037ed7d5232b53c388d6fdfee81f13f7b04e4168957b2d8477b821452b8f602455899c779cd5d32e69a213fdaaa25e4e83d1e3c9621fd
-Clang.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/10de75baa15af9f99a0a936a7c1fdcd0
-Clang.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/686db38e54436b2816ca658cdd17b7f8de3c71d4f5bcca629968d5c2a11b91b55269aa506f8ba4a4f059f482804d7c622cdd92dbef445e2377f2cd1c6fe7265a
-Clang.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/c1f92df83f99e7d920da36b470e39a4d
-Clang.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/8f67d8fc779ee738ceba20389bd68560696e94497b0455b3894027bb9773ea0f4933cac6a3c4d10fd7a391d934cff0487722be509f37654736a23c3b4f3d2dde
-Clang.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/660393c8f29f5127bb32f5b1135028db
-Clang.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/a4366e839d1d4fbba30fa6599cc7cdee4d32474abb6ae5fa499e83546543917e9737ff38994bc7a9421d4fa88b8c466a50a49196482c2ddd4ab0b8a3f7556903
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/1689b18599e772b79c2bdd09bbf8f8fa
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/d7a74189addfe6ab7fca0dd7408b8b0476e3befe1cac65150a3558c6cd1cbb4b919c5f079fc892dee46b6776bad8d3251799ebb7d76d7fcaa4ef45830a551adc
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/33c06a9914fea54f71d3a546583e43d1
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/bcfbb536df4b0929a3cba2b6366d7cf0b3362c3eabd7f1e94f774a617f5134a26f3e4c41c44896c7a3e2f6c734b7efd2301de34d9883f6e2d42619d7fb9281bb
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/bd983991a186c350303e1108534ab713
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/7b96d52d66fe1bf4d967022067d5336b016947c120df7c892c03fa5b1b95a5fdbe24f220c13afc76ee25142c15aaa70000c11dab2b7e5db1445b0c73a7d3eaf5
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/1add4ae2faa3bf120b29a7d80b173b2b
-Clang.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/7996347224e4b973a1174e253d022784b5a6e7fd7813d7ed47a0dccc3728bbe0432d94a89ced9e406e2ff5860d4b1646a40ec889f5dc1aec264f13608bb05661
-Clang.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/fd02d184d64e04010a5a31c1a9dd9d43
-Clang.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/56d4be2fa7f0d2f1a6d17014ac4a8a6b302a542464c733c4637b1a3bbfc2bcf42755783445ba0f272d9bbbfb9f755eedb46d3ade8252136d9f3c4f328aac78d8
-Clang.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/md5/8977be77d1e9c9fd7d4c509fd20e96d5
-Clang.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/sha512/852290b61a1489f9251471d9863bdec6bbf179c325df6dd5acfc77937cd3cdecf0c988cc9eb02ce3ac5a5b7a44eb1c72ecb66f8cdd27b4f5b7bda237aa1ab1c2
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/e28a61211a0ff79356ce55126f934f6c
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/4a842bc9758d3ffba3143aabc4f4c91f5a73531565d7ed0a53191996b3e157c0cd0117bf5f9deb28a226f1bbd2a5c5bc880fb58d79673db2661d95baa73463fc
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/31647f7142d211ce00910f806b203193
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/15e10f67a5276aad4ac27fa6134b10b917b4e3dbeb19e615cf2691edc045029ece958a0208e347cab7c50fe0125e7b5cff97a0f73b5f05fc29dd9f4bbd439917
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/143114515a5460f569ab2b2bbb6f38a6
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/4cbb239fae8d8677165a3ecf8b43a5583eb053bd6861fc16db0090574c3e588b07af9b195ab615d02836e28f069341d29540863578f3455c76a109aa81888672
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/bb7c9cedba80462c3414aafe3abe1960
-Clang.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/faaade14199c876784ecdb7a6a60a937591a6cc8ecef58a2cbcd52b7f94a9f282bb3a9ef51e1956081f1898bdfe6a63e3bbf1da677331888e164d25cb270690e
-Clang.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/477eac2f9d86be87ac488141acda2e5b
-Clang.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/205a2bde47551dd09d5eb8bed193f1b64b18d423464c5bf7379553576d88fd33c23b6d43c4ad37587373fe41d7a3f1c3e0921efa7013dce1f314e88c6c3641a0
-Clang.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/e885ef6dc19efc4160eac81e99f9a220
-Clang.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/30f6ab9e0c4b1f1ac8b1429f0e3b8d222b371c4c1198332d11268930cf425d3660cfda0fcddf0d2ba57afb74ebee1c39c0a3e92aaf1fb7564c33c053e58fa872
-Clang.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/d962d03f5a979124cde704add0e2a801
-Clang.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/d677f1abff6c06411327ab78bd5276a6f880c3eb5e3ef32dcb02fbd64efe6751d24fb6bee0a45498a0e38f678a922ac95813da0ba025d5363f7a20c9dec44155
-Clang.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/4ea0a8e61ee8058982a1a4df1b30c001
-Clang.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/ce95b6798901a84116542bf8566f520dccd5aacb4af8f8c7542abec62dc3c1d7136e6aa8922c5c2f2f61103730e7629e507acbff670b6f59ee62aa1f9a3d6e7f
-Clang.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/md5/03304c40a1e4727b9db5052e59ae5884
-Clang.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/sha512/70467c23ab0a25c490803f78de10ae10b0afafcb76b77fd3ec325c4fecfcf03463c2b0a1842a7657f57c668811f4854776328e9af28227587f8268bb79363440
-Clang.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/md5/461da00bc30e1dad361dce9ef79e0e42
-Clang.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/sha512/51bc60c08fb0df8a565c88563642ef4a9ed4f8bb956a18f12ff570b673f69a26a2c8e8775c785a753727586336f276c42ce25385e7a813e227a93d2b532378e1
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/3e6d5b7230676a2c149524cba960706c
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/906d17378d957138de5f54bf0c8e8bda505a299d79c8b34b3db5113d2f9f732535ec464abbd198504f2a73c905f9209491ba30616e99258162c5045eb8e722d1
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/a2c0cc701b90f580700a6d60186db8f0
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/e85533953837ed4caa6c429ce4228fa85e8b667c0212211755ea9be0029149d6e08f19b1d02cf5e69134250f1aa8918630b1623ad1ae2cd0b0e8535ee769c9cb
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/5de809c792f82069cd68693e90ae8521
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/b62502667136444890b85e589e3dd19153d71d23f9e989baa0cf83af09022b7ea042e6da5eff9094e50f0ba8200cbfc7a20ca76b89c251d0b887a0a970b28e65
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/4b4c3064a5e613e2ac2d558a29d34a42
-Clang.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/93739f9b85ae4d345c7522ca8ce8edb1c699d16756bec7fbbe44d5d3429a5c9ed2bd587c4ccf57df8a1758b8e52be009448db86e1de372fea02e3ab2a0221837
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/ce3e582bcf2f92fdaf778339e8c51910
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/4f977e8f0912f52b9b4054089a53a05f60bf7ae352c39b2541e68fecf3c21969d6d1b85e40d71d61040b65f7c60a2c33c8d259734bc1d2ddf77392fc425025cb
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/1eda08774c2f9975de32bdce4ffc72bd
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/c76ec1de9a25f4f8bd309336830cc07e1113b941ced12cb46976b24aebd4ab3d261c943dbc9cdfb34a01f27073af6f598dded31a4e03c62f229cd2e7d5982af6
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/2817b0eeb83eff4e1f580729e02564ab
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/88242559299836c7a7b7d3a216353fc6880a587a839793ed71d6d053318d6e2071ff218587a082f2b5dd9fb2b0952b4c60e62030d707435607303708bb1e6d81
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d3f92998b7cc35a507cb1071baae8b02
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/be22296623f604927e2e815a1cc149addda6d567270a50b2cdf77fe5b09f74313210a1ca7b1b3194592da23490ba1ccfdab9f520ce7219989e646f12208e418a
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/716300acfdee4415f1afa3b5571b102b
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/b97efb3c461ea7d2736a3a8bb6b6b5c99f02df9a095f11291319c629d44f1fb934b124d38af6be3e5cc7103c6f85793d7f185c607383461de5d0c846560a1d1b
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/034f44b2fc61791234d9580402002fb2
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/0b4ff55afcec0b1e8fbd09fab57de8b44d5ded360d3b53132c7a7df8d3a3b83a495bf6e0c706784e678c6de46be3a72e8bfe562c7f8dfad90b82880849625e35
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/54211070d63a2afac6350d06442cb145
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/a58f8afe9a20f202cf3956f758dc13a10be240d78877a02cd006d7e972751ed65623eef7e92a7256d9ed9157d6e277302f93b58f583d86d386ed4945f3c7d875
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/7084567b3637fe64088fdce357a255de
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/77ae83e159a814a7117cc859a0b2aa7a5d41f983d45b7eb1ce2fd2e93f8733ee067ac8c9fad9d5af90f852b8802043ef39c29b44430b2594892e57b61ccb680b
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/9e294d16a6e1c2c76c03f32cbbbfbe23
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/b8f83542b51f5cf953f6baed185550394744a8466307ee08525bf18a651fcecd7daafb98e75a0866b0e9a95a524e8940be7ae1878ba80d856182dcb7f7d2254e
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/70a41c2ffd55d2d87a7b8728287eb9fd
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/44bb3dea7227ee991b2666c43a88613d5b5d382eb560b5ad1f1184d38680c85a2ef961bac6ad71c2b920702c1ec6e09296198e7ff5e2929f4ba7839e55896e3f
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/95ee1406f8575898eb52e2c86ae18992
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4da66e4d397491836b3e539258844346fe50bff41e6c0628cbb5c0eac76147bd91d1720cec1523452efdb063adf6ef8792dc278244e1f8e194ef60a180442c56
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/6c4e4e892b54ce81d73a8598728083e3
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/53d08fd8b6782867cfa6ce001b14a2fde38bc9ffc85c7e148aebf59dd9c1c535b54eaea816c39fcff42abc456c1047ed13d688917302bcc5a281abe368bd29bb
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5acc5853111bcd529eeb06ea31b329e5
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b1794f7cdfba838a7e43de8f66700ae44fd16d8f06300e8ab955044ae9bc96110c5ea72691841cd3787cdc93dfb91c6b257702c20390689a8d1b45a994db2fd8
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/c4de50252e557fb126360001ddae6a97
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/9343a7272c76d5341bb49273ff8d43bed09ad99b2879ec51cfb8946174181b286af82d85e2d3a13a375c7e7859e51e4a4f06031a6a3fe7e540700cfc6a795741
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/af301478b20e56cb7fa1160cda2573a2
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/8822c58df101c239221fead6fb523e677da04a065b42849a2e6ffff03dfd81e07f162a9bbdd29490ad9c0e0a33d362eec46608b9e6e42dfb4889da1c22191c91
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/901d2808599d5ac5ac7b5ca4bc39833d
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/820756cad00b1fe927801a253bd3077709c2b067ae79f9e1812f3cc9e85a0b7ac2ce1534031b7c6f7bda3364b7173c1c508e7c7d316920fb9bb901c16c1b18c7
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/d1f368604084e907c382aaf00efe452c
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/523b25f6b79e222eb65b5f4cd8f23b0d2c8b25b29af0df88efe45546ea57c7dabd88baef454fa0b76342d8d364739107271f25d3504380fdec5c9d225fcc2521
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/e57c116b2ad1cf32307eb4e600ac80be
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/63366b983c7aac9fe1246b25432b2200c8316f569f6930eb12de3c867f448ffccb8756d418f92eae7751d4c9ce6c42cee38237e429b81530819684fd5150c93a
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/645929ce42276db10ab79184a60cd6e3
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/65555ed26d9bd670b8363e5dad949822c2bf0e141a5418e1dc30c3f8a4733dd050620e40be2e7552c2551ecb30d4ef3e8f74cb240f1d441a9720a25f5a3bcaa7
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/8424c6c6318dfa7bebeac33917b29453
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6cf90c253f6b22358c2389a2347af2febd010117b22de0cc91ad713b8c8224627398004567c96b673650212eb5bd40bb97b9a637d46ddfeb3c72388d83445017
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/ea8151dc1dc32befe579c7f9d7f13898
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/ed518423e9ec35afd7983471cf9ff1e971b840f637f34e0f62a1f6c7379ea59d4dafbeb9a311d39761733ecc98c0318ce3d8883298f8998e9c741441c7c9616b
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/70ed39b13bcb0435fee63bc30ae25a39
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/b2afa383346875514c62129c2991b3604c4fd3d507ecf4fc4244dec81d08b30218f5aa03dc4977185c2c9fb2d08848ddd373e448883ab472e5221ae5bf285c99
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/e6798835128f663f0c837aed4463e34b
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c99856e16bd42ff967479e2c89690ea41268f1d1f868e2628482eafdfa53a0d69ed7c21ecc68ff0859eef07d9fe02f4844fad5f13df26cee6cea3a4254446096
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/92c1bd54b0474244e35c51952966a55b
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/2d7c3b60ba8b11cf903bc5ea720193852027cbe61ea0c8d6fac70be8f97691da3d36663aac6e61b68185dd83b42d09ad61dea973d9390271210d690295e4902c
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/c495d594f8ce1f701d1bab54d0b60521
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/0261bf45403daccf236723383341dc791e9cb3b291bde97812378d85aed785f083d5deea3bf806480a04ef1b972b00dccfd0537e43532a066c64733b817c3d77
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/41541de24d625271bdd5fad867b8eb0c
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/595226ad7ef75ab8ae03adb456b4ee9e884e9554c720b6c4ecbc38c75d446ddba7898be94630673074f09f40c6dc3e18fea9cee5a91b8b0e4727d20a180f670c
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/8bd8ca0436611e78882939067f6277f7
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/27c7b06e93fb0fb516b1b240e0df6c95e8bad6aea04d637ba065c6fafd087bfa94d9136afd39273c8d82d9c467395dcbd7b16f6a4b829acb0c0d4a5677676a5b
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/424bfbd7b69ddf7b1199afaacde3e028
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/9c48d949309aef6ee39371ff39a4f12c31bf3f25ddd288b317b2a17a803db73850cba2886598a1d10c4c154d511a4b79958d1acc012e92491a63f3925c522873
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b0b3e045ad64ecdc9848898f30d5f34
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6c0f4bdabbbc94fc9e1fedc138b0bce99d383e380ae7222fb70f5935f17701d549f6486956c8a21731061e4bf60bbc52794f6ce6858b4d2adb89bf80f88795c0
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3b7a461ebf957756aeb2a2455b0a298c
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/74641a3636dd58c69415b19f0cb1de444215e22cfa9f0268fd549b5c53b206811d8beecdeb9692285613468d9a0569e836d225fb8361218438346914f6282839
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/5e7b9ad5fc3af3bfdf262687cd248dfa
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/c54835fdf8e3e442b7c774d445c2f13c5dd8b3224f4ae165e72cc893ee5453d0112a9ca6d543b17f2c02a89471e2cff7cf022dc4c8188a5df25d101dd0f954b9
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/3204bd8074d42920a6707cc8624c0dfe
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/74b26c4556ca18645cc15647d8abdbd46fb94c75169934af885e5773a880c066b2ff221402fdb4a53417b2c97ce589783f7fae6a8d56ee89cc1f70577b02b2a1
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/7922c04964e0c1a5b44e95480290930d
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/4f0d675c1b85dc3e5007a62a7cfea412ca432d1276a259db3ed5a1bf0f33d6c555f16010de717a62e0e065e7c1dbaa66c281815eb9629d2b6c720b152820e582
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/e023eba0ea0a327f53013d5e4d50d0cb
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/9fbdebce9c7375a20d1cd10e39a0c26b131af686cb5771034a6afc6cab08855e0cada2add616c01394424383333950d0dde9c55a9477fa139cf0ca3fc438b229
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/a6c7d64ede931fb19e066a1c191e2f6d
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/1a085a4ea1efb910f2b529f3c0e51be4a5e31debbefd00ceefeddc352b36bea6d0de5a06ea7d509098d16416b536ffed3da8485feefad7a2f11b1bc148a0c8c2
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/692af94ca3e5c3d229cbb459e266aadf
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/b27f05cfb0ada89cefc5a6f6527583b6b43d03525954d5b1ad1c807712efdb8750ea558a230b587a0c0d9e77c54d9f8978cc2f3884653808c7409eab1b32a055
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/3b59b6aa4b18b5dbbc632811f2ffa270
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f8c4b593f969c723ff1931c4875ed52497d83d74b94121890e10c9fcca5f6bddc5067555dee9949e61e426586ae3e568375fc44f318a07b70571ee34fdf7032c
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bc4be32ad57b13c3dabc80684a176ba7
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/19a8346547b6c6adc2a9156e4b913b20137593752efa3648ad532b08de67cf015bba1eb023204755f48904c3381a3665c6c54fc8233c50e887a22ceebc652303
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/05f37d069c7d59ec245d961d0928cb37
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/3b0956fe770fd9230319bfcaefab4922f9aee3df3e8516edf81cb7d322132ee9ab899af4464c75b1042aa99e3bcb07ede6de5646bba2a57995fc2eb32d4d0861
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/0304434211ff4101a148fcc0c96455d4
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/a033dc589fc95e63547b7ca82964116bec33ad6e78ac131934d4bb16988756d36c24d74761ca93b0e47dada1f3d2a63071cb3721ddb9af457cbeb164fe5f0f54
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/4e5d1064d90f24d57d63f08b61baaab5
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/cbfbe8b6f2be80e59b69d25d6af901ccb4807b12180208b69afa7223dd7d5249255265bc319c9402a1b0d1f0995940e3e72d7ecf1009f60d83021f8d35626a46
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/22fead15b4c45398ca869821d04ce015
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/2ee7a7d3f293f7b63c89bbe3b541722c502a840883804ffe272848f4ac99b7a8ed350ebe92ec434dfdf03d1f4a5531c1367859f4a4603c98325abe5a0ad71177
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/46dd01b10377cc3d45c6a42cac0a07e5
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/957677ce4251938d0c5e066448762b38a21bcce5ed424072ccd58085167d61b7e45a88fe32375f6bbd43dfb579b65a9afc09a886a650fc634a8fb9c81f27c9e3
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bd9a61ea186a39162201341f0739fe84
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7a06d2a9ef20e88daa00d627d482ebbb6bf7223219d8b2a24aa60ac9eda24649d206b093d5bdb88b65c1e2b0d1ba0ad7dd927697e2bbac65bc9b42f9d14ad0d9
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/60c98c6cc7d4446fb52b7585bc8709f3
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/4d55464b4499a45f774e1000a8b015326d114103a3d348fb263367e5506ca6659444ea6ee2767712903757e83939cd446aff6fe2351438b644f0057053422b58
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/90a512d1881c4af1f1abfd5e90e37356
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/62d6d855aebd49f132d6470c7b0d5a0b965c6489b025046c1ea73fc53336030d6c5b4c867523a9206821f7fcf62fdb37ef0b7ff4b5eb04d07f40b65edd2c8e0f
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/c9eb9acb605d774db9636b82bf2e5f41
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/96e1440b3b0378edf8907d4cf779b1c53d63f6d00fa798efe1b6aaa289135aba8fd00a8d6f55d9678136e9e07d0c189293aec64f46e66788b938e1f8e1fc2199
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/5837070450c81d44395468d8e3671dc7
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/0e8b674c0360f9586f03c7f5d0ffd5bc73dcde1e88eddf7d6360c1461adb8efffb104d8f454116a6a6cdc909973d0876745590b21009a9de56e12ce6e1c2e8fc
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/5c198d35df5cf6435f4f5ac91a78be01
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/9ba0a532f499933320145834aec2b57a70410bf67af649ed675f00aebfd59de7c80e6f5d19e7ad57029a573090e63c5eba4b42b498a374810b48c8668b50dcaa
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/8ac88c856d946e29d1121426de44e6bc
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/94af63ad3fb17d9c07f5256e2d474effc0e3d5ef66f4a9f3ffeb9bdd8f1577c35e4d0aceb8b4746ab857d8f164141790ed494b7f687e644e040d2f3820f9e1fe
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b4be546ff44019cf46d3250dd9a4321f
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/7ce5e4d68e18021392355359f59931219eeec3be4edd01f7a18b7bee499b589414bcea73820ee38dbc3b5ab12d912a93374b4a616b10ba491f5d41b6b33f3d9e
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/4616c348320d8704215d58c7268de6d7
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/b4c21147ed21d41321e04b092d47f99338c6ac7d50b8328ceb8ae26d6382955cbcd655dddd39f0de3d3c36a5fda7084a33272aad9f6cd9585c87fee68be73a68
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/bf9cf2efb938b68ac7e1560c464f9051
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/ca29438393d393912571a96ce59bdaadcacbb329342c42a0de0e8d8ab52f69d4e6966822c0743d99b1a277c8715c1f72ddd490b781b45bd691df2c137ed42a1d
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/94138893eaaa99f37354317bc13cf7e0
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/1b03d8d4e407372875667f25f74abdaac9be0b81c6229dc1c4c1714589efde6b1f8c76302a2545b103ee4f9812fa78f9e06e5d5bb5bc3903ce579328899faa2f
diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries
index 2dcfecfb56b26..48843f21c0feb 100644
--- a/deps/checksums/compilersupportlibraries
+++ b/deps/checksums/compilersupportlibraries
@@ -1,92 +1,92 @@
-CompilerSupportLibraries.v1.1.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4
-CompilerSupportLibraries.v1.1.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/e084a4374be45ba52279682c640449bc
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/d4aedf5c08e13fd9596476330f69e374af64373f7bca0e4df6cbb4d1710d695dce23f2655ee368c3df2049b7b0ca1848c9e01a437fadc0eb08937c6a7cdf2a27
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/6b3975f25be16ea1370ef0bf353ac752
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/d6298517be1ce350a61d1c1a1bf9b27a541382aa8ccf3e86eeadd7c47e2fe88facd17139a3878adb939df869a330264a942d280e3468d53b61325df7e31daaad
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/7134b79b71059d4da79224df1ca0853e
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/e66590a37e756ff33a84514a7ca2bbe2e1517f3e901bc66e40139e8a318d6cd8e329e0c2a7c557ea39e6db2f56802d485ab81b87be1373717b78474b1c7bf7d7
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/fd37789f5745a17cc9a85902cebf4698
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/9ff4e9be39f115af2e5bb6a5c88b3940f15a010952cebf39da22e7a5c6744be2f905bebccba092db0a89cf82e8c0e1a3e61b74d4204d2a6648b5469f3ccb0d12
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/e9286ae9299c57d5df7b795997b4adf5
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/ea64858481095e0374be330aa2ac84b394bc3e5351b9326137c9cd5d15e6bec47d6e5f672a216572dcb80c3aa6fcb08950cc10157c264f429a93c235028d79a4
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/b19c6cbc5b2a62ea76dea64b0f8ae488
-CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/090d659f7e4a7034117e2bb2dcad0ef544cdca898bf032222cdb81d32af6e6528be842d2cf55839fe397c2ace05dd4ce920cf98cc96324ae18832016516e6cc3
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/26fb41031e1b797373aea7a7c4d7be3c
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8fd0b6c990681789caec528067b4bcb9661f9c0a5e0268927d4e88565fa7005db3b592fb8e7830cf32b3fb4ce54d6db747dfde896f93bd38f65b7a1290a2399a
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/6ed3f3e94f662177c3cf3c3734a5c1ec
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b8165630cced0f7880cb6fd6263cf39bbbbda668eccc94219720078a85a641c3b1b20648960041aa3a51108ab6df087b909c572d0690aacf8b99dc5496ff7db6
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/86060cbbe966a59f18f92a9b2fab95d4
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/0aa0ca0ff3a4d541c7a9599ca1adae7391fdd3fa841f3055ecb8635096d0d95a0763758d7533c887b38a655af55174dfcb63f470147b28a256b75a85c8e47801
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/4acf6f8929fb8ec9fdb8a0f1af06260d
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/3b2e0a5f62bd93434d07848c3045479a1a05bd8589dc976a5680e13805db5adcd9abdcca82edee7b28b4c4a9413ce795784a8a0f0a8fb7346a439322c27c96d9
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/a75a927c3e14bee6dca29b4907def681
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7853fd187f9289a8282d34112b5277bad13abe9dd9b6c796498db2f1a080b2c81faa6119df9ececd09725a019bf99706894765c9c20f618e359adc153c3181a2
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/ba8545cc20e6c602a0526a3b1fc1d2f1
-CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/7a7f3a7761deb068efc00ffc5d4bf4df365cb27674ce73abbe2305b678285161f1526f4facbe27fc11076d99b2079976507f78f5b463bd9057ed008e9d52f9cf
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/26fb41031e1b797373aea7a7c4d7be3c
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8fd0b6c990681789caec528067b4bcb9661f9c0a5e0268927d4e88565fa7005db3b592fb8e7830cf32b3fb4ce54d6db747dfde896f93bd38f65b7a1290a2399a
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/6ed3f3e94f662177c3cf3c3734a5c1ec
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b8165630cced0f7880cb6fd6263cf39bbbbda668eccc94219720078a85a641c3b1b20648960041aa3a51108ab6df087b909c572d0690aacf8b99dc5496ff7db6
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/86060cbbe966a59f18f92a9b2fab95d4
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/0aa0ca0ff3a4d541c7a9599ca1adae7391fdd3fa841f3055ecb8635096d0d95a0763758d7533c887b38a655af55174dfcb63f470147b28a256b75a85c8e47801
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/4acf6f8929fb8ec9fdb8a0f1af06260d
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/3b2e0a5f62bd93434d07848c3045479a1a05bd8589dc976a5680e13805db5adcd9abdcca82edee7b28b4c4a9413ce795784a8a0f0a8fb7346a439322c27c96d9
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/a75a927c3e14bee6dca29b4907def681
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7853fd187f9289a8282d34112b5277bad13abe9dd9b6c796498db2f1a080b2c81faa6119df9ececd09725a019bf99706894765c9c20f618e359adc153c3181a2
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/ba8545cc20e6c602a0526a3b1fc1d2f1
-CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/7a7f3a7761deb068efc00ffc5d4bf4df365cb27674ce73abbe2305b678285161f1526f4facbe27fc11076d99b2079976507f78f5b463bd9057ed008e9d52f9cf
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran3.tar.gz/md5/39dc387fd58ef02c461c7906ceb110e3
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/1296ac707fdad620c65256686523f2b027c8359f54d1f8354ef5d1ba514992c7269aad26b706575509b5e29d0ad3dec1c7d32fe3bcff0d723d6a4890819eca46
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran4.tar.gz/md5/21a76d54d875ef09db2cdce77d328c2e
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/9c6bf15338ffbc7113c536e145e53bfaa693007b971f83ee2db820d7d54018bd1cfdbedb6bbce000ee7aaadad1561e91f5ac0e0519bbfccbc3bc57fdfc0eb7e7
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran5.tar.gz/md5/f028f2c94f28201701ef6ba4fec9abc9
-CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/c231af1bb0fd4f733278f883837fddf574689bbd7c4dd46cfcd1478d784cbeae1fd785d7cf9f4b0f98cda08819b63a20d5026c6beb892a188fc979b7893697bc
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran3.tar.gz/md5/184436dc05207a653f13aae3d82a2e1b
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran3.tar.gz/sha512/b6e1f969528a168de087f472eebd23a4daf907aa48f7c5b42c35960b1cae3e6ca8f512982d69b757f39d6dc07b46f74c84e549cb22354a2f55d1265cba7b7013
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran4.tar.gz/md5/545bee22cb35d1c4c1381009e72eebca
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran4.tar.gz/sha512/78a65b9e7cda79cd648a1ae09daea970eba9d04fd5ea41bc1e37b065cf5c53974f759590292876f57c7f65139be66a6c381aa6756cdda7b36845cfed1bb7fddc
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran5.tar.gz/md5/3f1a08601a6a7bbd4ecfa36c8f6abbd9
-CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran5.tar.gz/sha512/0e225e0a7b651f6b3fbccf760d08d66f2d8af1e329d14ef67fd3968a46905e062edcf75f60d7540f0cd7dabcd3ac9130fa0f63e198869bdc6a9aabd391652805
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/9cfea65fa6c1b587d9b4b84ee64af166
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/b30e24982d4140c312759b8c26d4b72845fc1fa4d7fdf49ccfe9994f7bbf1815ed006a228f6a2185c5b8f9d596d0b04debd1d8392e705c530e5177a22c7c081d
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/af99905c4f054fe13842559f7201b3ad
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/96513ff22dc16cc259ad392862f1765218474bff24e561f14c1e0d349a6bc433952d9b7b73236b56722fd971e0b864b178d8a9f8d9499de4595bc9857ef17a95
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/1be078cd374d3b501b20d9ce679009ee
-CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/63097283c987dc439f02d72a6f70423acd962e4da25acc04185e654c7f16a617e34ad7efabd624fd2e70119e79e4d4806f76286d36d56c353f9e53814e75d3e4
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/156ae44ab4172903ad40932ca78a57ed
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/e800c20342dd9886c4c3f57e92278d6d41c544adba202ef3f5a6a4f8211fbbd8fab65f169adf7320b7be8a2ea02c0aa1afedbaf0b3f9afbfb691759aaaaccc4c
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/cb01c02fdcbd319784034744172e1eb9
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0ba635d39958672a0a55069521e20ca6c0f9c81a9f55c360f6043acb415709edb72bfe8d0e83c25cdf9ace8a9e9ba10e39457e234e3905c988eb95e0e0ecff3d
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/f9592263c6e72228c492ed2ed216f29e
-CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/cbe29742959906e3fe9a356991ca1f09d4d8cc2a02a9af8624b3e02b4ab59e33bc05082826f7c67c73c6b91cc8e1e5c4a0c275c21c5f8eab8b58ed942cdcb55c
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/8f0db2ff4688c3f9e1337a28976d833a
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/98502e07499ad9e22147a977b1fe55320e75b6229c3993f1cd1b71e47a09ae6bf78e2341ce978ea72d33b111d09b813a332bfe8f4f6dfb669509c300fcec2561
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/980a1b8e6262c4a7b8f86b84f7234043
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/c0755d4fbb1b6fd7755d7508d7df929feabe7e5778661397ef0205e21aa3be565b39ccc2a08ed0d958e812c0c759be68ef52de09fe92ebab6da342b309a0810d
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/4b3cdb65e6114c77fd1e51da69e41afa
-CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/803cb771875d94eda554bade8197b31aab988ab0c957a2f8853d82d01418be9fee7d9d4b7ef6f5b7fc8d1825ab22083a71d467eb976d5076fc5d73a9a7a30440
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/36638a444b185954bf12169edace1914
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/33f657775258d0da1a57fc03c5e8ed203946944581ebf70af7b0205f9bff7fcd4f2bde5b6fa3b01659c51f106d0e6df5c7533ab8d3372c4895675854688e01dc
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/36ac52a361fd0f4be5c66572345af7a4
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/802bd8089bb2a3b5959a47dbade2199b46c247d0a793cbf6fcbc97b9a1dccd6d8585ac7694ae4bef1dc3ba21796ae5b53f995c8793ccd7316e8fde68ac121f83
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/5911da90a0fc86d665aa86cba12e9d61
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/c966936dfd272d9706aa51ed44abcb8cded899b0caa8b12ee787a0fb1569fa90a1cba89c9a9b83e05c0993facc615feb851399f4799c06956ae3064d172c964d
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/5f42a52e72f0e79530d71733a93811bf
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/51078ef0e447bb181003a50b899b39a9d1ee8ecc92fc293f5a358d836ddf21d03dc44433ae28aa21fdf756c2912b2d3f1e374a5ba108c8c34552fcf32f93fd0b
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/f3bbee1114cb85c266a45f64632c6911
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/102e638f49ff0f62644f15a931c71a16b96f02f4c90d1b8bd378e0d7c54f4e8a150cdb5ffdbc3dcbafb83131bef84f9071cb77e8debdd98d8929c7b65401fc54
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/ff2b0ebdc7ef83cf8b48bd2ae76c6430
-CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/0730ecf1b9476612cadc3f3e7c1b227a1967edc091c88cd0cc19477079d1739fd5e7b1022ff686c0c6a2404edaebfb02c810dcfc1aa4187e7ecddb54998ad96c
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/35642304a9a2f435cf5214b2715198fe
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/a67f41ba31c99a064f504f508711537f9e90089ca5352bfc2698c3fcd3e499ca716f07ffeac4fb1b88c2c934f7f380f262af8c863d3b16ac7e805d5c805ab358
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/01df0fbb265e5ff1a480a7a5e23b0835
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/57a79f2b8e846c1514dcb18420f26ae2889962040f410b746836cab4395749155fa9cd9d00d4c25954c0ffa72f9f3823b1b50688a20ddf675301f64e0d4b5c7e
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/1f1f6380ce8815cc9cedcea0b40860e7
-CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/a88ea8af8c8df792861812bfdf7f1bcaae31582ab78ce78b47a0dc6fd57b93441c0471f529ce23877131ac9701c6eed72ce89241746e18271f3686fbd718138c
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/5eab740e86bfa7656f6a08038fe2fa63
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/3dc6b7ec39ff7dcb71478376c86ce34a35a62f049f6203722c5414b7b635ff1b412e02d8d24c13c123d18b2e914780da4639538676694e342a1a6b507691ef25
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/9718c79244ed31c367e715f1f563b8cd
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/eec2380c4e182f4e923142736a2c4aaf11a525a5f966fed7e4ec4b431ee28f3842a4e73495df116604f74b419e6d398576ee3dd21d3c0c53b92167dcfd0f6b84
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/58d7b0b79a22f3aade7e4f39eec898e7
-CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/efecc0ca09ec6b7b8898c2ffd333c7e0a6a44706d72ac0e5010409aba92ee70a88b6fd77434bedafe0e013561f8d0c74b5a274808a6c9499f6a3005a7691785f
+CompilerSupportLibraries.v1.1.1+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4
+CompilerSupportLibraries.v1.1.1+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/d641904255ee412c45b089d92c53262b
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/ace0383fe9bd64faeed1fb05a11bbec932bd56b8460d06d2b7c3e1b5f4f6e9a9b3345937088684e5cd1ca9a85ef1a5ff56a97a1f60449cd6e35247de1e123d81
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2a71f320d8b9242ad26aabed74cbf404
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/03e2a4482baaca2d6ce5cc207224d03bd7851486ebe8072c7317f5fcdd641395d945552d9462ab44a9f2e4b0ffaa3874a76f314d67bc0f75393a1151ab518611
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/1beec15ad689a5f572040ca2a7b6a880
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/27bbe212a8d43e841cf8f3e9964b72bc220fea03cf5e65721b02d2f3aa5193acdce41e512578ed6be935b413cd0d2224a6bcd2e9624931f39092ba3cfc5cbcc0
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/9e949c2efe48a7b2a62bff7e1ffdede0
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/2947acb250f8ff4936da5ed02ddbfa492fc38bc87baa588a36bb892ba68b6636a912cda976f8fff00cc7a710c3bfb185826b4cd4a726750ef5f161d5f1aa21a2
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/7202764b1a89a748b07460d9c40a9279
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/63236225a9becdd166c4395ea5081c64f57bc51af89c2edb5abeb419d6eb8224a380a633afd861bb84a12435fd19c8554cbe5ffadf8324ff2c7f17021ed53e69
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/f66c30d3cec8057ae47f05df022ead51
+CompilerSupportLibraries.v1.1.1+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/5329d9469bb0f47560e52b15eb21ab70e0e2da0275bdb2f8e6ed4feb132bc9989a6b44984329455104546c95d05a05f8fb4f1cf232856219ba005100f4b16dc3
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/05ff63780f5b7c8c6c590c3626f32ac0
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8d3c4149531f3782f5efbb6a6fbbb7080ba005298ba962b5bc5f66250ea9fde91b34836ed909c16f306d21d2e358f985360962e9362a8e807ccd4254da3bb19b
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/3ca2b6e8101d831e546c1b6ed2ca9a42
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/21a0b9c5acde96c0a91303f4f395e55f272d5585ad18f0365105188d129a3ca94ad66d4dd99b471abdf41a7a7262a3b258fd04b887110ad15255b284cd1612b0
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/d4d560b8ecce0ff2cb4dbc88cb25942a
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/d405f61525af1b2fe85107a70ed67b8a1eb767923487fa71539e0f49d6e70358c8a24f4ef1c224256cf677af99b54a2f8243f1e207350fcb14d426a7a6bb3915
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/8c6eddaa156fd0afee28ac5a154bc3f7
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/b9fc86bb706ad98d61b63eb4cc8bfce6b2c67b58ba2cebecea7574f44790cce044bb1b4db1d20050b59538fa43b51cb352d752c77333a0f0621fde47c63a3596
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/0a54c16fea86c6dadb39eff65c465528
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/c635c636384d3af5b4b078be7398fbc665a185eae69dd223279affb4836fb5c575d6ab296ae940ccbe73777bdb5e355f4f28a2fa27606ac143ff424641c60c65
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/892dfd91703f0f77d170a5371a1c25d4
+CompilerSupportLibraries.v1.1.1+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/8ac59d00192c0e847168e61b3e93957f3909aab59ba8d05e47686a9f8b7226496f89b932151c42198ec966ccd47721cdf547a247ea4e5c61b22bfccce2ec591c
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/05ff63780f5b7c8c6c590c3626f32ac0
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8d3c4149531f3782f5efbb6a6fbbb7080ba005298ba962b5bc5f66250ea9fde91b34836ed909c16f306d21d2e358f985360962e9362a8e807ccd4254da3bb19b
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/3ca2b6e8101d831e546c1b6ed2ca9a42
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/21a0b9c5acde96c0a91303f4f395e55f272d5585ad18f0365105188d129a3ca94ad66d4dd99b471abdf41a7a7262a3b258fd04b887110ad15255b284cd1612b0
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/d4d560b8ecce0ff2cb4dbc88cb25942a
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/d405f61525af1b2fe85107a70ed67b8a1eb767923487fa71539e0f49d6e70358c8a24f4ef1c224256cf677af99b54a2f8243f1e207350fcb14d426a7a6bb3915
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/8c6eddaa156fd0afee28ac5a154bc3f7
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/b9fc86bb706ad98d61b63eb4cc8bfce6b2c67b58ba2cebecea7574f44790cce044bb1b4db1d20050b59538fa43b51cb352d752c77333a0f0621fde47c63a3596
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/0a54c16fea86c6dadb39eff65c465528
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/c635c636384d3af5b4b078be7398fbc665a185eae69dd223279affb4836fb5c575d6ab296ae940ccbe73777bdb5e355f4f28a2fa27606ac143ff424641c60c65
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/892dfd91703f0f77d170a5371a1c25d4
+CompilerSupportLibraries.v1.1.1+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/8ac59d00192c0e847168e61b3e93957f3909aab59ba8d05e47686a9f8b7226496f89b932151c42198ec966ccd47721cdf547a247ea4e5c61b22bfccce2ec591c
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran3.tar.gz/md5/3094705222b6b61fd6a10422a73e1149
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/27f874cde357ffa45aaa10f2e620ec0f8ab4e5a8bf4607fc023a2ec42040bcc9a724f959237c340d67451f8621402fa05133c1420086b87135f40326c30b97af
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran4.tar.gz/md5/ba0acaff60648efa3915348a8a353df8
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/0b6aaf75363cbe6133ca3aed351ab58ef1e441f61375f5baf702d8043813c459d48e8af17630f1a07dc22772ec9b02076af33726ed94e6314ae37d5a139d6dcc
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran5.tar.gz/md5/95f1d57cfc43677e40bfc121bce79274
+CompilerSupportLibraries.v1.1.1+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/edacd9960e9de1236c91752e103cddfc018d697e87fabb3cceadf36153b4e97842ef284bd1532290a5620007234882b4c4cd4f36525b61763d97b2f608358262
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran3.tar.gz/md5/f37fe1818e1634476c44afae478611c8
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran3.tar.gz/sha512/6e4e3eb5ac9570bfdf5280f59167eb6c4a74f3aa152afb4c5d180b9a6cdbdca557e7dd13f0b5b76943b45a65e848fe77c5b3bbc6ddb0fd846d03fbc9fbedf7ce
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran4.tar.gz/md5/b4ffd52179aa0006c56f279b87cb7556
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran4.tar.gz/sha512/a047ac7db204c31802f646351af51c55fe06498e851b2df58d7f93f75d9c0067f8736f247f108991ec01ac7f86f3026ecf58b5f2f3a76d7eab00130754e7f704
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran5.tar.gz/md5/2d38fc835f236f89f457fdf859ccb903
+CompilerSupportLibraries.v1.1.1+0.i686-linux-musl-libgfortran5.tar.gz/sha512/51fbe41efbce33b1cf3728df6fa59fd0e85a13308b3e868fe9f70f4d67857615f83542ba69be824a73e89959503dd7a11335d1c495704bd7d6cad6656d0c5d57
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/9650002f6729c0964d33afcab334d77d
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/0b7907811a13d09b7b33203c7e46888308c7d6fcf5d69790babafc39f640541551f784264247f159a552f15df1ddd061c421a93b983d838d3bd7f85ba6427f70
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/47e9fb99906b9647e26e4126a913074e
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/d7285691fbe1318e48e061d678e54890762cc16996652a34b190924cc1462d24ab0b08729945eb25f4bef60e60d50f3e78db57d4cda0302b8ba579db8a1311e1
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/b588b2710f2b83d2c70c6104e585a3bd
+CompilerSupportLibraries.v1.1.1+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/b62a63b0c8750f85fc265db88456307b794e912352a68997c7cce06444391307c03edbe5b901833f53c5bd55f5a1e61a586538b08487cc139a2d71fccdce1d31
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/7cce4f3dc057ebebaa677bf6f0d51e9e
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/a0dd93905f0ede4da5e2fbacf2579154db8ac8e9963c77fb62284489686f2aa372925b3341742d86430a839267421af55f6e1e413473d17f13a1a199e6a904a0
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/06ee6aaeca78b3e9005f53f1fa32731f
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/ff0e33ce9f93b3a867cf409b95e763efbc8f4dde65ed19107eb14d29460d084f253e03ebd6375f1da996182b3d96e1fda4abff06507258da9a89ece36663db84
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/483251d28076ee959dff131d13d7e53b
+CompilerSupportLibraries.v1.1.1+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/a7c9053a8c1b784cb6459762f26e0c2106a9758cbe2aefe8975a14aaaf61b8a08e51c465e733e44d01537beb59d467c57e536ebd8b27b7b68f46945174c469c7
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/a147bf3a6d6550c177b8a784b9b02e21
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/c6f7a13f0195eae8f7ad980a4b24de9b155be69c4437522723411f9866a4aee3c5b350ee2f0c95f41f19aba43acaca78309881157e8498df0664c902d0c05a5d
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/3f19c9d0e723a8d5591357ac3a9452a0
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/5752bac310d80ed2dc1fc3d6580300d185787b9b933e31c8e0f572099abd0727d9483da8f9af858f706e96a183d2b10702c44381a080438cbb17d6459321ccfb
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/ad0f0e2fe3e7d147a0a27271a2aba0fc
+CompilerSupportLibraries.v1.1.1+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/f42231adea3d0b6133c3b5bc5fbf765bc6a7ba8ef0f407fa1b8def36dd8a71d20ef39fb6e57b43208489c2795a96562cdbf15f3d20b3f3a09edb29b99d19a33a
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/4c78d56dbbbff682c0a78d11fb9d1e70
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/0e9d6dcc4b8fddaaa94a26a46e915d33fb474f8a8ee14edd4d1c7e774846c44c5c5d852649a4f70409c99ac0e1d458077b7f0eb7dc0b0326ee8b625644d7074d
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/039d37f813b183c75feebadd21011eb6
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/05e7291de1fd2520247402f0db9d348fdd7a02d8dd9133ac65701f88d237110a3cc6c6e2c5717364ab786b6e6063038ec10c9605e77bc4dbe1064a0e77617f5d
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/a985f13a85eb14d1b6339ba4983dc372
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/27468ccd5642e6e11bd5972684518a0fb883bf4835ac18f5279c3fce97b1779131c7d9e39d8de26a15c293c832946334e964919f51d7679cd0569ce82b938579
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/9d86ce2fe481ea97a1fd098bd47d524c
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/a865a4127bacaedd81b6c81279f6a44bc3497ab29a0401f66da1abfc0738ea459be9f158d06969c161a65925739665084bec5f8650a8cd1e8f0d08f1f44d729f
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/86d9db869a7af6c96dea39f5d9d90505
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/01e0c69b04138989200ded92eddae6ff1873d3a440d17273d08bee40d53b2929e35bfd14be051074fe78671cac34ac2dd7360c1571790ee52f94a5921de42a65
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/e72d28df4bcb60ab2f3389046e7c83a8
+CompilerSupportLibraries.v1.1.1+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/cac193a26328ddeff5f7bcc3d7207101c574f9bdb1bff5c2b925315c5c2404a2fdb6591d1968f30931373fbfcae9bda784c72e65580ad3acc398448cd193f65d
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/35642304a9a2f435cf5214b2715198fe
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/a67f41ba31c99a064f504f508711537f9e90089ca5352bfc2698c3fcd3e499ca716f07ffeac4fb1b88c2c934f7f380f262af8c863d3b16ac7e805d5c805ab358
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/01df0fbb265e5ff1a480a7a5e23b0835
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/57a79f2b8e846c1514dcb18420f26ae2889962040f410b746836cab4395749155fa9cd9d00d4c25954c0ffa72f9f3823b1b50688a20ddf675301f64e0d4b5c7e
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/1f1f6380ce8815cc9cedcea0b40860e7
+CompilerSupportLibraries.v1.1.1+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/a88ea8af8c8df792861812bfdf7f1bcaae31582ab78ce78b47a0dc6fd57b93441c0471f529ce23877131ac9701c6eed72ce89241746e18271f3686fbd718138c
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/38fc8c445a1a610db40a7609155e22d6
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/085652c7ca583c3623611ca9262b70765c9936c9feb5f9034b2c6b6d6677a7a1d7d201b83d82d0d268f3190bd1a62eab0124e8fae3625407dee7f1df89d4106c
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/f3f89eb3c2e441fde6e6b9c1c1a61183
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/c53f79e20ad043ab099873f38ece98c6bed22950610ba88b9c178a4bd943039cc426473828d509deb8c65c93309da1de87bdf36fb3954b8f8047277c418fe2e0
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/024f7133425db23e215dc55589bb9171
+CompilerSupportLibraries.v1.1.1+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/819945496ea48dd44d8c0f12a11a358b7d1ebf198d60fbad576d74ddee68cdea98070cdd11ca96567d0c772ec007c03cbc83ff5c7d2ad737cbd486fe0c9afcd5
diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2
index 78b82572f3898..629a5f0601fcd 100644
--- a/deps/checksums/libgit2
+++ b/deps/checksums/libgit2
@@ -1,34 +1,34 @@
-LibGit2.v1.7.2+0.aarch64-apple-darwin.tar.gz/md5/069fa8cbd69e98a6196bd24bb427ff47
-LibGit2.v1.7.2+0.aarch64-apple-darwin.tar.gz/sha512/bacaf03e8aa28a55af2ae031f09a22f0a9cecb20257ee9778f37d87eb4ed21ff25dbb33ac342b134edc2a9584c0c57e6eabf7dbe78b031e9e45c2448bd8a317c
-LibGit2.v1.7.2+0.aarch64-linux-gnu.tar.gz/md5/3bc5dfb163045c335aec05097702ddd4
-LibGit2.v1.7.2+0.aarch64-linux-gnu.tar.gz/sha512/c8055e51713f6fe6f064c7e4626db7a70d07af6e5c8cb031e88ef8ea728accdb92a449f738fb248c0afbc9698d30d0670fa5cf640df564886887c7ce4dbc7124
-LibGit2.v1.7.2+0.aarch64-linux-musl.tar.gz/md5/23440824a3ec0a0d82fa8adccac63534
-LibGit2.v1.7.2+0.aarch64-linux-musl.tar.gz/sha512/040874d092d7a11c5bfa284e95b86335792ffa6b868e8c7165ca304647d82ffbc7a72b65bb92038abd1bfa545e831d5b2a658b3f5891377735f85d3e4ddff7b2
-LibGit2.v1.7.2+0.armv6l-linux-gnueabihf.tar.gz/md5/3ffc0954875a20f610f6852c522311f2
-LibGit2.v1.7.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/35767e10a36a0025539a354ef5be2ec4031457c0c1121a07f4616c774e48bb85425732935a12718d23627b4bdf1fb5c7fe6b220cb4e2bcb94b9d26c743af0004
-LibGit2.v1.7.2+0.armv6l-linux-musleabihf.tar.gz/md5/73acea595fc31967d7fe1e808f4cc47c
-LibGit2.v1.7.2+0.armv6l-linux-musleabihf.tar.gz/sha512/e124681a7b7b1ff64a1ca073ee6fcc75fc2b11ab0a3404b0a2bc9944854c4048e576697254cd8efed1d6c7103ac2e1bd574a75c6c3d8a383775d48170ef6d474
-LibGit2.v1.7.2+0.armv7l-linux-gnueabihf.tar.gz/md5/b710035b0be0c7e4810b9282912369a0
-LibGit2.v1.7.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/e34b2956a869303a5b6bed059f326989e4e95f039fa3d1b75fc6f80c6d1170df96e2ea809df05aad59685751eb4666a0a05e0c3b456bcace2b0570b174b2b541
-LibGit2.v1.7.2+0.armv7l-linux-musleabihf.tar.gz/md5/aa4c7374dda73266cd387596fa31c258
-LibGit2.v1.7.2+0.armv7l-linux-musleabihf.tar.gz/sha512/54f6c9bd7dd1ee0438b817b1b18a94ce6f18838613524f146cf37f7d01319dd7c05d19d352376454ed2658e9dd839fede32db05748d9c352b56c3f0fe01edebc
-LibGit2.v1.7.2+0.i686-linux-gnu.tar.gz/md5/2cf407e8cf5f7c7c984b48704935afaa
-LibGit2.v1.7.2+0.i686-linux-gnu.tar.gz/sha512/44224ccf773b9f77979c1130cbb893d80c934918940ab9357bff7d92b97bdf1671d4263d121a603a6c12e60375bafc38b4bcac9a12cb0abb4665b5848d12946a
-LibGit2.v1.7.2+0.i686-linux-musl.tar.gz/md5/742b947992f9f003a49c480165df1c87
-LibGit2.v1.7.2+0.i686-linux-musl.tar.gz/sha512/4d1a0c5c119f391be76d93ee7e82c6e38c8cda5c172c3d431bed6c6923776d4ad194c88509203332bed23f4c42581255a319a2ca45b6433b3fb68b3dc0aa1f02
-LibGit2.v1.7.2+0.i686-w64-mingw32.tar.gz/md5/476c290758970376ddfdcad25ba74c7c
-LibGit2.v1.7.2+0.i686-w64-mingw32.tar.gz/sha512/dc76e0563b45aead1187fd7abfe411e3fbc21ff45983b693c11db4265d39fc3f51d167fe447f6a7efb1f3ec1e35a9d0edb1c3444ed50be62770dfea545fbddae
-LibGit2.v1.7.2+0.powerpc64le-linux-gnu.tar.gz/md5/3ce536ea1ad7facca14c19c84106e8e5
-LibGit2.v1.7.2+0.powerpc64le-linux-gnu.tar.gz/sha512/d52d400249c7ed16084b13fde81f784cbf3b13b7081944872986b216b7faf186b7c8ff28626504d461c6059c7b4962acde0b15dc2e36007492210568a0f425f7
-LibGit2.v1.7.2+0.x86_64-apple-darwin.tar.gz/md5/c59af177b7ebdcfd52312a40d7832ee8
-LibGit2.v1.7.2+0.x86_64-apple-darwin.tar.gz/sha512/5bae0efc71421f28e4a3b2aca70ef1810b5810ed3df3d3577b8a7e956ea895f4b9efdabf17a32130e1f71fede4ceae7246d099b71147686e5722abdf56d50778
-LibGit2.v1.7.2+0.x86_64-linux-gnu.tar.gz/md5/1a55eb14294730b0ba7a23df3aac27ee
-LibGit2.v1.7.2+0.x86_64-linux-gnu.tar.gz/sha512/21f7aea927bed22da1b974eab4d6f3413cc41e47b5a5e5bbfff6690474d6f374952b81549345a68c9227d1f3ef9e76578c81776cd1c41e75330c9f2dd1437c7b
-LibGit2.v1.7.2+0.x86_64-linux-musl.tar.gz/md5/33a9bfb5a805ca6ca695196c2e94183e
-LibGit2.v1.7.2+0.x86_64-linux-musl.tar.gz/sha512/924c2c2cd9baad369b21c17364164dbff7ecda153cb1ed89a4a7539407f3bdc54e87304ed2b7e26ba8046077cb0b3645e9263d53eb4b220436a2e150a77109c3
-LibGit2.v1.7.2+0.x86_64-unknown-freebsd.tar.gz/md5/8d27555b9a1bc086c4c3056cda876bd2
-LibGit2.v1.7.2+0.x86_64-unknown-freebsd.tar.gz/sha512/8ca7a589a328908ee10490230c963ae1292f9cb7c2c427c26ca5d6e4985d56ef76d02f98e05e4fc655cc5e92e2e405d63896a180bc33ebd4b852d2ff355c0339
-LibGit2.v1.7.2+0.x86_64-w64-mingw32.tar.gz/md5/3d7ac5a2af376a2ba0b335cb84710874
-LibGit2.v1.7.2+0.x86_64-w64-mingw32.tar.gz/sha512/5bdbb1110fa7c5bfaac0ba63002f9b1f480abab1cf3e284146352c30027381c23919745a0c490e202e8d856503f5c2bb2390a6bc32af533def7d31f8d6f7be31
-libgit2-a418d9d4ab87bae16b87d8f37143a4687ae0e4b2.tar.gz/md5/c67e5a2358f01d26b78269639ba20148
-libgit2-a418d9d4ab87bae16b87d8f37143a4687ae0e4b2.tar.gz/sha512/c82dc18bacb45f82c72f09e5525fd47191c8673a553df9faa05290c33dfdadfff9784341e68790130a14b12267c795f4ca166481fdf9674d2b70e8228bbeb11d
+LibGit2.v1.8.0+0.aarch64-apple-darwin.tar.gz/md5/c19f3a4f6567b7504f607fc6f328312f
+LibGit2.v1.8.0+0.aarch64-apple-darwin.tar.gz/sha512/0a776ab3eb4593abe0f2198a7371cbcf653ac5cf71ab7af9d5520c2bbbbbc981cf07ba3afa70f1ef6ea56f81e2d4b33b1be1482f9e215e61178b3dd1149ecb80
+LibGit2.v1.8.0+0.aarch64-linux-gnu.tar.gz/md5/8137d530bea16d41a614983068c1909d
+LibGit2.v1.8.0+0.aarch64-linux-gnu.tar.gz/sha512/bdcb6249acd8df887a8af7c084d409132694a39f5e9f90bd70bba0f3eba2bad3eab6958cce9f060b2a4392d99352ccda8be92000f24ed4498c85ba55e0cbf13f
+LibGit2.v1.8.0+0.aarch64-linux-musl.tar.gz/md5/4b9508ea58d4b1bd99f8471bd7c9a839
+LibGit2.v1.8.0+0.aarch64-linux-musl.tar.gz/sha512/e0996627a3d3ab9b3b1d103bbdd3e1179ede5479816f6b1be54471f120f76fe0495d3c7587c382985173c0614b634903b58c67ac3badbead82b4d797cc5915d7
+LibGit2.v1.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/02d6fae1745562cf724190929383688e
+LibGit2.v1.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/11c14c5f395252143218a495c7dd2817c8f18f73320200a521f5ccd5f0c3c87403dee2c3b9e8166022fde1a67e83cbb83e6f222aac38b41efa43a6c0254548a9
+LibGit2.v1.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/afa7b90751565b865f443b5a0a870d8b
+LibGit2.v1.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/3594c223883a7da3bc0c78ae656fb15e47cc9dd196cf08f0abc0a1fb5f799842261e125440c07e92ba82896ad7427814bb43b63ba64d9b72ba38e9149132c26b
+LibGit2.v1.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/ead27583a1cc5748c84d58a07fa6fc7e
+LibGit2.v1.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/c81c3582fda7b299aaed72de07e9d0bd06c0c231aed73bb980c12c89b2b7593b2fb7990421bc2e45173f3d107ab50660842289675efa31f24ff752f0ebc63875
+LibGit2.v1.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/71e38f112e8629682cc383c510f1f13f
+LibGit2.v1.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/0b4b2677bdfc4f8e2a5ef3b9adf8fa2c0c1e76bd240b2173c268835b59cda29cbffc4241319fe36dcd63d1507ecf0e0e843f48fca80e3fbe4d3df53601ad7dec
+LibGit2.v1.8.0+0.i686-linux-gnu.tar.gz/md5/b54cdd02201c1481746ab81db6f39aac
+LibGit2.v1.8.0+0.i686-linux-gnu.tar.gz/sha512/0496a607b0d6841fc0c87477274b61eb95afa24d48d2624f8aaf230028a24d0248336902b01a326010fdbc45d8d73eecedb14c82313e1b1a94b3b6a4296e2607
+LibGit2.v1.8.0+0.i686-linux-musl.tar.gz/md5/b8946f9c1b83a7c22b2a8ca6da05b23b
+LibGit2.v1.8.0+0.i686-linux-musl.tar.gz/sha512/931ff27da4e35749a5c020af72b750dce0b1d672cd98c04ad5a340f4f33ddb61c4df6711c36c6601063262fb733d45a3bb24eb1141e9d2fd2e0ab7b9bfbf54c8
+LibGit2.v1.8.0+0.i686-w64-mingw32.tar.gz/md5/cb9db0b590efe0c60f6e2f9558de4e5b
+LibGit2.v1.8.0+0.i686-w64-mingw32.tar.gz/sha512/b9f637ec10cd751dfb2156e154a68809af400cdecbf0d25910792937c63ea56e60b703c7b78e3038ab34b1100bb20df79dce6346f40f1d24e5188fefe9517ccc
+LibGit2.v1.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/4ac1fa6b1ca43d3f6e97d590d1d27127
+LibGit2.v1.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/5cc3b9b9d85068bb3c6711c63ccb2672be765888a114147f70cae0eebf96f5bde00a40f163202c20e18a4cf4af2488fd1c304060daa3dd35748b30ab5a1fdb1d
+LibGit2.v1.8.0+0.x86_64-apple-darwin.tar.gz/md5/26c2d1dcf68bc7b9919dfd24eb2fabc7
+LibGit2.v1.8.0+0.x86_64-apple-darwin.tar.gz/sha512/9c3ba94c438682f321cb2130f71028587a4a21b960f94f8c3f633dbe007210ff1b7b5e0b0bc4972e818458843a47a9e8d50d88f1bd3fb03a8fe129fa66332a38
+LibGit2.v1.8.0+0.x86_64-linux-gnu.tar.gz/md5/8d48ad388cca20d6338095cf8a36c3c9
+LibGit2.v1.8.0+0.x86_64-linux-gnu.tar.gz/sha512/9ddf4dc7420a9129ff8c34eb07ee94d9213c1a1c22700521199032a773353ab2413fd70d002b273a06dd951558128cd5577b7e917de6575d379911831d737bed
+LibGit2.v1.8.0+0.x86_64-linux-musl.tar.gz/md5/5d9fd50bdf38ec99311e9627f254b95d
+LibGit2.v1.8.0+0.x86_64-linux-musl.tar.gz/sha512/eae36cc70cb414e15924c22c9b03a98949b89fd8ca429fbe23fa215253266ed4cd11530adb2feccc9a19299ebf2048f1f5929c1faffba463b593b6c3e1808bee
+LibGit2.v1.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/af9e9d4bbe0df291f07f41416809a6f2
+LibGit2.v1.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/859786f13ba1fb5cd448dd5f22ebdf3381f5290d4875d65840fb31567ccd012f283a1849a82f2b2f58c3d73eda4c748d3da07d84a99665e0f50aeb39c37a4fb2
+LibGit2.v1.8.0+0.x86_64-w64-mingw32.tar.gz/md5/b51e3e238d776d52e396dd749f895e4f
+LibGit2.v1.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/2581d4b1d6fd4d0f15b406f050bd8a2a41e13dc2a1699a9b956e56426778beb994e6552988bf50ddad682349209a8694eace9450dab0570434cdfbed9c9f0b24
+libgit2-d74d491481831ddcd23575d376e56d2197e95910.tar.gz/md5/2420def04a290dd7ad0e67f93789f106
+libgit2-d74d491481831ddcd23575d376e56d2197e95910.tar.gz/sha512/496c6640d2453c58b66bc53c6e4e9e84f48d89eb8f240aefc1eea7e2d19b6c601614764590f2d2ca9e51e7e1de8fcdd5bf67f27f32bab628eae40d26ed382048
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
index 7ac93952b8b34..6887c3fe62f41 100644
--- a/deps/checksums/libuv
+++ b/deps/checksums/libuv
@@ -1,34 +1,34 @@
-LibUV.v2.0.1+15.aarch64-apple-darwin.tar.gz/md5/f12ee08b8d5721bd88b651ac573c59ed
-LibUV.v2.0.1+15.aarch64-apple-darwin.tar.gz/sha512/b3c53e04624d9c60e664db2eaaf3add696f95c52e55db0c577547f7bf7e045ce30cffa0e38c3451f483dcdb7ddcac1b7f43b694dfadb37282773ce416af1030a
-LibUV.v2.0.1+15.aarch64-linux-gnu.tar.gz/md5/8cb9f4d6c6a4013fa9ed9ae120058275
-LibUV.v2.0.1+15.aarch64-linux-gnu.tar.gz/sha512/d4213ee47ebdcab4cc4719478608ffa461a5543a16656c2881587364c8ed1b39a2f418d5c3c6a04d7e6dd3df349f0afa72cd1df0e06db6146512356b37637024
-LibUV.v2.0.1+15.aarch64-linux-musl.tar.gz/md5/e23a0d7162fc3f09f0ea92d24cba3f4d
-LibUV.v2.0.1+15.aarch64-linux-musl.tar.gz/sha512/f4a13eee72575092ab673777a53d43ce999a7a8bf847daa0a86b39bbfd3f6c1575f105a0bb629766268921ff94f10d354a0e53d32623f6ad35fca44e7bac25f8
-LibUV.v2.0.1+15.armv6l-linux-gnueabihf.tar.gz/md5/70ed0616cd3737a357b7571ab648d239
-LibUV.v2.0.1+15.armv6l-linux-gnueabihf.tar.gz/sha512/25a4b8a90b7bb62e31c1c2bb1d7e137050de26f311daa9f77be932ab3d9a2b92c82ce82ed37cc74a2b1cd5a60bd0dc412e92d31013d36ad35af2d4f376fbc0b0
-LibUV.v2.0.1+15.armv6l-linux-musleabihf.tar.gz/md5/e4320c51c8904a59c105f7b63d086c79
-LibUV.v2.0.1+15.armv6l-linux-musleabihf.tar.gz/sha512/bbc39d800277a66298c56849c1131a6f1958a2c8b2465b59527965a1bd3f36a4c87c1d7ad4be59ec0f4ca8f94ec1804a619eb963d956b77e645e0e00effc6613
-LibUV.v2.0.1+15.armv7l-linux-gnueabihf.tar.gz/md5/d5436552dc177fc3fd0cf4f575cc03b4
-LibUV.v2.0.1+15.armv7l-linux-gnueabihf.tar.gz/sha512/8d3722290151ecf65f4e3fe3440828492a7b115810b87c8711c5592f4dea69727e516228a5a1bdac430c74b288f12de390daf0a9408274aa27cdd69efc81cfee
-LibUV.v2.0.1+15.armv7l-linux-musleabihf.tar.gz/md5/67e1ba03ddf2d2bdd79fef25a5976350
-LibUV.v2.0.1+15.armv7l-linux-musleabihf.tar.gz/sha512/535565657b5714da0db5c622b65b261a77f21f407b1166eee99f35ba3bb9c6fab4c7d1f3db70528767cef7c66c856c5101b01423be05efb9219ec4c76df0be6a
-LibUV.v2.0.1+15.i686-linux-gnu.tar.gz/md5/ba1432ecd75e6a43ff197d51ba76446c
-LibUV.v2.0.1+15.i686-linux-gnu.tar.gz/sha512/dc8940284d4ba05098037fcf4b3bada199878794ef34ad09d8992ef58e4c829b00a3fcca29c4f65b49edcab4274092232ff5d7619b3535333125043f89503cfc
-LibUV.v2.0.1+15.i686-linux-musl.tar.gz/md5/237080cf5d1286a3d2f35d134a285d28
-LibUV.v2.0.1+15.i686-linux-musl.tar.gz/sha512/834f16a16cf6b765dfd094037c8a45ccdedc2eb9e45f527c6721eb0f60f4e56dc4f9b90a2a35e3ed10da4a937d8555e97034e0a29d733cac5352061ebe286799
-LibUV.v2.0.1+15.i686-w64-mingw32.tar.gz/md5/8a5261a6ce4200d272b5eda4fe9ecb31
-LibUV.v2.0.1+15.i686-w64-mingw32.tar.gz/sha512/b372b9368a9ac92775a8d6ee39b492905dc5f58f16d7edf792e0c2ffbd4507ede79139a24fe852dfa26068d0f076a4abf3c3b868a8a057b23fdbad80999d315f
-LibUV.v2.0.1+15.powerpc64le-linux-gnu.tar.gz/md5/53db7b763af204477ce8f0bfae0ce15c
-LibUV.v2.0.1+15.powerpc64le-linux-gnu.tar.gz/sha512/bbfe2518d4dc84fe73b7d3f3bcaf5aeef0db0d284c0e205e3e3670b8ce6f3ea593a0e52d125fd9a79cd098dba0d35f151cb46469fa25dfeef62ebcc966f94d4f
-LibUV.v2.0.1+15.x86_64-apple-darwin.tar.gz/md5/b4699192c098564edc5589e90c47324e
-LibUV.v2.0.1+15.x86_64-apple-darwin.tar.gz/sha512/2505e4060712143d3213747d4276c0b99dec78579d2ab0b71c5d7edf15ae80e0178138a11470c976d97de14e44b153dcb00c08c864523d7ee0ead8e48cf15d14
-LibUV.v2.0.1+15.x86_64-linux-gnu.tar.gz/md5/f5515c50106a0748b9a1986c912a00f3
-LibUV.v2.0.1+15.x86_64-linux-gnu.tar.gz/sha512/5822396039e14a5d919f2612559a8d62a925c4319e66eb05ed20faed754318762f5653851684701cd4ddb1edf10bfe25d252c3d9cd84228e498e825ba61146b1
-LibUV.v2.0.1+15.x86_64-linux-musl.tar.gz/md5/1962e6f21080e874d2ca0275c5b569b2
-LibUV.v2.0.1+15.x86_64-linux-musl.tar.gz/sha512/4702e13633d77a6c0aeb1e7cd252471a779177e20c68485130f420cc4878c8f7de7f0f51730008ba9dc0f275875a5947629d39aff200691e26f31d42e388d53d
-LibUV.v2.0.1+15.x86_64-unknown-freebsd.tar.gz/md5/928a789c3cd3b4cefcc1cf13f5f179ac
-LibUV.v2.0.1+15.x86_64-unknown-freebsd.tar.gz/sha512/ee58667b19fdf7ec0218b4645b3f5bed2e9fc01cc29eab8473ee02aaa044f94a48eb220c6427200aaf56eacf52d0f72156712d17017cfecbc25d3c1b71a4bd2f
-LibUV.v2.0.1+15.x86_64-w64-mingw32.tar.gz/md5/d3b84729ee9d5a6e6a1e29357e25cced
-LibUV.v2.0.1+15.x86_64-w64-mingw32.tar.gz/sha512/43ca5999d8e8ff820722b1a548bd4e277e95c739c3363ed6159d080dd06842316f5370e8e4f4a886bf76010f4aed2bbf01f6091845b0e84b75f709277c561649
-libuv-afa1c67fa496eb49ade1e520f76fd018a1409eaa.tar.gz/md5/3863ccd5a51f85cf76679070c99be6cd
-libuv-afa1c67fa496eb49ade1e520f76fd018a1409eaa.tar.gz/sha512/bf4c705e05e730139b62de799331653eea2767898d654b8890ca2623db3adb9a1efcfcfab38af22e8ac44c67a9c5c018f1f7847a3703bee5f05657169a67d817
+LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/md5/f176c76e5e2096dea8443302cf9550b8
+LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/sha512/4301b13953a08a758b86e30af3261fd9a291ce3829b4d98e71e2a2c040e322e284c5a6eb4bc7189cc352f4b1cf7041e2cfd3380d511d88c151df3101ad74594e
+LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/md5/c81515783363702a1bd4b65fd6d7f36b
+LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/sha512/011429365337f5a45e56ca7a42709866bb994c747a1170d870f5f3ddfff2d36138866ee9278ac01172bc71bde8dee404bcb9cae9c7b44222bf1cc883659df269
+LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/md5/e74d5ea4912dd326b2705638faa7b805
+LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/sha512/a26a9f2c9051816230324071c502321f7af3885d581a400615858a93a4cd457226048d15b0e7f6a73d12659763c705b5ab519e9f5b35c6d886b9fd5babbfe352
+LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/md5/6df38bcf5d0a61dee63d16b73d0c9a24
+LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/sha512/d5354a6532061de0a58965ce0e427bde52f9ae0ee39a98e1a33de4c414fddcba9ba139ddf91be7321a4ccc97bbf7a8a8357ff10cf60f83c0a6bff7d839d6d7a8
+LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/md5/6f02a24cfbfae3032fadceaea1faed39
+LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/sha512/7fd107eb9a5ea84b488ea02e4fbedc9fe13bb11be859986a47af38f40ad775dd9f738c790878a3503437bcac1eb26ad9fe26f4aa0d3cb45c980b4c5abc9aec99
+LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/md5/96b09dec72f7e9b7409fa2920e67c866
+LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/sha512/6a0f79fc15c944fabba5c65180b665bc9769c6ff25863e330049f48b3a4394b448492f5a9a76bb7f8dbd3ce44dfc6f9ccdc2c71c42e1c749e88070fe99b1db69
+LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/md5/f44e4b2521a813181f943895bdb0dd3c
+LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/sha512/cda1413dca817f772e8b343db0c6de0ef6b8f269e9a6a2ef3403c2582aeab554f46281bbb1eb4659c259198ef47fe26aab648a281e66f80aaf2f2cda0a23ac05
+LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/md5/1f231d89cf9c04515d2d107a5d786cc8
+LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/sha512/089cb8a372cdee0cbc0e78fc201611bb9bafd99af9a78e09d6097a6b70e7c4aa001ebd86f944b0a885c072093c529bf86bcaa32bde4fc1934407a858c1a5a764
+LibUV.v2.0.1+18.i686-linux-musl.tar.gz/md5/01cfc2a9e2536dbd330267917abb19ce
+LibUV.v2.0.1+18.i686-linux-musl.tar.gz/sha512/72f3588cb464a60e61f8998242aaa2abdf93df920a2feba5e1d66ef0f2498488df0ec415cbb499d7f75c47bdfc7e3a2fdda6a94383492e0ad13e464eb1314ff8
+LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/md5/8c6599aab9ed4c46e52f03683aac664e
+LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/sha512/13f0565f7244a8bcf1ab43fac91a856dc86d214877033a3cefee8c2179c1a275dfd7dda32e9017763acac2ba42ab6799934a58f5feaa38fb6cf2253dd713f57a
+LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/md5/af0e43d9d0aa91dd82b63220d96991ef
+LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/sha512/9fabe3089e4fc60e910770c32d36300ce8ef36c28e8cc9c72fbecba6eb80285ee8174e84e4452fb4ce90ee7c7f94e99b03fce47d8c579bd614bfffd553f93666
+LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/md5/871040e874eedae54553d8f1c91b9133
+LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/sha512/d5eee08b65e4bb8b444c61ac277bec9ef944b9279dd7f0732b3cd91d47788c77938e5db71e019e01bbe7785a75df95faf14368764f700c6b7a6b9e4d96d6b4c2
+LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/md5/d2d186952c6d017fe33f6a6bea63a3ea
+LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/sha512/15501534bf5721e6bb668aabe6dc6375349f7a284e28df0609d00982e7e456908bd6868722391afa7f44a5c82faedc8cf544f69a0e4fb9fb0d529b3ae3d44d78
+LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/md5/271d4d40a1ae53ed5b2376e5936cfcf9
+LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/sha512/1956f059ed01f66b72349d6561b04e6a89b7257c0f838d7fbdd2cee79bd126bb46b93bf944a042b5a6a235762a7a0cdd117207342dd55a0c58653a70b4a38d48
+LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/md5/62fe8523948914fbe7e28bf0b8d73594
+LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/sha512/e6486888028c96975f74bc9313cba9706f6bf2be085aa776c44cbb2886753b2eee62469a0be92eb0542df1d0f51db3b34c7ba5e46842e16c6ff1d20e11b75322
+LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/md5/ae103f24b6e1830cdbe02143826fe551
+LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/sha512/f814085c135815947f342ff24fa0e1015e283ccece84a5b8dd5ccec0f5928a129e5fd79100a33b131376ad696f70b5acadcc5a02a7e6544635ecf7e18003ba1c
+libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/md5/c1a7d3c74ef3999052f3bfe426264353
+libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/sha512/a3f16863b711ddeeb5ab8d135d7df7a4be19cc2b9821fc78c8cd3ba421231d39b7d8bd9965321455094fda01584842a58f60612d93082b4fe32210b8aa44d999
diff --git a/deps/checksums/lld b/deps/checksums/lld
index c703c2c2d041f..cdcae063f68ff 100644
--- a/deps/checksums/lld
+++ b/deps/checksums/lld
@@ -1,108 +1,108 @@
-LLD.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/0e1fb589a6f0d95c4952c141fcc6edca
-LLD.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/f6f3bf38d9263c26f25191fbf8ac867167afdb700055f5fe34a06a1759fe2fb39a716760839d91e55a8149ff96a0821386aae8e5e0d4953e8a4cf9e7e627c4eb
-LLD.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/md5/4526e34074b99a330b0bd1cf2f1bbab7
-LLD.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/sha512/24ce4daf534084bb20f123bfe6c85cd2f706f552262d831de34dd393d1466e8cd905812f9f1dbfeb394401508240a66a397c006cc7bb067d6a6baf73dee168ee
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/ad127f66681d145045e47076d8d8e962
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/b63aec7e18f3c92b9f098779fe04f2bde454f981b2d39ac688ed6bb5ed9f1d78a00f8f5607bcf57bfa2b7c7e85f02ed405802642c9567acbc64a3dc5a430f226
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/83945b3b814c496fbb0d7351877a0fb1
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/3243f222dfc7dca9878606caf1593b5776c6066ffac25a195b9624b532fd5cdf5576faccb26db802c36f2d9bc3c84cfc62a83c77449d0056e190af1ccd0239bb
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/aa335fce63e6cbcc7018c56d7ef16a78
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/8d82a46d9f2e428a745687bff5b183bf91b1850c1e815aac56209c5a8817dbdc8d26fd0012917b5a14476ad8ec6ac450e01238da9024637b76be2899a1ca04ff
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/56cc8663e0390eda08d05a491a172c71
-LLD.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/dc056292c94b2a0619eb49f94521912c5f3cfbb9cb00bbd7b701a8c6a172e50dcbee719bdb43622a0c8fb32404d742efb416dea6922a3799af9ca7dda0400fa6
-LLD.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/19f4bddcbb68108301580a16b4cef32f
-LLD.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/1694c0d7cc63680f7c04b22a5d842acf3f48568e682a1a7acf29529ffe9d6ca027c33f9ae5cf4cdb1b703d28f30a654bd1f4e0586eee49ddf64bfb40c8a9206c
-LLD.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/eedfa092b0f4a2867cdb5bd4102a27d9
-LLD.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/dd895f9c5d4c76186c7d2172f56e1052f2b8926466fd0deb725683ab4d98b85b695b7d571f076bc6eda679747946ffa380e1c23f497ed8cf1fdbde89132ec653
-LLD.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/d46f65ed03a071779bd5277040905721
-LLD.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/1b3bf0d64ab0481b11f6776a249c252622b23f125eaef733dec4473ced554ba6b876632c0e4855d0f93a9e4eab06cbbfff8fe568661f569a08263543d8ccd930
-LLD.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/05ae31c675789b5b87bf800e971e8886
-LLD.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/02342bb5a5c78f7c876bb75dd0c9b7140b4e41878203469a573c190baa2be82ce64101bffb93524b24444a69e15fa6eabda8f7f006e934595e52c97d4865faa9
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/46ecafa64b02b6a72cbf33000728e023
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/72df77be62ede0737f2c9bbf58fe617ce8358ce3f08e3d3b9fbf8aa29c0ab4db4eb49e426fa43156d05f9f8c83b974594026029e67bf8a8fddf6ab33b1c2126b
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/ea36fa4169f9862db75bb1ff19b45645
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/7795affc068e64a5c3b68e9d0a773d21acec68eb2a4fb28c791938c993d0f0f8e9b1ce8b16cab3f25a48c2d7b8097449831f2e6340af4b47c59f9c35636e23b6
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/ba2e078e52583ff43f3b840e3a144726
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/0af8ea82206204ed515bb6c222d2b51a16b360858a1a4c9db7b3749cab79e08a9b0a91b54b389a19f0b40586d0f0ab3665fd2df415a90aa401728d592c9a5716
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/4f6c8a05367f8e70f6247f667737bc79
-LLD.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/f5ddaa2c14dd6fe85456701d60f279bbb30f2d9c63948ec3212a2fe15670be11ca8db815c914cdd069cdc469e3ad37d848220c7d5e1feff5ab6913a94be22d17
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/758f27b74dea4231ca5ad4bb1eb812f8
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/6f378dc8ab2a4b0f59a8107ddf6ef10ececdb03d616b6ba1741dc9ad0b7d96a8d506b45b1f3129c6f58b9ad8b3f97d300cea246c6687bedbe2b2abaec5738e95
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/7491cd8edcbd4933e0e897b2378e4f96
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/ebfda1800a564de252c1e0bfa8ba505589cb15a137a731df927449714379c64478d51d08f973f59d29274eac66a205dfcb96d809b4ac431e4618d68068ff02c7
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/23303f8b1baa11890f92ce44a1210706
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/45f5f696f0b4cdd3491c8601d24576628ac9ddea992ed0ea969d38389f50b547835b2b74b7df0b3710b8b1048ee11665c9dc40f6ba71aca428dffc81ce716b11
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/d46db7e7e1c7aded195d6eedb280f21b
-LLD.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/8cc4958e9cd55ce3180b17fe171446fe222ce38c76778e77a34998eeb3f3fea3de09fb2590c2f4c2a1015575c1f5e1a37356695ccba0041f8381a2e4389563a3
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/6892dea9c18297dfe41710092abb1d01
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/c7841b05942dc5edef6002e02452651d74e85ae4f8b575505e9955949efb47ccb8447a9157b52c88686c4334842fecf415912e228c58a2632002a4d1e72af0b8
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/35525045309d318400241205333f366f
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/a7176919817fd94c0ea39c4efd53f494958441a53c5ca18e0932f77b66a5a5aa7b2b4801ff776deb45161b6e099b5b626a83eab5258e9648307c8010b2baac09
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/087b8f9b88ddfd488e196d6e1b966e45
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/68cef6e1f29d18e92ae70cf9c6c4e21c96b4ab86993f58f7addff16281debd50051dad828bf18b2ebe297d03ea1efe16b8fec3daf9f8cc33dd6124e77db28134
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/5594492322eba79eea3141e29a71fc28
-LLD.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/3745179518285c1e30ec4c554c6ecb630943985796e62713d85de79ff0d4585d790bdcd1d2ba50ad12333671e7bf45dcea09f7cdf84bbb25440348203f4f512e
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/c751963cf072766374881f79cc5de719
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/22d351e65de75276969c1bd9f78f8873878ac21035311fc138bb399c250e45d3c689bda1d13b00f0a1c1a93afe90292be749a0184acfba5bf2a17a2f9aab0c60
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/03fd8f5ec6894645206d1e8e1f2939ce
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/025a12586a3370d1e604daecb5eaaf6b3300fa8c57d68fb09cc39e0fa5c353160be2c1493fcbbc7df28a6c345c46731d77422a282cef7b8a90d0df12739326fc
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/5ff5a4efb63282ba6e08663070c030e3
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/16bc7d275079d10ba197910bd194edd4d67ba29bb8d86b110cf71658b10abef83c9ff37d5ee366a015302966382705f79c77c810ef9e37e8ab25c3e5be09c78e
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/e92652c998ce524305d1af766f12b273
-LLD.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/a451ee76668601477459a474265d390a777a7a985ef976fa430766369d378ec71c32fcdcc8f99066fb6fcec8c56fa0154126d9f2d03d4d19b393508f8e0641d1
-LLD.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/ed523fd79f3f2b42d1f8e752346906bd
-LLD.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/34053f32db29857315eee9dfcae9067bfd517c42e761ee3e56218a9439725955a673756eb593ab2aec3d0b1eabadf0e4011f4a72fd1e14938a6bed0cd6592cf3
-LLD.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/6c6f042e199db6e7fcb6fc9d877e9d4e
-LLD.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/b884c46cd26b9312a7e07e3684806a42263635dd63c3127be7ea8c0ad3c76c7de4fe089da73f58306d65689116c7e83de73fb06533b322ba5eacdcf02241140f
-LLD.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/d2516d66916580f58cd2d79bd3372faa
-LLD.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/0204064425018cc2b08d0f8bceb5f73f7823f3494b9200fce64de600c653cd28f9b65634a45d50e9fa1ba5b19d2182d3f599425e69ee38dd08effe065e5db2fc
-LLD.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/5f27602a98ef1a642fd1cf49141b92ac
-LLD.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/267a0c91b1c9c44fd4bbcad09a7db13c94288368f45763f11796e575eedddf5218a17405eb737647c552c2c62aaa5761ca069f0063bb7aaed9c2c3d82f13f1d0
-LLD.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/07104f4a51ade84b7e2d218cb2e10c5f
-LLD.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/a22de36fcedfdb03f0d73c8bb12a1d4541d76a4b1af5d15c6b6f943b691c26cdb86003581b98d57bdcebf746d901e15c2c2e404fd0569846c7e12107d0280622
-LLD.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/f0e7f72818dde68f5b0ca858ce1370ba
-LLD.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/a6d010fcefb5a20cdeb328b2bf259b7c380598b488c00c4cd11dc5b3040457829e31979da0e7529b5a613257af6b95e666bbfa5c0ddd6baf97dccb507ca823f3
-LLD.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/2d81caa1f5e04e1b99d1e64c204ecf3a
-LLD.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/bcd5d9442aeaab69390b36f5c3be112dcf753fe57c9bbc36cfec35a7e17151860c421bd5bb30846fb4d4eb690b5de1d85f589c4f1b951e50838fbb2a2216fc97
-LLD.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/eda482cc028138e1a3999a637abe8a25
-LLD.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/19cbb8fdf828f3f38a91c8e33c4c29bf21901fb344020e03903591c13c22fe4093bf0c78a07288d1edbfe2e4a7044510125ca5007f6eb0e9da506bc72240501a
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/cbbe928d4453ad9b2ef7852a79a77be3
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/31e85d5d253ea24740ff0f1b784a4eb425fa8a39e5c816f7d62eaa8fa68b38cdf4d4dbcf25136bf4aeacd35600ebce52aca535251af64c41709b9993162aa97e
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/9ff5f80c86c0ac582af93811d51237cc
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/ad97304983ff2b5307ca9dc41d642bc5fb20be47f93629b92d977fa6f9b989f7234360c9b8cae07f8bdc800fc3b3f9b9970f15bc2b552dbb62072a12e7e716d3
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/f3d9c3944080a14bbb728e672958f9b6
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/c741c0f11212773eb8a98b43ecdd67a86ca58afba5c1a35055f96ee688f2d1b7d7ebdaa7532131497b3b943f22c4985eaa9330b7efbc7f663b230f65ab80e2ac
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/99a5bd1365d968249130a0823e29cae9
-LLD.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/018f80627982006ae5ee21251929a57e9cd69fbb575ab47a831a889f8b87769357144385ddd575a7943c11221bd5c59e02fd7ba861c4bb40940531dae8735c8a
-LLD.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/063171318971a85982d02b597c1ecbca
-LLD.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/4bdeddbe74994aeccb8a24fe0255271fbb453e1e06daf2f34389770fd9b5ed71c88e50833ffcb44a052b886c8aaf38dcc5b90ee590d85a8479fc574cfaf142c9
-LLD.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/md5/5bb545b987ab4ef00b7d04fa70c493a9
-LLD.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/sha512/5adeebca1e5923c0121117f5cc08eaef3c798212c22b929019ada92fe545401a27ebabad6680470a720cc9a9d2421cb1cb24c4e524ee4b6d9e6cc453f78f2bcc
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/c9508e437190c2448a0561905910c92b
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/a07a4dd7ea2bf59cc3afeabc488fe1b8f729cd8eb6572421b94693c6efd8e7163113aa5d392767e4eec9083fe6e667240dfbe49ea7de9f1ac7051ab478af5435
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/ea8ab6c04ddefdaec0a1273db6f20700
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/c2ec35d58a115796aecdc37f967539d70138af22ea738b7e28d7427cc556a4729bd57e8971189e3291d5c739b4f55f4607680125bb9dbb6a8289185a1464c126
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/6747ca50400f608985d8e571c876b2a5
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/3df9f8a361290f7a78cc20588cccbb22dd2eda1d9709568eb468008a345594270fd7df6c5c7b22bb5bd6c8338fccb508b7ec1b7e1dd855092bac2b25a954c3c5
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/38171d910690cebc8e6caf53ccae16dc
-LLD.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/d6073f4cdfa7b291ce7cff44d4978b0258c0a81f95e563f9db9fa946be64e7611cc164859aeed55929b174fd9536bbc213a41605c12cbafc1cff91c38c9cc26b
-LLD.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/7f8f893ae41c0bb12c84f60d6c9e9dc6
-LLD.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/0a80b6e351f4efe79796fec98adfaca0b206783b38333056d35be1d6077a09b6fb00ea0e33daf3bec9261b31b1c024d545e5047507567caa925c9abd52660ff6
-LLD.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/7a7db99f2d12c8b73eda81d06116bebb
-LLD.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/09e99cd7cb0200296c6efc313be2c36635ac490f5d4100c62e830dfb40d5064d38430435ea9df7e49dcb1490f4d31fbbaaf706f21eebc0e3c78765db7e376ded
-LLD.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/674055d6ff38ba166f9721647841fcbf
-LLD.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/5ff89cceff288ca9df2ba35a8acc08e75204c200a2204c6d5f2ffadadd1440c18398ed70c4f6a0133cb58fcdd5d95e4f67f03ce4faa4661c9d9e4965b1c9b38e
-LLD.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/a30fbc6a5fa69f879d04e47a94e0b07d
-LLD.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/394db6aed33be75b7783aa0c1f98b678f0f96cb16946dadd1c143cd4b0378707d37fd0b35a4a9687de91ea293635142e9c433befe9be7fc2899612d48b94ae8f
-LLD.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/md5/ff14d881b50746ff4be6642233091bc4
-LLD.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/sha512/6918a29cabcf3d4fa598cfc8390e534be63c049f8c69157629a0e02488f09b461fcf7cab5e0678b38b4c5be6d80a45a70b434889a7d1740100edadd3a32dbcb9
-LLD.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/md5/2ec3ef1bea40aa16de080605f082b616
-LLD.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/sha512/1d69b07dd179b1e146c4df8879beadd26ac5ba2c1fe0052bd5c40f0788f274c9c2e4e53a53703d95298fa889d2470197411c867247284de48834e7440a0c7977
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/21e359427a484e7876f4b9d9145b0273
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/d1da7c1740f1c7ac07247b554f7cc76a64aa03200b44e41d1a10d66458caaa466099eb82d8d33327ec77eb784a5be91454c17d20c2fbefe049a128a2bf0956f5
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/f2975d69c12e91660a84df715f971130
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/8fe4bfd7c692c66d871f2f0c67a8450076d7639addd2874cf6068427462a8656da96fe7c91bc93e5f0cb016ee3908e1b70c5212311f65ca7f0dc64f1f921e2ef
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/1e9103f812fe163e2fc6c868da4fd9ba
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/e75bbedd72cb5f0337dac014d78d1276992a6f1d6c8832d16bd58ef83b2cd8602d023b45e1235e96ccdd13300e4b67c28f0a17cc0b9ff1ccb89263105a9a66db
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/b664032e83cddee3d1157f8c670ebe5f
-LLD.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/d7c36b0aa74b5d36446bb0aab413e4e12f042532d15e55eb79a807efabb93e590538cfedf7e3a6abe33da3f2326d631c2a041da2e00dc4051934fed17aed0c4e
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/64c9a9f1758b9b292e0a3ef37f16ea41
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/cc740aaeb6ed29c56b2881e1488606338e4bd0e049ca4a5b8312b1d9129b778224570336698347e4562d632db9049e0e91ecce34ef68acb23a8bbf62455a81cc
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/1a8e11dba5cb574cde42de2b9703ff79
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/290300229576bb9155fe6bd24c0ee21beb41d0f2a46b208ab5a657b0199a7376c1f4cb07204c8ee1e6d202efe30ca040a6fff63c69b174120de3eb9866e344f4
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/cea134f347bae257cf5f55b6388cef81
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/16b59143e929791b0c3e56cfb4970d8b3c87adf6e847fa9e2aac17c4ff2aa311ba2c7511c1b0ae2f39d9aa92f87ad4d99c042fe35bec391ac865fedb72bd3b1e
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/5f903bab0e38fa608e8965acce6f020e
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/01e5f6a32958e04174c545f57c6c3b1bc88ccfd5ab18dcb9d67b92b55ebc7655a03bf963c4eaf7e5c3792d4691427a89db372e7534c6c8f965f8a715a32d9284
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/241a55374fd067f3736a2bb929e47015
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f1fedea4e6b5f6f3bbf4d705034d6c51b06f011c2ecec1ae49c5b7bd123891eee8b991462d60be7fffd58f7c773afe910a06ec0b55b37eed9b4d09b9fdbd5068
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/ff018c7448a7589935333e46739ee2c4
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b646c6a945b8f42b396164a8e87fc2e54b1ad05681f438dfba83fdd3712a60167aaffcb0300bc42d904eb4bd34c002a71642b59540ca01e64d6fecc6daaacdd8
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/e6ee9423a82322b9233cafb1c92eed2d
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/c915582a9ce2dfa8721741fb1ed19b719ba40f0092c2d29ebd68829ee558cef0b044a5e40985cff88e89129cbeed052d85fa5c6b6d87f9b3a68a6e89079ab4f3
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/cc55112e2db358cf26d7bae3211d8e4f
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/0ecb43045419020eea911f1767dae23a6b1e81bb155ec493e911a9412e45f7ec71461aea2e6fe346e641747139cae43d9435ccecaa7fd6a234e4d69bb06606ed
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/498b2909f80b20588135466d5211bc80
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/120fff24e85cf970670b20b5f4509475a3ae0d7621f8f67d018f3a7625548d736a3abc89f015966b1329c6b0a08a1db832e035ee3bae384e2c5864b73a856600
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/1bcd298d5292f8e51f19b97fa4b27ab0
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/695c42557f9ee53b2e10bbf74653fbad4d02124b962a1f50cf719d2821607dfbb9c1bf638dbbc9e0e544f3020a9ef4a82decd13f886cc41ddf47c07a5e40eaa1
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/2323ff933feaf3754b442bee48a63607
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/47b8e490b89e04fb8886dae438e3ddcd53c4e98045de2b0def3988671827528c8e9ae296411464c0f17cc64bd3956644673f47a3817237f27e1c3ed16ac8ef01
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/37cf8528666064a434296f2e0039e9c6
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/ea1504a859509f8a16030db7a65f42f0e78d67adf5946497f2178bf25456c0f2583af72c636881a4bdd1210dc0d377bdf300ef55aef5db8c56995424a1886059
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/1c341f2b161e2320d3d1a74685887f54
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/4f6fc099293deb1a2cf729ea7edd6e17fea0dc8b9fae9acfe34e00b1f5c798933df9538c805c8d28c6279eb38f9ebae2a1aeb1a2f23087352c6eeb3b27b63ddc
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/e306d59c71b0958c77108e650fac2612
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/79fd7cec0e169a9555ec9b0acc3248991e2e37a1d5bb422808ffcfd4f47e79321560b7985c82dfe070fb0b5ded5c160d83e358399c6e7608eeb62cd4a1406f88
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c1d080f1aebb58778d730578fb113290
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1f420da1897bd0a61413321aaaf032e8ed38d59e6dfe3313ca3a6ee6582ae6c566e3761ca8fcd1f5a964337ba8a9b3e73dc55ad68aca139beeb45e43d51e862b
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/6f4e0c7d2fe9ac254650dcd2842dafa8
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/bbc71b334250e5e6429766d88595adbb671a206630987ec2a27e05711ff0f844487dffc1c136052ec11394e9d5c51c70d1b75d5348f97d3bf7fab463291e9dc8
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/76925b9a7bc249b2227390c479c54f8d
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/20643ecb79732e3ae9666116dbd0763c18b808afa78e6a14998aadc7265cccd6efd28670592db61d3d27b8d3023be4c5f3df41fff9e1b38d61abf76829090b4f
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/399b9aac140d9050088fdb187ed4645f
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/8bab65965670fe392e78d0b9dc78c92cdcf202898f6d5a3174eb89ca5cb95b995675c8a7d81bbc4e95e490ad1a43d9d29d7907b7006789c0143a1d8f24cccaeb
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/026a4f5ae9eb3ac05e5e8fa894d77a5b
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4bca8bd558619260cddf4e2f4593cbb2a0691b5ccc6d1dea6dfcc5a2b5f51d7d1a76c35e481244e211e2eacf32bd628df5ad0e6c75e5185bb1d9b569f6acbfd3
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f898ceabcba052b7e6713a2b2c208a92
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/92be1910f795390be5f15ba5b2c220a3209a5f7ac04fca3f5229486628bcf5d2f20cf6e4dda8b41d6beaaff42a68a9ddb95fdacc6eae33b9183b581e9a194895
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/e366058cf69a4367945bdba9523f2a0b
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/45a786e8d0162bd5bd01c029691d2928d3744ef4a7a1efc2e39755dee2f9a9ae23ee725f0454ca601cb9c082a342209e9129df851314b5757c74767b13508fc4
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/665a8502170729c86ea95a7ea2fcce0f
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/c1a2a85c9ce14af8c91bc9a599393c52c0b8a585057366b1ceeed34c5db44641ecd0c9b377bee80cb4951fc7102fbb4f21fd050126bfa5bb4e582ffefee17035
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b90b2130262f63f5189cc8e4a65e4433
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c1cbfd38c82d676c3fdbec486691334cf7bf4115d9ef2665012b71725c28545a49f34edf5689ea0352822c811c24c89cc152d1fccd1586b17ae8e6b2503641df
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/2d5360da4b2c9ffcea5d0a646a7c114b
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/73323e0937fe4423883480294c8df44744acde4f47380e35535cbe69c855c0e35e86a1eced3085ae0285f284f47af5ef237f4650bf2b6a8b9d5308efce88fa02
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/a9b9a65938a7701aaac6fa706481c867
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/fe8243aa131ad8be54f0fca5754c2e68ec39049004ec8feed499731c5228a7a46e303ba866b9f9a55e5318c73d8a46d964673e111f6c60e5ae1628c568d7d894
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/0d9592a287c9231ae2db65000be2cea2
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/4ee192dd33f518d2735a829ac8f822b5672b39e8c2254987aea6e5f2f0056213bd85d84c4050d52ba9ac8c35762521c324fe2d6e18db0396e7142af9cb61a561
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/d487598dec9969485dcf785fc0968bd4
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d3117739919696b9b0c9ae398f1b1e9db8bd3e2e27839f62b3551c22ae2517f8abb69e57e23d125002bb466889b7352e69c1e9dfd9abf1c5433f274e928b341
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/943434b08dffb54e8cf04ae7bee34923
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/77b7bbc5d988cf36ecd10609e091cf24dea134cd32c7ee96dec7bfe1a4942553b6205653edc16c8454261f621966daeb267f42562172bab4cec9693ad733d867
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cb9e371947ad415de048636ed78ca48f
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/c00b696fa146e8c29b37f15f78ab3325db9b3f5b3514e615f145b4eb7c9c8788662cfb6255b7dead596cad8c576b378f7459c2c85d462b597ba5e21adbac0536
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/485f061ee8425f042e4dd3042388bf8a
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/845a47a36c61b305bb70b1249f6fb7c4e8f740acff90d3e850ab2e887f7d959ae263431a02305bf7587e4194463f9932769d500a19709bc479eb6e6168325421
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/f234526410e779188f3d22da438a4926
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/12e2c9fc5385ff142bf82956268230fb01a6f1a1fdb3a6c1e13afd341dd2eea970b707168d5f45960dc9ebbf6d6598af0ceba371172f624ae823ea1eef4e9031
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/e68cab4aec1abcfce12a13e3d1f67dac
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/67755b34ebe04f4d28be3be2a37df46b5e900f38dc4908875f66671fbb740cf033f2fd9af5116635f55025f330f7b1a478cd4900db9d00e4699b591a16269100
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/4a71aef80b75b2ea1a5b7f8521afcf5f
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9deb3e9615ae15dba8c744b22416243304d30f100c8d17538fcedbc18787147505f74ecc2f933fc54101527847503142cfe84a46a95ca3c57987996e3b8583f1
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/9b28ee75d05cbaabff57fd45cc0d1cf7
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/bfd3d6cfd4a5a2fbfe940f64d47a86a598360e90619f8175a2d1306f0894610f13fc44ba099ad59d2989beabf491df08a5611bcef3fd61b6391ea0230b11a432
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7962fc6f08531f0dcfa44bd667f31582
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/2c936064685f12ed6764c187192023118e97dcbff6ca1656f0304a40772b4ecf55ee0296b3c2a00760f5bb437162e2b737635fdd59b889d35756d697fc7e6b72
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3eb4d78af670d446f696449a5e71e3ba
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/315dc76799f3e443fdb5ebbecf96a08070f8251930a26995de892b8e67bd35bbb365f2cc5fd93bc7cbcbc9edd08280ee8d2a36b28a704866dd3fdddae4969455
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/e73cadd0354897bd5bb611cc1c027858
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6f444a4ea22e7108ab75686ce9cd78c0db0a677e39e8434896fb1ec90b9dc013abf7de1024d329a9726dabf229a8a68c27a11f211092e676715d282efb7b8504
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/aeb310f106f31126dbe53459e36d33bd
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/cd18c115415dd92bc7fbb5c29cacc5848b1f3851c3a526ff9c0813ad46824df0a4f13a66b1e6641ed11b44b5b937390619f01666fe6d5a047f1772f0ad03c941
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/300dc28f7af6aaa69cec9a214a57fdb8
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/dcb40c5934118c204968cb963a3fae91179eb1e31f5397975ca98c8a7aaecaf2a7f81847bc426fd306bb76970794502ed4f94d8f461b96ea90362130f44520e7
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/e1f23fef82fbfcbc28899677f12658b3
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/b6b585060832d53827376ac6c00cc8bd5dfbf091c38c87020f6de42adc86dbe4fc33ec2c17f4433176c79a509681d694ed1502b179efcee2c4dd4c10a26e87a2
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/5dc96eef71dc28611bc998ef966371c6
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/781993c75bb07db96d02b5a7e779116864730a9bb941b64420a435956a7ecd501b5b2673f1854c09ece5f0c73559d5723c271d6352be57ddae6801a695629362
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/8a1fe0ccf7699ab7a7a514b620112a70
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/d002083045d3eb7c749f2e97527c1228cd317a8138ff254228e43594a6cabee47fa363785466ebc2874cc438457640ff08a836eec7334afac451506ea7bbed03
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/331be92bd3d76bb8e86991b7832ad41a
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7b1c6df53311a17a92a41cb67ec476f947949c4ca5d15a643badaf9f01e76a186abbb6e156f95ad1605d83250df4e081164986a6b7fcb3238076b0ec5a3bb565
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/97c7f5267ad6927f699a25ce44f55a70
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7b847c6026fd7daeb17a4459b852562ce6664b2f406664be672bcc384dd5a79b9505561fc61dd8fb78a903a2ed4978f322cccad19f5a3966bac856e877c11ef7
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/c86da6a396fcdddbd26cfd71c0f70458
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d5b75b43167080b8ea456e516c9ace02ee6066ce715a56f0b42cb8045b965b1cf8d4ebc0786c23be4544693ff858816a6257b0638ec11e077df32ead62f7efb
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/d72e175272ed893688d18e868120c575
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/9a46eeca8c7a8be65ed487a74227534e08a257e404814c44730f12a5bebc8cd160998cfd5ed30189aa606ddbe602e1b1788e465e4a210103c6726a7fd230abc3
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0206fdaa9582ae3bddaed1b6fd7a8cb5
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/584a67f603f656ca5d27aa0ef2e425ad385612aff06cdc1d534b5944939a09246c93954fc153b8a89acff721e657a8903af9a640abc252d4e452f348781bca98
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/0dd14af342467eac2e13cad4acbc881f
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/918f2c66898f828414009fa6ee273da5bd654e4b787ebb4d703f0be27e388b46870d68bd58c4f45638d276c61c1bfe2f3c67fbf34dfb5578158d072f82d927de
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/b373e1bf2a24f34496754438e563a5e9
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/a739f29e332be74cbcc544903d08bbcc12c3e9f5c3d02d130ef1c69426ead2c74b14f98ac79e88ba29fb2e2dc3b28b7d81c9d42f2e27e0ce9442f6a0e81bb8f0
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/1fdbf6aa0751788611054f7e98024104
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/2015b8e84642b2434d1089908354b47b080d5683f1c7eb2c09de09abb3674e7119ce4ecfa858bf8129fd9e9075bb45f2e53a997421f2457aa9b5c4a9d7edfec8
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/85bd5a9e312e83a09fa5b7fd6abfba76
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/0a5cba5c65abc72361a780f64e64c463797aefe52994699d8d785437b773530e49a5fc2746e36bc5a31aabf4eb4343870aa448f8fa2b119ede3e1c4ea228cc9d
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/ab07ed76a796d86cb6ac2ae4fc563eab
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/485117c7e1daca401c1cfe77324e8f5961f6f33ed2a3c907f4c4a2bf9c45c14d929959cf8e4d9cca9ad112a3ce6a851e336cd793bd5ee284c87b9fe487700ecb
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/852449a26af61d8554fb1b4c22c4384a
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/a080d2da5ff4b832822e099f150f0c15b985d54678a9508711f7f435d6ceec68eba12b5f8c25db0b4841dc5c5edb003b74b4fef391292b9407d7bda73d35c4f5
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/eb999bcb67f789b6443dbfe907bc61e4
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/811f72ce250184ccdfa30aa992884f1bdd0a791fa125f089037bf1af45b844d76807c5662a904ec9312b79efc565fd0957f195a70a39248eed99ff53f3284cba
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 7effa4955a5f3..122aeb9a53337 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,111 +1,111 @@
-LLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/a01c365e50cb49bbb9ad7c63320afaea
-LLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/b53adde91d5e21c734a85056e779f26f85b6fe084fa89b679e3d211c1209d14d0b7db627f9ed9692edae31554d76d8e1e85cc34737c4b29273c2a8ee908347e2
-LLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/md5/536058393999e62a2829b5a2b1e85f66
-LLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/sha512/e9d1f6280da11ad3cd956cd4f9520f344a2657c65833ea2c88844cad7005ac7e5ba16f607623ff80d03b7b362752102366d8c5928abe827187f74669b80d56ad
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/e923316d8d68df6ed6fbd8deb7aad6e7
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/9fc91d10805660a46bf10084c04a9bc53bc540f390ea3d88cd4614a9328d0f3b386862190ffcebe09331dc0324649c052a50326c44be2477062d1573f2ad3b09
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/5b600b39079942129b78474ddbc32fa1
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/ae46cd569e408b4365f8e953aa1f7cce1f735872d032c71f4a6cfaa53e4d67829bb5a6c8f2c99cebc0eb224b02bba4c24771faeb79c6b99e641d11f5c83d31c2
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/6f0980a7d08783cacb5613752b04d41b
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/d45e5be28d6c786582bf79b3fa65d67eec12b2f792a408b32be91d76452bbf95255b6e60019b6f1af409dee4bd95120ac1c6fcf3e139613a774e684f4ab6595a
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/e25c9371e8d59d69dcd8009a892bbe38
-LLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/0c67ec6ad0e31b98f1e13883ee2436a4eee61ce5cf53da01df3d12b670e7fe9d6ae408d2dbf622604fe95fe0ae14bb0240c1882616ff1d6c2bbfe8045077a90e
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/039f50bf67a682eaa9d69474f5980302
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/03a77ca15883b7e7535f606566d782af8b5706b5d2eda5caf33a3484baaba1f657b65d3e5be9bc40050f0a61521977839da8ef4008f4593296fe318cb3772377
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/30b471f0630f67245effe1481ad1ba6d
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/ba7c039b13915ed7b374a75c81578ddd5a8d569cd1022fd39cd40f089c39463d97c3306668ef072becae38171aea44713e27c4d34f4893c671f12ed08824939d
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/38110539776130f9fb2bdbdb1b6a4598
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/d8dc28c542ff6a07083e60feb8d7b731cc1b3b0a55ca332200b62e8b0218c2db30af6a5870923db70ba4f47a35f245a6b82504ea1af410e9a43dde03f41981d8
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/7916ed51676598067620090a9744e110
-LLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/46d9be864409108b2c87fac4b87582637fc10d96382ba265a008cf6edcb046f10c5d4e0de4f8f824c2dcfb32fdbacf05dd13928ea9905e3e338f7aa4d5f5fc4a
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/9c51e4d7f15a1a7662d8acb674cb809b
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/e72918bc0af4806cd68f199decaadcc892a70dd9dd4f1cc23f59d361b3e01c7f3542ae295ea954fc8aa7d0121cdde2d06d53616d3196a532b1b9f45f9665ab9e
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/10ed73102caf2bbd27766c292b939f89
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/9f08eefe7c0027d7c84955160cf6bd25527688598c6191f9822c12c6558f534567f0c61e4f1397e62202c02f4c8cb61eb0094c343cdcaaad5221d90f147be210
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/b4fc948b8532d6add7177b77b89feccb
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/6a7c022e4ad205112a1c4665e28286672ed0460925d1e55fca04af9afb6a6a5fcac4e641ae0c1040181d81b1762b2a2ce05cdc532725d5da0702a44668bb8893
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/7b795c94a4f3dd8d79c5c4a85ecdc8e7
-LLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/bd6cb0c2f2ee25823fd9ad9ef931d7d1518c5315db70c33ddb959fece0fbdd1eb030630a5c63dc9f9e50b1bcf1cae9c2f7d24585f9de100e6fc56c4a4b6f7a06
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/9812ca2eb64521bfd052821824227598
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/f939353f3fc667fab78b1083365c51bb15eb218bada2e061900bcd16550db5ed6765569ac9990e91294f030c04787228fed14324e2f192ffec26c6dfe851c9a3
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/cc4e38e0a6cb55e16935f70e2e9035a9
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/297d7268808ab95317d576301f81fa52db3d8f9e91e7a2fa554e87afc21bf195fb904a585c39b4e83c7ede6a2ab64efcdeb929cc109894c3caaa7de9cfd41ae1
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/40459a6691760b6c5a986221bb1d3c7e
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/4b3e4bfeb1e1ddddec786d50a018588d6eb6c2b97fb6e68e1c24026f51aab59229cb8a389be075da5abf3fa50ea7fa36b870ef17c374e97d8e85377abf4846ec
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/07958dcb7b13ee0fba1379d729fae641
-LLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/358c454d66b48846888fd696c64ca0d154bf9df81c27859169e6b3274fa9aafbd49935309e976761a4d95c3f2b80481834ce9691d08a2a456c20be15f92319e7
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/53572cb82b955197819eabdc1164073a
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/cd7da353ddbb1a18305a3b4b20bdaa92f60bc3bac1fdab32878cf53e921b8782f4e87bbfb55a7c7076ce927c42f02d86c890f8b82498eb41d58e2702efb0e044
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/7420d73f14f4124c79e6477e72ecce25
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/992f5ecb3c9b162946bc16e8ce680bc56e5ee5b928e05600b87620554b9f7a22998a03e7380171d659b25790b8408ff656649e9c0cb7a8f91625ca38b0b4cb2d
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/c52e1f13d082a786e02b9a28a5192ad1
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/fcb6e45b4627b6e62cdfaa9bb333863f99ac4b629082fffcad62f223711e1c36c72be497a6905b74f99f259213eabade3cd7d9b2d95ef6c77049380b46476a4d
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/abe229cb1c155815bbf39735fa2395bc
-LLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/41ce4c5dda1d85818387a8c0b8061290ecb630b243542b9d8f9f28d5c4364b010ec9dad682a5c62859165d61664b1f43b5343a79bb7afccb79de0746ba4eb0ba
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/7f78b28890669a0140ef864153edf179
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/f610aeeb183e6c85e0e50d0a2b483b16d11cce83a237e70ab1297914062d78288dffe14f0a63e03dc8680c5bfc096b9e66a2fa6620833648896535c221640202
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/106d266049c26933d4398ddd9c7e3af9
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/fff0928621c86fc50283ed5e929f88ac6191822d2af8280a61a08628d099203002b9a43c69df96d85de36efc364c7dd317e0db48d9e25699440e2279fed4789d
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/47e42e23351ddc592ed7e4c0acec01bd
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/90f9e8b3f54373ee738115e9105b0ac94f298c574d96f8698ca72cb917181d82146c9f7dcde4468022b36cb2767a194d643e0c044af0397922b13b28bf2f6177
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/29923410db87d76d9a86362713378f3e
-LLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/c1cf57dc3a3da5b16d14b80fec560c2886fc619addff07d388469aebf04b625d700d89aea52f6f0d50767dc2a4e426b017743fd66ebc15185fecd65904267b8c
-LLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/540a5a6a7b785271e5354abb505868c5
-LLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/61807c0e2f0cd25a234db25d171aa078c09d70d683ee874de79ea647c52a5875b8802853051b0f47a12809ddfc88e868cfda6551b789ba1633093fdd325ebe6a
-LLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/7195f5e0bb86694fe6c6708510806a11
-LLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/ea058346b3a600ca5fd3496d0a7f8ce7634a97eeedacc5013e458088462d82762eb92f8d82e6355bfebe4befe397cf0970fb8ec0005f8a80a4e8396b24f7205d
-LLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/c59ab62f38a937f322326472a6faf6d8
-LLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/54e6b32c0c3599804d44cfebc2d772a00130c3f41403b9211d16bf36112b9de9070fb3f663d09d239c4392daf4238b939cca1267369277a74bb7d176e841d732
-LLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/b5e5b32c0573a376ed4eb76473f98cae
-LLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/9f2fb572354d13b6444bd7ce786de90e8b394ecc6b69bce537b9636f3e6906971da317c6ac5a1da4a9ddb4a879060fde75aba5c96059cef5338cafdea78b47b4
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/f4f58134ca48a8485d70ac135a7b8cbf
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/84d3ff5c4d62c56b0fcdce4d9f6c83a5b079d182ac3dd6521ed4df3fa7e8f370cf4c0a3ca284f25a034525141094ec422e3383951c35c8fcfec97339fe477fb6
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/f797f871fc6c676da2c8006f2b2f1863
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/a7b74a8e4d3f0ec5a76b2fb96ad3131c4847eaa46209667cd1dcc6e60c05fed3a584e9f302a05169a04088674529d2fec4deda5a9174735dbc21f98a681375b1
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/445e297dcd1aa83d9f7d29cca4f5e3fc
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/b90c9c7daf05b9afe7d8725c97491efe7e401f055614061ed894f140eb24c2f02f542daa15a4506037eec09a777070fefdec2e6594f1eb34af3eb113954b9543
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/401ea6f6290a5fffdf4cf55a72b41655
-LLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/87349bc4dc337f86e192068b82acc374bce467af8b1ec696fa5b9a98f174647ca8a7aaa98e48bb7ed308d2dd8dff459c8af622f7f2f9761898779879a7069de2
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/977e40f66ead0070ee1b10c16b4821c4
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/de1431693f04469f280b512874836a03ada21d0dda05070e8486043999380fb73bddfcd15d1ba98cbb173bce90b2b69589064c02551785a868dc34b45a9dd17a
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/e74514de403eed67e4fbb647426a0fc0
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/3994895cbb78320048ac330cfd29700ef71ee48081a1eea4c881bee7e307c95caf8fa74be02f8dbc39a5a73b47e7498ede7e42b0c432fb263984f5122a6922d8
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/2fa81725e463088df57dbfe286319e77
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/c3d58b6537674ac8b4a3281df3204a7ca6071c8acff7aa54fcd506e898ce872dde81147e955a708fd2ac3ddad31c4d7809c7bb152574e19bbdb72dfed2a75221
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/397ae293e48777f768c2e98f9e6c558a
-LLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/526536b102e0650b3fda0c8f59639a3d97b8fd72c75c4a066da0955637cbaa5f331d4c1e5e11af48cbaaa813a274f6dbc360f75da9de357a14762703290a1fbe
-LLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/3524c7e60e752ba00e982730e76fa40f
-LLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/39d5dd8905e51ec8fddd4c29142e1923f6379eeec6e5ce9e52ecafcebbc1642bb06bc912259082ece485071c32f35d56ecd1c19af270794d54a1e0a52b28ac2b
-LLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/md5/b7e92004bf50eb5634b0134a41acd80c
-LLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/sha512/ecf7dc9f70b74879ad067de733fe482e058231b76a2c220d0eed055f4967b0a9ae5d3e572b03bc20916beafe29f40a404c0e03ae18398ee8f7f95801b085e536
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/8356012f5f046bf0c931ddbb600e18cb
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/e9324a4bb5342c65dc978425c2da2c61baf1999dff9d30163576699b5be347217bf8750ded3f5a80c166f65ef94c150ee58e7f708791a41f045c2d9d10ed9f67
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/f28b04fbb4f1781d36d4bf0d667737f6
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/62a6cff9bd8a73a17f8fe851cf4ad69fe70b12a3688323f3d5e169b942b8d2b5a7a2b1a6583a23e0d0c98fc9631b77cf0a21078999d45086f4620f08c0906782
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/ed5489b9b7d1c2fa7632c91630079c88
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/ac27506410deb7ff4046def2bf277315e11a7cbf503631d62a749bee50fd09c4cfdd21ee7621f2232ecdac20c7be8b87dfa2125832aff8bb5cd65b7f9e7ae875
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/c99551c46f7ae080f3d881247d83ad2a
-LLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/35b8df0053c904ff31db74d5d2b65fda7585b30753a6a96bb0237fc72e51cc39503fa93a0bfefe453941b047f82fa5aa08429ae1431b1590ef7bb358326485e1
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/1cdb2a5b49fe2a096d094ebfb5f298e1
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/8919a3d2ccf821ad313bdae173db7519d627a6006804cd695e9836fd25ac2b4db5a4c3ba4faa673b89b33ccbc0c23f2b37ef4759f167ceba9a5564275840029b
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/74e55203c31cafee26add48f1016796e
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/2fd6a34c49fb4c62e8104b91d34c01fef8cb79ecb076bd0c6e325db46a77b79652a741b9e9a4fbff454e7810b3b4352d62eee098b0a155496bec79add7f9ed5c
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/2ba719127a88be8d60251d0b324c4aed
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/f117558f61da9a2ded579ed0f44151a6480f001aac9c082e3fc6fcb0737d400f97a3e9aa66a311ad6cd7a0b9c53dc2c37be7a0f1d7bad7b742ace2d80f20fed5
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/4bd2592a0f027b04599b35fe79dcc2de
-LLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/6240c9f909a222b7c6f51b388d1b49be5ecb51e9a5c17bbf3aab53782e5de23c3e60877208fb0117d46306b9d3a1527ce8debf3790e5e66255938465c11d7570
-LLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/md5/1f161fb77f9b880a8d825e60b83a288c
-LLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/sha512/0913c1033dd51c79db2169ea3a40a0809eea49c04e59aac7502a86cb561d7a1a2d61a903b77f92d7fbcf1f26bd9e9e956e3c88124792e9a0d8400fd7cfb593e2
-LLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/md5/78e5ea646651847d21af1a710b709660
-LLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/sha512/c8789f8fc1b8695f1dc1e49fe8d8512b5a1b23554d651479c39dd64570e3fe141018b5d168fedc5a6f5141a2e71a65dc30f87376374f8f549981e326c87ab39f
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/8bec2b0452f7c87fc98ed9ccc103759f
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/adb89f7904850c31c42f8326fc67fc0331d652d02546368e1db0242fc940074ccdf569b065d77d4803e1ad6cc39ed3353caddf4d0f8cbd92db7d9e95c5564bc4
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/2765a9d83b62b29719f3c5ee63370565
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/4e67a737e953b9147dd143d5d7b6ab6a3bf5811510664be41dc9a1572f89d75d97c4c2b334fc4c60320a8552845444ba3f12230e9843edc63884264db7d6e3bb
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/4e9a5587cbc5926224ae0d4a36c581ff
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/247c989e3843fc4da54f4c133a96d5bcfc111b84801f52b1b322b548d667657a932e94fb7e0778d1d0151d8c5cc1d0a21d3aec555254bb4d8c8bd37593554ed0
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/8a71fcf24c6758307f0491ccd29e99ae
-LLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/cbe1131e1202e15ffead552a8176c1a86e03556c9d85c6fcf886d0f32bdf31898e3d588e2d6e5723e89267b98cf111abf1bf90b1c6ec6879b7ef8c87d52a7e6f
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/341e3f0b5c160100f5e12783b8f779c0
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/65b2c2091db1364adba4fe1e4ec6b9d6582432a0a0751cd0a3aa1c69798633b3aa5ff09d3de4e47d552d55d5ba81bc86662f1784cff2ed58e800452488cf9d50
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/249910dce0a9ee089711b71626972b26
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/1eba4ecfefb56a00390e5c528c467f921d64e9ca40f5fdb4d7fe0d7ee995f03d253887f7fe40ee285f03b12fa7a194543d18cade6af8a24bf47e56b06a78d901
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7bc3125dd810bcc44ea2d454b6caa683
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/86742a4476481b14145855ead8a5acc6397782f6d3445f900ac2de0570f1fcf53563cf5e1f3cb59886282083ce63756604f1ca2434e9e427cdc1bd1f68373581
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/4eae06d9e6272aef23afc191501810fd
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fb75927982b1428b05b765bd5ac017b2c15d89990b7e6cb582b9e1a3ec04d09801d25d5cc6c037a12c205edb7c0f7a2d33832a2d1de7920711e9720dc3ca3655
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cd86e18a63cd6e84a1493acf0df4e267
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/1dfefc4600368467ab90ccb527a9fdb012b9b7f485d932a0db8c4b1b81985fad931b74494b76ef2162e46280447d39a055b5681b33a17c564c50094de29aeb13
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c7cf7daa7c11827ae4f9fb2e16f3cce3
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/dabe2940606a671a8e3b4f28bb9e813d000650203c382372142457020f2ccd498534903aa99320afb7ff960a62d752ee6cb724e74745bc1bad1051e12cf78ab4
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/62e575b89fd92d9206abebc19b084abf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/7ac029567fd68fee82b7096e2fe278ee5cd2935494433b1faace036469c54bc471d614d0bb339750429dd88f3e723165d2dacaa627f73c3647c6f3b51a4a3034
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/5d39ef811bc78204ebfc7e98111469cf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/10fc9a64d63351e168bc79fa63bcaa6fd49c8483e5ecc40a66216192588367e9b47ec3ea2c047e88f39ea8f1caf8052726f4bc8858223f7744606156b4133970
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/f072fe487e5d1b717aec49a6244adf05
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/42b03a2562728ac86e751abab2e8233d583baf006e69b107d002a9258844ad53f62e6332eab3790364940d478c7ebab6d3e0e2194220e8436f40e6b75063d1a2
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/eabf0239298f13ff4893011e75828bdf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/34724d9c9a550c85d406021d7265e1848b002b8f212427eebff6e8f03ec6acc336efb0c2cd9d9e1c76329e7c84a84a9d852b8de5897550d957e0e9385129033d
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/1910b5daa31db6542f0c762901ab7d43
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c43e8091e9946ba1d8849734a25b258df95b4759a79676565b624930d4a19805a78b66b1d193e528f95174d909d7895d4a4e49fe8ca298a24dc40d25c95900b1
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/a5198b13dc75ad3454e05aa6cdaca48f
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/9ec8078a1a7246f1545fe074783d6b88ce9b50f62b0438ff5637f6dedf5bcac427cc252c350354b7063f79f4e31a19f699c168c15bc6547a207da497026c2827
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/f569654ecdd8ec2a50986ccac8388c69
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9b50e3be1577a753f0ce42704846bd126229d8dd9f28bfcbda58c4f18e4b9ca4ec6bb9b57de61b3b9af8157a2983aeffb8af782a073e5e19a8ccc261cbea9601
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/496de8c9e2361f44ac6933480620d07f
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/02a8ecfb6e81e0fe07fb0d616a84a590e23e944588c18348c32265bf6bf19196beec189a0bc40514e379e97a9c8bef83557260839800fabe9f8e39e96689713d
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/05bc7406fd0a703edbc912bb3230eb37
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/898dd4c19dd0f22dcd1bd44264daa8dc64340c890c3368fac7451da1ac872a687d55b5eb50ae4e156c2dc4ece226ec05775daebafe9d8b53eb83b72d2986ff92
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/d6ca30fc3a2796ebda2451f80846883d
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/d7dc96e1bbca38272b1ca78b3ff995fc30434937a58815c63d0a9b4a017964cfb269a1f3203ad8374870257152229941d420f098644375b5f4d1b88fe39e0dff
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/6eb1a197150ad6c165b82c5e0e0db102
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/a159598c2bf351ea79d01e8a454a82bbd9823c080399520af3182e57259957ad07834b03c336e6225857da365e8ec1aa9f65b0ddd0821883ae817cb81f8e6dab
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/116d849cb2fb4b1c8c517397b2b04192
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/7b2596c76d2814fc30992ba78e5c8f93519442fa76004187de9830732b80bfc6c77f5d7aca042c20d8f868cd682bb6f71e3fa32940bc8c7b401753dc4ac2f331
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/27837dc854a173bd37a20f92383f6913
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1719205cba6de969e8724a99444bf958d5a7943ae90ee2dd11193f56ddfd4f0edf6d9af6da2e67787a64b91d994fee76bd8ffde36486c5229a980c2c4ef07e29
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f0016c21c045e205131ea22dc711acaf
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6d192b7e21c7ee3327d288b890f4c5dd03e5f53dcba6905a34cab96b7ad0ab6364f5271af88d95e60aab8f569a8840d17e16f27f6fcdafcaf537d5d4a651dca7
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/9a2bad4518966db29e37e7c88388e779
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b9a10af9dcbacf1f129d4e9b4cf562a6a4687252cc8a0fcd78f52d75c0c20be0ff32e67413a7902a628b04e7fac1091d35b64b145e33814899796009b6ed2853
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/77c4e24c1e44ce14bc6476954f294a15
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/d9d90a4ac788dbbc1b532623a380d4cca8813ecdf8b7b4a8cfff769499e50a1433bac618234bd0765d8a4f50aafb3fa724d16ac71baf75ae5a2b4396fa2bd017
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b29e36dcf5a0aa05734f1d6a0afd6944
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/ab46a835f9843c5b3427101bcd0c5d2b8acf79693aa9b8d4282d499f25df4ca248a81fc94ddd96c75d69d3c6b3814b225eed81bec32fbe9199bffdd605f7fec8
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/a411269f925cc968a0438562262e6d97
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/04f275603134b0ea0f23da377e4983765885f2b1954d5c617134af9f103470a5e50dfda18bcddb836852db2382f1c134db40df00b36c8bd00e7a9e6ff1a9e684
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/841921e33407e15eeeaa76354aa2b737
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/e1fb8b75e141cc90916c5c81c31ee91336911983c525f38eab86682ba69679dfbe1f10c9b673323632fc75f38cacc2af47a3d5d5d1031ec9a2a60cebd68d501b
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/7342a1d7b1d2c0fed7f5edf1c331ffa8
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/dae8ca11fa8d34f99ee19a95bcd108a65b9e6a6ddf2e5a9b126f2ba1b1cdff6b7ec21e9590d70b3785593435bb71e47703d9765811db814a90aa8a47940421ff
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/10aac489dfa10a77427a82958f525da2
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/a87f721df4fc5f6e929a54d8e41e55fb366a051a610836923213bfa42a7f1593de880391131619653cc3571bb76a4c82e011852ee5a6005523957c9f0937e6ba
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/7f231fd359f9297261c22f95d8f738c8
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fdd6441011609ef341108ff2d108c6f320d415b621a69922aeacc555c3d1ae6090a0f600f24e229a609b88ba9c1868900791a6590033b7dad333ad11f8a6365b
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/c4523a485082044553e1a89049dc4734
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/db365e63bbb5189f7f348e2fd51e627ddfebf838ca9dfc6c0f8a7bbf6b8a2a03d78ea3ccdf08b0c2674f4cf5a0979506efa643554091ba751f16051bdf42ca9f
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bcd10e4f3e5a4b00d52441e0094de1c9
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b17fae89a3dfaa9428cf48c9c0866477cc75edda6aa3800702227cc9e3d6ebaacbd60cccc96acb4ccde56a2de531dea5a436bac8e6c450a4674daae23b878037
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/2be8cf274b7667adf8d967a27abdede0
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/15f58c9a00aca5bf828708089912f128adfa3b719cc2fa8b9b4cd7ff7722d02375bc9a961b02d5c6a6c9ab637b626d78876741bd824353aab944e1c3b6719837
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/0dce4be3e8cead78cd3d12ca0796d560
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/cd60b39f2ccfca8ae0a497292819e9cc1893f6c3b2162fa9bb3136187351cfb1d6e4855141f1e9252bdee7e97ad61c0560566c2e9f73fe77a26b7f4ffadfdcdd
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/f2548c8f4bf1edb488642245221829b2
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/1604986526156a40ea82f50ddd0465d06df9faf306835f1dbbdac7da7f97c60fe684cd6c64acd8833a9f8b1d16f80c123ceef94fc16f255f815b93f1d41251e4
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/1c268e3e93ab3a34b3c05322c2fb0dc9
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/f111ca82e196ea9507bb089b9d10990de1acb1a94778c40012ba6bfc16cf362369fb1f9dcc869ce14545439df21f432589ec004816a1ba0323c5edecc2b84211
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/b39ce0b0f143c3bef4dade99251003bc
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/62148e1e0a31d6b28effda0a5016d9335005b27ffdc5be1d184efcbb13f13e29eca52eca19cc6800d1d0421c0e67a36027e05d5fdc967dae686b5bfd112fb2b6
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/9475748210eb5b1947fe3aa6673b6c29
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54320295e59e5903db558b6be0220442dbaf7ea78e1612d54a35cbe014541b354ea708679da00851b962140b6da77301e27b656fd478666d3f0f710382c13a85
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6a533054ccfc3d1b0920eabcfb45ee03
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/3871620aeea2ccaf6e4b17a675c5504624fc6d8ed57bf4e5b66e0372b7124e4f3d1e0f10baa1018d5a1ac5bc4bf0e9d2143e84827712fda1f512fed24829f1b9
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3fc6d1b7d59b98823d6016f97835b7c5
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/745942235e40f2ab71a5eaef2768842823620d4a4dc7454a7512fb2bd95bc8a74323eec6a4b33edf1ef935151c18a20172f60fcca2fca1ff3a37b1e019ea4640
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/f069af39cbbb650e293093b5989324a8
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/34685eccd8c1cf7b72a52bf353de16bd0cac13959584217ce5d0995b52f506909955a7051ff7b29ab9d9c3f603af8f7db936f11e4bde83f5acf16415de62880b
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/819a9695c365b9365b6cdba7cf9288b2
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/4280768862b19918e11b6a7ed09f150270e71cf4560b18b224b3591c460c9375777e73e41eda375271d719f23b211daf3ed51b3c87bf4ee4429344d14f1ed7a5
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/28ae362155ce224cef605cee53e36d0b
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d90f25e57f92a9da68245ceb15316e3868bf657d7e744f37cce5ccb4945777ec82fc5d470ba4fc104fe7aaabfff7b0dc260838a45331e4360b0fd14c59a55666
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d10ec63510dc1a043ee0a4e37b49eacd
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54c393208d1f51661e631cba62a21c0685fb58827067d5ea7c42fb3d6dd8c8db99d8ee1b3c304abc25510bcb0265d86ca03e1ce19be4faa252d97cfc8a1b52cb
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2c1e000206c9e7c6c8e7515eb8115e3e
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/12c0ead798e43448a30699b5386b3d88aac49aaef9bae283ea6d089a1c66df7293f4f220a2b5c3d96e73e556e37e745f38d81f5c68e09a86a2b19a6695eff460
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/21d6c5d5e422412b88ffce50862efb29
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/5e8e17ba79134e9752c7fbd28b62e4616574a5e1dfcb0980160a3aad28a2f6cec4e48ed1acf73ca1f94d74397f7ee3eba53cb1280699e40c451295590ede3fe3
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/293fdc43431493f915a3e0a5b3c6d587
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/27e13a4334a3bfb3c91fd06abcc4eca7a347f4bffcbce40834302d153ef29756295121b42ac433c266668af1428ffa08ed12ce75f21fef44cd7ac1d8bdfd155a
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2825dac8280d0563b7f521a9eb8c0563
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/7f4549ac7b63e58d8c149f6b22bd997545713477a1df3b32adf640f3951580df1645f08756d9ba80c479160cf5759e3f9372396655a35cdca14f4be4afc4ae22
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0c0da0eccec4a092fc0e9a915716ed6f
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/e538e29c4d52d9aaf151670619702541fed8231ae4c7fb9431a425d10eea95433087034a37da8fe468bd27a1c882f6f8eb9549ef71964124db10e99f4b402ba5
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/6b4fd19277c978306441da3b58ab86a1
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/6216b3e1dc6aea979d8b5abc4cc0faf510e4e64441b1d18b4b36c45d65e874e9046e14eea67efb88f3219449ef048d34fcb751b15c59f8a299aa822b426d50ae
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/b7956d25e0e5ced19df637b4fadaa532
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/ad632493095a8fc3638ff48514c9902215378532c1455cb19d70da9f2ae46fdd91ad4a8b5a3151bedd38dda9f07c21f9a25d8e095ded7ba843f9bbeb005e1bd4
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/392f0f0f61fb672002c7473c64a63ccc
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/d620dcee0b20e3aa4b2fcb7ae835933b33b5e4c4b5d9102b885c70b1dcec535239eb5a3d6b56b51f7b049943a2c79950bcd4a4425610f7a1531f6c452eac03bb
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/0b41650067323bbe0c5edd5c060b517d
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/111a21a5b491a77c69ee724b37d15b0c7baea387bb6a36695a1c2dd5f6e2eedb0ed211513145d8a6ce4dd6329b2de67e9bfce1b03fbf911b906a33a39e573f9a
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/a9079da821bee8e4b5aebf47a46cd9f8
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/7088945264d1ccead492e81636086390fad91b0e071e9f3a54ef903b619ac2a7bd38fa5e0e04ea1e299f3985e04838cd5b7a2dffd666b8e7dbbf3b419f74df88
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/4ccb3d0eabf8253cbdc1192b04c78d4f
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9d817068dcc2b60c77fa639aa7632cbf071746e7dba62fe524c095f86e88b9323c3ab82ed5af0dc8b1af9c3e6f0da18be53d92e7c05e2d056c84e5a4e974b6d8
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/a88f7a9f42d2cb5567c84d7fa2a2732d
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/9b16cbf75e9971dd4950cd79aef85396a7d8522a572f1c8017af82725cb335674741af680e1dd10c731987a321d3afd5e3e85718d3c3fdd1c9de4803e72a66ac
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
 LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
@@ -138,115 +138,115 @@ LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8
 LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
-libLLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/bcdb469da04c788fb81a723ed285d268
-libLLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/d44eec8843f4b22fb8319e1a6e138f96795d82000a8f90f8157455e6a08c4706d2a2704a70ba96c33226bed6fdd27ac52da6a78c5f2eefddd98f8911fcaa966f
-libLLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/md5/237d2d1a2fc596e502502ecf35a23623
-libLLVM.v16.0.6+4.aarch64-apple-darwin-llvm_version+16.tar.gz/sha512/9619bc2c0cb38cf38a53624a0a6f2711154d409178d7d9e510babef007c7eda33a9ffa4e5ae29c56bd52c77a230c4fa74f1730150232ebf0491736ede5805c85
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/2963b428aa281238a11df94715ccc7ab
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/04f34b768de43175f7b453e5e5d4516b1f42f42fae04be511a01fa865e7a778b4044e0d0cacec4c8efffbcd7b462a1b61c3c7a796c7c8ee474085b40d59e9e9b
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/bad975489e45f75190a930e2adffdc5b
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/48cb7d77d85d9ba6826105e3707914f773e1b54412b0165da407f9248ad1b9f262c477a11b0d2fd2efbc638e9fe20102b3a6ad4e51d8169ecde0a8ded863be26
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/e47e1d4be6d503f069dad542c04f2391
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/d6e98a2effa12796f305a90e401ddff5f29df7203d017fa08c681ef0b91a807f5dcecc63ec8a690c25a664c6a02bb55a205e238eb6696dd421888ce771fd7ffe
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/9289127a28c909e8e7d5f3c44a20b4ba
-libLLVM.v16.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/5c253a38b9b06f227123fc3d7d9f2f55ca7445169f6c6f0ed73dce1b369039da6edee91a69cff88d6dda1651989685570c7fb40e2e196b0cc73e40a3e68df2b8
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/9219850cd1c29f480f7ac7545fcbaf50
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/42ac83c9ddda05b0ff83159bbd72190daf05d6573f715ea7d2e0c015365ee81471069e43b2a58be4ffe340139f4ef5902c543820920537d0918f15ad9510bead
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/1ec5846ddedf6c8a57bc3b5633df9626
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/a40b5b13f3ceb0a261e7fc28209334a6ee4fd7d9490535810b1471f7dee1a5ff6f80fa98c4adbe3f56a6fdc10f2b2b22d835b974ef8bec4909aaaff4a87f1bcb
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/1f65b8354b9235b442400560acd0e7c3
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/c2b82578425e2a378cef3a1028b1a55492d3ef8aa4e2509ef06ba30475b95d1896764a3745de4eb905711f57b9ac7fb451d5d5f44afc5c95c9a0526a08197f97
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/57b2b92268a1bd13f042cb8cc4f652a7
-libLLVM.v16.0.6+4.aarch64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/187a925182cefa8eaf42b9b4df42e9b5999deb27d864887ef65ba97fb09b1f6fbac1b0c7675aef0df91dce5b51adf35493bb6edf1d5049a73d1b76eac26144f9
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/04bd3427ebb23c98f580ab27b8b73809
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/fa2fc5bfcb2ddd555ef173e95bcde24320cd5e54af7dc3e24f2ba6828753cb52f2c59381191d5b14a76ce53634203c40d126d533afcd1b620b718a75e38a51a8
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/c4e68d1829702b7cde26bae87817273c
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/4f656cd0b95f7bbb17acb49ef125c46e4d3d9e3336db5dbe80322e5089fad53248172acbae068b35ea82204839a11309336c8296e672c2d0b4aa8778bcc003fa
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/9372fa9a23ad5efe86fa002834cd8b71
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/6eacf49a32264fc88d5a034a96c6621e0ff9e5ebcfece0c702718c4d2027adb02ecce4a11096755010052ee4b61f99216f0c3f4bc3e31ef643dd6eae028220fb
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/535eae6a95d4d6be3be3b69eb180b4d6
-libLLVM.v16.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/df9820469c84d9c354dac9ac8ebddee3df878602ce2abbced63e77dd0f67e7e12929003e4c62def63be5b8cb90337707535bbebcca5fe1c730bafd90523fdf3d
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/b82947eae6dff1fb15445b7dfd98c0db
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/a314e0297f60e55952960889445f4a9347782d8416644cbbabffef2af08cb1c9e59940a6f59b09627aba8783287749489d94ac2614aef4bd7f58b572bce146ad
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/78457fbe2cb70afc8db031808080c732
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/6ffb4628c4f2ec4651d0605709a67a5d5b86f1fdbd27a47551adef9b95d402c6bf0cee916f06f8d07de82cddb7e3b9fd5a557f98197e0c5431296917f6294dce
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/72725d8f28c4e22596e8181e632b3d87
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/90dcfbefbc58529616f64d7b1add5d8cb4a2e01406f7897aec64003e6c73b822be50c63cb8c2c9805d73cb3e7cd729846a39b6813d4f11872489c88d298249db
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/494f84c84942b12d0bf26e4d2e8e946f
-libLLVM.v16.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/b5e1f1e2be533e7e5cbfb2dcec3ed573e4d6a7ada20442ee29122a2ff4d79246f140b7381eb33d56c436f893a65626a949474fc7d1a33502e40b857aeb5c01b0
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/6f7e0ad54d6bc43bd539a3026c2dc564
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/c9a4b83795605d0550c8ed065e94df2523a45ff451b6cc894300b642cccc2778b9134e77a609ba62aaa838761c5b1572bd90436dfc96dab02319808036358a59
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/md5/e2ace2d6f83a72208f4475c6b19bac4f
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+16.tar.gz/sha512/83b537561732308f126efc2b62433b3a0410d6e33834e2593dfdd029c1f874287c4c4c005564f64ded7b7127f839522d24626077f260b0a79cd9be64f9773d32
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/3f92c8b09401190a5b5d7699b3e6e5e8
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/460e99bf7735ea4dde2adaced43c66c9e91a515e13a516775fee0a02a75d1c1270ff7390d2632af683912ffbef4e142e9d55e8870c3307fbdb4261515ccd1447
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/md5/dfc28578d1a2eaa0fb95dee5763024e2
-libLLVM.v16.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+16.tar.gz/sha512/9188d85d365c65cdfc487ab354e7593193efa3be09b5dc95701cb6244ba276409aa52ae0dbd3907be1b6e156a7d0506c6fa2ba8b60035b774d7ca5451233b287
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/md5/8a8f929967e0d26152f145d93a79a4fd
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.asserts.tar.gz/sha512/9d474211b4ec025a56fe2a77bb78e6865fe14235620accc5110a4170a672295419bab55c750d2f7fc0fa1cbecbfa959e07126b8355c95231658446697f489b58
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/md5/a8f7ff725142e3de01b6d76ac20fad71
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+16.tar.gz/sha512/f29e03f0d7cd5610972c5b1538fa5714484641ce1281871d9f960f026ae9107076cfb9c45ad6bc5cd65f85cac395d978222c95600f4c91556f9386a84ce3088a
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/md5/f641a49cd631ade07e66c46c30c66570
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.asserts.tar.gz/sha512/2081495e58ea755f43b76cc4ebc1d09e726f26893e2642b036165f75d417184e2038f97ccbe537f431aead46f8151036cfd4afe96a32192395e48fada13c83aa
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/md5/51b06c324130d8e8935bbd8a0b5a5740
-libLLVM.v16.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+16.tar.gz/sha512/4150ad22761ed19493338d6d155e8c5ecdddf979b212f0fc6cae5ee3722ab4b6b0d4b0493778c665b81f2d4468e43589745b14b270b32953dfe71631d59ea4f5
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/ed66dbfebe14efcdcc1b71ff3a491fff
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/550db082a181cc53e5336f13cdc34c469f9d5934caf2fde41935068fd7bddc74102b822181d7cfb38848e7409e38975b100bdc2a96c112df335fd91ca326a90b
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/703841665078d4061bb8f6839c49546f
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/c48786852da4b367356fc87703ec2dc4efbe9bbd66a16a1edff2f60f009ecac9fd95c54fd5de2e0a56ff8c59bcbf24df2609080c245aa8da166806a55f9868ec
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/8d8bc21967705c1310219f9f91699a3f
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/52c44373d708f80409a1a83271effef97dffcb4828bf6d7094119378bc625cf3a2f3f7af649a590dc827fd2859ec3d302287cd235b65cbac9d1273ff241aed8b
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/59ca3888cdbf9ff4b55d7a1092bbaba3
-libLLVM.v16.0.6+4.i686-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/8b30cc6eecbb496bfdaf2b9c81d5f0a08ae1c02f2433e1bf9dfcbb936d438e12c2a0de99014f325d76dddfda9b833207f615fbcd1d9a25cfc393593c32478621
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/9298b06192d36c20b56f76348122eec7
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/ed173f7e0e7663f2186a519cc53429ad326958773a2211dfaff15ac20326617bf85c1233019b5ee996071b1158353ceea307c6ef2f6f617b81771a69072d3c49
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/b267df02f127fbe710a75ce1b24ec70b
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/4d78a547ef718810b4e5f2ff81e1edce7492808218e3313587afbc25d59877a2529df3405a9000eb254f88c59822da5a00e764153143b9f7418b0f4adc46c5c9
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/8719ff4479ca28614a530a97540be161
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/d913eb261dabe4074566cb4620d7559613ac539b5f4fa5bde8db5d93c0c729ed5c59d177972f283a449a24df0ec76548ac7db26f8e162d7f08619e9bbfdc11cc
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/f3c3cbbaa2fe0289815dd022608721c3
-libLLVM.v16.0.6+4.i686-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/951adf71911a4b4ec8ad2342c6d4e27797874499e10b8467b56cda5d36496130e72db7e8cda58f0512e04d94c1c144bd66d076451f6048c4c399a9af162202ea
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/f9ee9ffa905583656c3405f553667e06
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/91a96e8b8eba7d6505d6bdb28af993c7f8724a61c0d7179228035cd631535e5307880884249947f481a82b4f6a37f91c3cc15025383c7ce7302c010eb1130f99
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/d80972a0609c688f0a1064cb995e84ff
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/136c5f35ad7c41080472bac15f1a433134468f6b512d4ab7d41408da1d27a3aa6b0e5a6480ec185e4a1eee84ab222231f4de21fae7f852a324dfc702a80afd25
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/cc16d38262b9f3e1057d50bc57739ea0
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/c62bcbde36850de855fe7c0e010a2a403c8b923da1c1bbc8384410e356c112186d1dc874f664260a8e819b0bde8a457746a32a1fc988b103e1277cf00912000f
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/70b7a3e70f0f6566e34dcfba524d3ae7
-libLLVM.v16.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/ae211588739af26ce61f2503adfc9e9711ecb1db5b8ff9072086accbfa55de8ba2086fd09d766d2de15ab3fa6a6a2790be363f20f03b146bd81764a8c6651506
-libLLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/md5/f2e71e81105619177a8d007f6ad8e9a6
-libLLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.asserts.tar.gz/sha512/dd4f6ee07f69ebd2f49da99d3de3b4394d0520fae6e506fdf4a028e3602c147b0a725f02b0e4712e6568815a460876cfcc05aa453e10ca30f9378925ca12b1d7
-libLLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/md5/fd64aaf12cac1e97a2d6c6a8a73b6bdc
-libLLVM.v16.0.6+4.x86_64-apple-darwin-llvm_version+16.tar.gz/sha512/8917408e6f023de7d1f05027d389d9aac7f77623506e9d621487f601e3bc53053b99510266d31840985a380082e7af578c29c400a6dbccc31165bb90bf55bc41
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/md5/d05a0f7df67d362e0c48e6a81a7efa73
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.asserts.tar.gz/sha512/8a8adf8b13b02af3ac4ab29e314f689089359b35ac088cf30e0ae8d4d3cc24ca7d63ab908ecc18b6617935a01909e4eae2cdc392b6c7ee69fa0f3cbadff41349
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/md5/d84ad34377aceaafcfb2922141b06790
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+16.tar.gz/sha512/785bf236810f8c9ff0a6477c978059c136e936150e97bf2c45ec23b23accc0aa2df28442cb456d64f31350d1c49811fa4997029fd9f1044890284be494d68500
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/md5/275083a9fb3c2cf281f64f08c840249b
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.asserts.tar.gz/sha512/01598d993a34faa33948d5a56a91401e100018b154112431c4b124e6e5356afa89870699006a6a4358d1bee1bc8155a1c96efba1c62a6fbcbf796087e1334f41
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/md5/a7e0e0734e86acf47998c94441922a68
-libLLVM.v16.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+16.tar.gz/sha512/a96b8e9fa2f34711c7826c4a8615d326fba3b6857ee77b9bf6120dfb3f64638da54fe50c7d6b2fac5aed77755da8d2529ae45daee7f1182b04f8248a34d02f9e
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/md5/359b3ec71148c1467f3c763e90b5a7ce
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.asserts.tar.gz/sha512/673dca9861ec87cc349678fad4b29489cda2a2f8ed02eb87baa893055e9574a0457c69bcacb17a8b3899a380dd43c44cd9632fd6e60bfc26625ceaa8ddf56385
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/md5/118c299f09ffd7946ab66844e056c9c4
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx03-llvm_version+16.tar.gz/sha512/880df9edee1f67ad1c68b504f03dcac492809aa1252589634300441b469cb84e9697771fd0c516ce25da5223814303b0c1d2bea48342d2b42bdda028272e9cbe
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/md5/ecb34de28c31cdcab980c45d2c90faca
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.asserts.tar.gz/sha512/bfb6cbb1ef7a712cb36c7d26de346b3bd7d10f3bbc8fa412b23e7332d2eb432a1bbec338cf92d7899d29036915871ecf70148468ef06dfe0042f5ae547505b1f
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/md5/eff0891d113de03516e229eb0d9d9c33
-libLLVM.v16.0.6+4.x86_64-linux-musl-cxx11-llvm_version+16.tar.gz/sha512/078df9260d75354eeab5c6213e3ae926aa4050bc5c3f8691fffa4ce83e32648fefaa78bc67ee3d76a42dc4bad432ffc132e0733d15fe5f85a60f344fd51be639
-libLLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/md5/b28f51c7f01699b1b7488d3c9b7afaa1
-libLLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.asserts.tar.gz/sha512/297895fcbfd2b46e62e9e685eb08aa7710177a753ee7d6f3211dccd790f2677c9c57f8738785c35368c43dcedabb374ec57c7dd54110e0f153b0b8cc4b40bd5c
-libLLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/md5/3822dc96d157aa849a5c73bafe221b19
-libLLVM.v16.0.6+4.x86_64-unknown-freebsd-llvm_version+16.tar.gz/sha512/a74d9ca78fa7daab47a9a0191356803ce98c8f3d7180073a949fe491d4364f148ff3fdc35d4960dff8ff7f6ecb65b31a471b171c7cab32e89b8252827b059775
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/md5/ab93b1ece593bf2fd37902d7cfcb26b9
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.asserts.tar.gz/sha512/3ee486621ec970ba991162a7ac80bc75be46b528a277d256b385a15c96018077862d0708eaa36234530d47d2f83fa934dc7993d4aa32cee0b2885e1c45cb16ab
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/md5/22a246271f82a70c9a72d8387fa340ea
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+16.tar.gz/sha512/f61f51bb3b7c9d253dcb40ca6a478ac44a6c47bb1f81fc7f562ee682d16081b2307695fb4feb0f3ca495fb8349aafd2b659a4f82b395f7d82e02dc909354b780
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/md5/f642a67480e79f269e3bbfe7b3ceed33
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.asserts.tar.gz/sha512/45930e3ae3bf3922c8b3096474cbe22035432e80d2b0f4ccbd424c69e874a76acf4a724e055a5ef73ce40ae00ee6ede60b210b8847c30239a6ec242550a3e464
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/md5/344f2a9d2b85a6c527fd6fa024d6763e
-libLLVM.v16.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+16.tar.gz/sha512/73909c58087568d8654d48ed5fc952289d8b3a4e26e956e9220cf254b8fb2c4b6630ebc9258502a6c3b67ed3cfe0cbe3a92ee3d346b228d842af03f0a42794c7
-llvm-julia-16.0.6-2.tar.gz/md5/71ca2225b042ec5ae666c99fa1bd19b1
-llvm-julia-16.0.6-2.tar.gz/sha512/6f2513adea1b939229c9be171e7ce41e488b3cfaa2e615912c4bc1ddaf0ab2e75df213a5d5db80105d6473a8017b0656016bbbb085ef00a38073519668885884
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/2ea6046caf5a3d519ab1c3309a2eea31
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/079720b30c61ded8499eefdb314477d58bd121e9f326d98696ee39b2ed91f806d5f67e68b6fbef8613a992175fe34694e5efe83e87ef3bfbed67d6b7fc41ebf9
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/62c49bc7767d1ff114dc6b6a996449ae
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/c708472b325cd73b94e10003bf3267b0ecbf3627072302fb22e78336974f2c7855c8597420efc954bca30aee17cec55277aa0c95a01cfff38d5d77df50c807f7
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/766a2de98d275877bb676ff1f23e972f
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/3b353ea038fafefc13ccb4a81c7242d569c206362605be374fb312cb495f385796d052c3a7e08c7fe6ecaa3018e2a7e3dfa43d71a8c3a94987f7dc7aa378fd22
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0684a6b210b799a8a0f45a286f3dfcc5
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4221e2d74117bd7e89aba2945030c1507e51999b236814fd23036565364c328392e87032daf1b9fe274ed89fcf9a6dcd203f0f1c8602c2a08d3fcfa189a5fefe
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b460256e923637e5107d67859eb60ba
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7d3f2736afe4022842529b1355cf9914b7a1c7b1e261f814a4523ad30a0cf0189056d5117a06720bbb7a844a435bb632ddbda2daadbf7e01c0120452cd13e6a3
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c2b13a6a296adbb4be91dd3bb5be0877
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9086937e718125afd535b0066ee08a3523161a94fa7ef3c9a3e86bfe760f251b6ea7b035888e61a0e7f192ed25c9bd0f4dc153df86e08569e7067a7a30ba48c5
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/758d33fe0b2b3d0371708614365450e8
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/79a662f72ba1b89b373d1d143ee880a12cb128211e79182e7befe8b3e50298b594de2ce489ca8bcdeadb17fceee811622f8bfcbc3e232cefdaf9927177469eec
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2dcbb811be8985bfed3c8b37733c0d40
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/17f6fbd96ed5029f360c101cedad127881e14b42498d66f717448d99ca1909057ae79169d934e08157edcc7467db4b3941bdda26a2e9f42645963eec51f27e29
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/bd3b904b5f9464aaaf87c41b899c8ca5
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/fa99e8025419a18f548f658ea589771c2803480c3cb3a25cfb75e26ed0993b7b37bba204d7cba1475319a71159813b2b58a3b3327ba24d264cf80ef24263628d
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/b4f9038d5c3c13207111ee1a9a918cba
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/e8b97bee30f597cc06d31175e12f0c2035aef0054e8abdb431f31b1e9d440d561bd9bc6637a403441aa7f3e1d2a46c600734e17e3b7ed0ae899c92df91758780
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/06d8e634b4a6914efc18b7962df52021
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/cf6aeed1eaf652e5830e34dd2ba88abc33668953281146106bbfdbc92f5f225645f00ff5b4a0eb902baf904362ab4eb32192fa50ee5b2672e8b031fe2550f9a8
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/53e83804b63e6ae4d0f1c97abcbbd1c8
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/45b3ee9b105ef2ef106fa8ac7b8e902cd1d6bf3c9bfb57edeca9e14f1654714d23fb086b369a9fd3cbb828c04fee4cfe80d2b2a2bfaa852d3ac65c0d213d8c62
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/91b6cf00564053d385e30b34e5b8778e
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9111f3f02b49bf78340c9b0c5c1325a1ca09b62c83aefece1121573dcc21dce095060351f18997971e5cfbaab346cb12c75cdc0fbe8fa92aca2e8a68b5f5f577
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/f6c91b71dfd73c7301a4e3de48e072de
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/581d7e1e4d85aeaf082fa31555074471705e391de0771bf66665807afb5192c79c481ca30e73a25f4e2d48d4d325f0198e39bcbfaed2c9bc7477ee917667f5ce
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/ce41ee46959e5e3a17b6c99293afedb7
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/73d8c5af750ea9deef822aec58d8697243ca154bc4435ac0b0ab8c90fc97750e91fa55f8de7b8283eb1ab19951cda3e3c4c60834bcf13730163e593126a8eb57
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/67ed5b654852dad400aef17fb542703f
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/07f70c57e27eea37f520f6f0a954b54d2506530d5eb5a74e5a8526ba8ef55a948073c49037544b602d03d0aa482704292eac943f0a83421386ccbfbf22ee8510
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8bd88d49ce21e5b63af6f77782eed4
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/cef1c561ae388b2baa08e39dc195989cb795d8a2747f5f11e0dc9d9e107b9e99dbba465335376beff2e1b326512f6afc962775e0b246f3edcfadf509235cabd8
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/5fbf26d20b2ce3f61edc9a9ca2eb5284
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/2c564c95d648458b9a0f0c963246cf5564c625107682f680390b6db5fde0e2b15a964fd3fd23734b5b2bb135db1fc698812d61b3f275710593f4defaee4a9c23
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c81bc29a75acf4f806f3eb13bf890604
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c8c922a0a4fefd549f1c2ba396a3cab9cf7738aa82e7ccf7ca29c090260e2d73ec45d6f2b07173d584f6074b10fa04052114deef6ecb6f53ea87f1924074137a
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/1fcb40ba1a427105b4e7d13a6c11dc78
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/392c9ee85ba7ab6697bb8979c7f443d1d25f7ac9178e96a886401cfc68d75a43ce98bf3038a7ba70a9a990f65e604d38e043472cec3badb25fbd1b38cfbb7162
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/427a19eaf69725d11bb33f48de9cb205
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/542e209b10c13d8dca867247a7414f84adb832f40051fcbdf0dcb09bc9664a77248e1b0ea1687805847dd9f5a05b86475dd76aba427c9a1bc83f8502444c60bd
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/ab34bfa2950014936edd13a7b5db8170
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/6376b25d0278e5c97581480fb4d54371b09a08be88f4cc39d2c7b3875f1189cef60c1be6bea5e12b0cf306cef8b394bc7d00f8b0fd95d749bd1b4eb318af7e15
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/cb6300fe87fd7cb9840f3bc44af26878
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/a7984cd90fef55559142fc05d91b0da1f37f77f25214e93ff7641b7c3958f08dc7c082611915dbfda4bbbaa392656ac8604d4f75369777dacfb78baee2f99b16
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/b8a4e8ef43340e9cbdf5e4479c6a5a56
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/fc249f2b666c8a8129e05ea08c773cbeb7af6d37791f271461eedd99adcfc5082e8609ed096d8a46edd1e73505352712a41e0ddc247a371f78227aab01fbe0f3
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5864689df3298be4b1b4df1ae0412d3a
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/8f32f73e366c3a6993fa8d6b8cd1a9391611b0644cd4a77a4f7a235c037fdb75308d99b5a23ada6e4a73ed5fbd8f929a981d6bf317d79d52396220c221619303
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/6bf798476c4e94716cc47a95580104ad
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/9dbd27a000dd3c3dda9047d366a667c4b179cc61582525adb0f8227e8055413ce46efcbc1530305400239656e2f1016fb8833fb7f4734714078e035d388f3531
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/66e2889f86ae6bc1977419e6d9be729e
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d0cac798c4979b4d818d36596b173e523cba3f41ff7ab1e2111f6a75c3e819e563e207a547328f005c5a93c7f8f88c17bf43c1139b5c2690df4f1d719f82920a
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0534b72d6d33c8573f79dce8a2a5a6e6
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/6beaf1b45eec8b46fbf92f692f53e6df40bf48e50589aeb5ef99240a5a3ec9089ffb350dda6df24530937d613bf6d2cc4da76e92921ea00def9d2d38ac5bbeba
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2cf9a1ca20472179ce4a9eb3a949457b
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/cebae06ccee12a14d20d3056ce0519b1e774e3c9d9200a783262fcc40aee6d7aabfb08714bf53b88e03d8b09a96d3cda248a70c16188f8c707b291642998262a
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/4712f6a46e0ff407ece958a7701511b9
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9a0a2dfa2076b93027f766277a6890cf94d67c131697f74945e92cf13ae64e84c09d3dd744498986fb22ad5e5465300aa9c8ae6632fcf919a0932515edfcc1e6
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b944ae477232ef10d213b4c7743280fb
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/25ff757620baaf6fbacb375b103dc0dd9af6a23c3d3bca567c182a6357a367ca125d7b6c66927d7db23816865b6ec783157352fba08532336de467be80efcb9c
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/52345a44b3ac74b3cdf93852bbc63710
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/3e5b449b0f1bab302c45f9ee9f04d2cfbb01ce24e86096aa610fdf360ad65828f1b73734beb28b3d3c249ba8ef657d2663c5492940504f47c973038733b15248
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/36e058b96771b4cf77e29b800227fa03
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/98873cb2963c4469b0f69ad1d9d9e27056aabfb46a2642dfa3507b7fe2f0b0fc41c3991a2543125291783699e39fcbcac0bd6e92fa8f0df97609a85c340fd25b
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/3b3823fbafabea289a769958f633dcdb
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/91a9c1ad6f37cb1186ba3392935fb55d49e0f8d6afc768cf881886f9b1d8b0a2b0ecf0c81a8e32e36d32cac04c065ac852bdb95ba5ff6780c00a763583a02973
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/bbf060d61b294b86f7e3dde381b00b8a
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/632372d41f6e400a10fae27c6cd06a5a344cfb5902cad7928cb4133f14f36f0a3373e69e73ce9baf52f518340593c3a5a16173ef59a1878e6300e9975aeaa157
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3d730b713e01cdb5a7a5a46028afd41b
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/052ab4fa7ac3b2c430601753ab078cdc9fd6db7f65ee0b76bb05473f4c5b99ec8919ad9d347425f1928cf619548e992c86ba97f9994218f50bca617e43d2f0d9
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/bf9dcb92ba8c031ae62ed4434fd5447f
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/e53be14dd02a2cef8eccafb9301d29c51d652c635703529c1444947002993f6639083eb8bef13af21c9796717ce4b3129dcdcbe2751a1173d39e321db8f6e3c7
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/b5cab0fc7c6643c6dd161f1e553ef1a0
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/4032634449e2669479761c4323096b152f8df4948e3a97eea10f0b400fbf2a00d1edda59b74a714b62c4e204b113d8ecda78d828c3344ebe8bd750d14b3c4c7d
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/054e06d882173ede2886c510e8519c80
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/eb97ec25354badcac1b8a4a09fd9e04cfbb7d35493c54cff82af9ffa4c2dc5070c9232a86e900d6eb9acb03f1c572fcde8d2a865477bf6c9fbfc139763a9dd1c
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/f1c23200365b659f0dc07cc6d0a32c60
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/fad13fef7e7584b3f756fce9125950e788e79608cf5d0c023cb8f8a4e79001afefa8060f7866875e4861a268b3020e50305e66bf472360c1d92fce12d7a81ba9
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/69564913bae176a167d24d3291ef7af7
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/b8eeb86b66d767218e59671bdd597623238eea72319913c2ac5e116faec3f4c13739a24f3b95338ed857ec29e714dc0308e4ddbfe359332b3c27ad5235052342
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/bc9d5637fe30f21d2231a98371e798e4
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4efbc2823322abe80d0134d35926767bd9cab717cde9308726a6a8891e5a707476138888c695ed399e3dddb57baf17abbc43a0a338cea2e5c0f472ab427c12e3
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/8492ff91e6dbd1a66edd8aaf0390a582
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6443bd2fa9c5beecc2b002c26595f2cf3a8e2ea5eb49aa4c00f7252a6623fe0f8c01824941ebe5475460641285c4e56a5203056c1b93a78250b7e48fb5ac9e00
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/6918c9978fd8b5887c66eee76950478d
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/d455a4f433bf3ea1b5100b9d45199bc785e4b6fbc7659bf06cbde6ada471134e7d4243d3a3a1f71d579126ef8371d70e59f174e124b3ff8d4842e9ee83e2dea4
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/075f87d106dd95c8e9c6e7e157b5e9db
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8132379d8f44a21082c7a90f58a7dffb0c6ee725efd58a959d4023787411b080d72913bb1e89a35072f97aaf1ca512ab1d027b37eaed819e3c053d7a0cf64269
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/4cfc2838a77f05883f82e50b3723dcfe
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/20079c81cd6a4020b087485be1ab4928b3bd3e1a53728cc98137a35b969484278093bc75a9e51ddfd8331556577c5fb3109d74dc2eccffa93b5390e0fabff2b1
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8cbf00631bd4540b7335a86302a1fe
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/51ba9a4b74b740905cee4baf7f4e5f3620ed81e0746f49cd352d874ebedab95277c5031123f880c9239b7dbf505b10f6531f79c8a6b0482a652b8324f4137cf5
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/11010cc2d58b1a8c6a6e7bc24df0c0db
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/a6bdd9a2a2fa9a572e74ced69c3ce9d1b84cde18155ec9bc7dfbaba411ee6c43d229e6fb333eff66fb63b632b485b46b7cb1657c0c49d9d9bb849fa13f0bbc7b
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/8afe26d16d9fdb0fe6c0248c51b4f053
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/32a92685f417c1887aef3cd8a9cadccc4de3e560ba8fc42e8db721f273a3451927b24dc4a2c2e83446e32a84d47f714fc3c22ce71989f2e97c5ca23a1783b8d6
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/59d8d911907127ff56f5eafcd8663300
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/9b0bf6f9d8d32ccbec349c249b79fd0fa3b4949c04b69c9d408f19dfa3b4f00e5cfa51b798234721f72f2793161d6af6491856e10e6a507976b0da6ed7a8065b
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b0d9a7eca92d40ecbfa47461d52659e2
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/dc4a91e164d88ff51b4a642b556d5767156f28d1efafa533f5d7c619e05535e2000afb2ea47469a90f5a19f970e8f0522f35d59ec250e2f9b42ce22fadb9ffd3
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/92a60309ad33391415c6703edbbd5423
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/2fe90ac804d94bcf0d4058a8b8f0c274e405ffee7df0175f5e7ccd5014b29a813af48152870e1af0a79df8d3eec3118c233bc4f5b3f8439fd9792931140ee944
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/0964df17cb98d2d869a33468477f9901
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/2c062acd62175d32dda773e9116608ced814a64ab06ea73f89958437178e2603b268638e88162fb81c22e5947cf4cc925b1af10c6f9320be22c92b279b278992
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/7dfb8e61e972c66f1d754cb979bc0309
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/d462b6fe7aea75f6fee6c5c2f24576569b5deac8027fb88240e16c55a54d68b7dcb06b3ec4ab514616fb88549fc2f10fb1d587a641d6f29fa66273904bb9cfd8
+llvm-julia-18.1.7-2.tar.gz/md5/5c0ae4abc4ce31a86d5d6d4ecabc2683
+llvm-julia-18.1.7-2.tar.gz/sha512/b4d1dde929a8670eec1a9b25abe23fbc926a922e61b60ed99b52b440cd07cb026e7f746878292db4cd0cb422d9b87ecc4ee4b2b141f8e9411855d18da51facb9
 llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
 llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
new file mode 100644
index 0000000000000..a90d28717dd85
--- /dev/null
+++ b/deps/checksums/llvmunwind
@@ -0,0 +1,32 @@
+LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/md5/d8584e0e3dc26ea7404d3719cea9e233
+LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/sha512/7a0396eaace91b9b4d013c209605d468a7ff9b99ede9fdd57602539a6fa6f3ea84a440f32840056a1234df3ef1896739ea0820fee72b4f208096c553fc54adb9
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/md5/d6edea561b61173d05aa79936e49f6b7
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/sha512/9fbe29ec6a33c719bc9a4dd19911ceded9622269c042192d339a6cf45aa8209ad64c424167c094ca01293438af5930f091acba0538b3fe640a746297f5cc8cb3
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/md5/3ec68c87e4bddd024ee0ca6adc2b3b96
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/sha512/be3cd9d5510c2693dee1494c36c479d32311ff83f5b2d31c08508a3dd370788961ce46e9025afe148a0febd05942fd294370a357dd717bee353d8a108617f6de
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/md5/8ca5a926d69124225d485d679232a54f
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/sha512/353f540b342bc54877e7a41fe65c9eeac525fd91bf4cddbe1b3ec2ed93c3751beaf8316a4d31530502b067100b160301262e10cbe4407db3abf1ceb5d9a74eb2
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/md5/4e5b576958f2a2e708eb5918ceef0de0
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/sha512/2e98c472d3ee25c2e062efa4eb21ac9cfc49b26ea9d99ad4a8e7660c4c09f121d31193bd161f54ea332ce94785d601897311e9e6668adb1e25e2b666e0d5bb3f
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/md5/1c81a886e799663ce8d04400c5b516a9
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/sha512/236b78b9a17eaae74ab07349ac8dde16c3abbd48e0d075abd1c195d60efff48e2fbf799554df114ea3d3dba937e0369430a2788bde2a1201126e026ef6cdac42
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/md5/0371f43ebcb571d0a635739252b88986
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/sha512/605318ae3737e26ff89d6291311a7db3bc3ec7c8d1f2e72ae40fd3d9df0754ee2ebfb77687122605f26d76d62effb85157bc39982814920d5af46c124e71a5ff
+LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/md5/cd3f1cdf404b6102754ced4bd3a890f6
+LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/sha512/65fe2c5b1e04da1e1d8111a0b0083fa0fa9447eaea7af7a018c09fe6d5506566c491bbad296a7be8c488ca3495016ae16a6879d69f057f8866d94910147dee03
+LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/md5/abac9b416d2ba5abcf5ce849f43ffa96
+LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/sha512/fed677ed6f103c56eb9dd4578fa37a56ed2a4bc803aa1997c5af19762a623d2f82db1f72f429448d66fcef3b37af2104e6cb782f023aaabef086a921a862b042
+LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/md5/4c71ffd7c8cabb1c0ed6290b193883c5
+LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/sha512/6b1421a3268170467225112167cdb33fec962181993a2dad5594d4ee0623ac88ee0588cdc7d0656dc1cb9129ef96f621a97a224731cd161134d7d63c8fd32c16
+LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/md5/06faf505f0dc354afcd01113cfc57af2
+LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/sha512/1f9dfbd403e2ce121e126c217baede178cb1323012bb5e3cd1f778ff51e4216aed9dd69036e2baffbd60a6f5ae438ddaba6c13809459e94bb00be3f7bfc8c30e
+LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/md5/516a11d99306e3f214968a7951b07a06
+LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/sha512/885738599bbd96f20083f9b9368ce3f243bd5868d3ac9a45189de6cb40b6664a6dcdaece159989e504670231db8c2addfa8d544003eb0cdabba960e4ab6a4470
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/md5/d851b90ea3f9664774316169fc494e21
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/sha512/a1f529454f0881baaa508481ba97ecffb040fa92141b4cbc72278adcf8b84f0766fa918aea7fb99ce690c4fd80c36fec365987625db42f4e7bb36ad24ce177d0
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/md5/dc4e86eb2effe1f6cb0d0ceda635f226
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/sha512/c52de384853890f9df81aa9e422c1ba3fde12b2ae9c7b60b9ecdc6d0c88eab495dd336af2b6cd2c31d6eddcd0a213954eadbc7884bc39ce2039cec672eac32fe
+LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/md5/8477e3624c73a820d8ab82a53e1e10fa
+LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/sha512/32ce031245a5b59a779cd77fa3c9bf05ee59e48c913b75d4964bea49f37da232c59a42ad993f7b5edc88322148c1d7394984349682bfce3b69d33a51756ac8e3
+LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/md5/7be93eccbdb0aff427c43af651073d66
+LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/sha512/89a61a81ec664c72107ac09e717200b00434350bf77064267180bc0c101a59e0ee8c8af4dd6fe75eacdeb14e82743c138b2fc558ca08550d8796b8db93f89da4
diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr
index 1bb4eca6bf4ad..050e9cbd8d5a8 100644
--- a/deps/checksums/mpfr
+++ b/deps/checksums/mpfr
@@ -1,34 +1,34 @@
-MPFR.v4.2.0+1.aarch64-apple-darwin.tar.gz/md5/f9393a636497b19c846343b456b2dd7e
-MPFR.v4.2.0+1.aarch64-apple-darwin.tar.gz/sha512/a77a0387e84f572ef5558977096e70da8eb7b3674a8198cc6ae35462971f76d684145ffae7c2ddca32e2bd1c8b2ccb33e4447eb8606d5d5cd5958298472b3ea9
-MPFR.v4.2.0+1.aarch64-linux-gnu.tar.gz/md5/ade253017d195de694780c32f9161dcf
-MPFR.v4.2.0+1.aarch64-linux-gnu.tar.gz/sha512/1b68de5f8e557b7434c8c1bc016227b58683b56c0977b763422ea85a673bec446fcfee3a4f69e1d4689abb9bb6bf47f2a50fbb56ecac6a9d40096e66bd0f2080
-MPFR.v4.2.0+1.aarch64-linux-musl.tar.gz/md5/7dbd121c7192ccaf7191de5ab8d91afb
-MPFR.v4.2.0+1.aarch64-linux-musl.tar.gz/sha512/8614e3cb28491b24a0ec5060b44abaf264b61c91ddd29d70105ff583bd3112cff1b9bd5ed45e39f186265333982d5eeb8bf35fedc3b51b2a009cc7a51046b50b
-MPFR.v4.2.0+1.armv6l-linux-gnueabihf.tar.gz/md5/adb2b7fdf111c8b19df1516cfb278bb1
-MPFR.v4.2.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/0c47aeffd05a194802f6c4e0e2779d56fb46007e6c3e145ee6992854a21a317a9d51512c59a0ce4ddcd314c387945225c6557d6c2ab6961ae4848875e8983de8
-MPFR.v4.2.0+1.armv6l-linux-musleabihf.tar.gz/md5/c30358bdeffcff65ba9be906cd35889b
-MPFR.v4.2.0+1.armv6l-linux-musleabihf.tar.gz/sha512/2857ec27ae2d53a451d62dd241ce9b43f7ee182bee180ecd9ad92c907c66d0b0ab2d1ea3b20fe61cc176ae44ecbe6041305cc8a9343b396c9cb54dd77a1e2868
-MPFR.v4.2.0+1.armv7l-linux-gnueabihf.tar.gz/md5/a1e30436bade2150c9dc924177f0c321
-MPFR.v4.2.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/d2f4662c494fefda66847e7a085edda3ce396383aafb4e17fc2e176191b0f530541726c261cac3467f13136e8ec728c8a7cf0e352f3e9ebf960d153cbfe766b8
-MPFR.v4.2.0+1.armv7l-linux-musleabihf.tar.gz/md5/857e3c82804e7c853d21603f18caa715
-MPFR.v4.2.0+1.armv7l-linux-musleabihf.tar.gz/sha512/86cf3e940fd66820b5269e9aa2a49c3fc3077857bec037a08e0d301b0bf3cc5c79ac331cc6370d852e20f4acf8f601c49d5dbe24e96652e4411b3f33a11e3f45
-MPFR.v4.2.0+1.i686-linux-gnu.tar.gz/md5/5a432be79a112e67e970980f4bde13a0
-MPFR.v4.2.0+1.i686-linux-gnu.tar.gz/sha512/94198b23ac94dcb9dca95938a46b9899c3ef329bafbb13b32076cd3415b89f11908632c7c07e90549c01bd9ed7fc9a002dae07a645f85b8509234c49be729621
-MPFR.v4.2.0+1.i686-linux-musl.tar.gz/md5/4ce71dc250c2469f844a02c6ee6571a1
-MPFR.v4.2.0+1.i686-linux-musl.tar.gz/sha512/134b67b23de75ab172594cd0fac55b5c265730bfea195978698e3e6fbc47d65617652bd72d90ba092ed1bac4c29d5b2c109df5d8dc60b5d8f91159fd58575b67
-MPFR.v4.2.0+1.i686-w64-mingw32.tar.gz/md5/be7239432e8a26c59e2d418d310bd6e3
-MPFR.v4.2.0+1.i686-w64-mingw32.tar.gz/sha512/3144d84d41996fc19bfc9ed4f36755838470e17dce79895b37d93e32ae1cb1da428f2136948f939b19548d7dd62830ae43c434f88efbe192ed3184bae2df5970
-MPFR.v4.2.0+1.powerpc64le-linux-gnu.tar.gz/md5/d818894054b38232ba02ee0e129f6fe0
-MPFR.v4.2.0+1.powerpc64le-linux-gnu.tar.gz/sha512/0e73ca926f3e06466d1899f0b3e9ae4abe15102804dce6716ce23154344a571773c40d276f0038a0ae4e626799867ee715428e1d961334a01ad3091745367e8e
-MPFR.v4.2.0+1.x86_64-apple-darwin.tar.gz/md5/9652148df4e771be39713c4f43d3ff61
-MPFR.v4.2.0+1.x86_64-apple-darwin.tar.gz/sha512/91a0219fd1880dfa90d196fa403f4e1df0347ced58a4772492196b94476f346d80696885a4f3520424494bc09679cca0c0ccf2f6e9247d60b52ebdf564485e72
-MPFR.v4.2.0+1.x86_64-linux-gnu.tar.gz/md5/4de39327a792be708119ac7b43957628
-MPFR.v4.2.0+1.x86_64-linux-gnu.tar.gz/sha512/447b59d5589a8517061627668e8baed4366408cacc9d8e063528b9b795de6d27e4005844578310185f03f568f4948bc4a794624235875fb61b6187264b6f483b
-MPFR.v4.2.0+1.x86_64-linux-musl.tar.gz/md5/f9b8c3c094b339341b19828cc5e1d47c
-MPFR.v4.2.0+1.x86_64-linux-musl.tar.gz/sha512/c661e7c5bded3bdf11b2bd5e5ef4ad8e446934d9b82dfe26f0be1b83cea98d7e56e0903bfc1075f91c8d23401cc6b3b722f2d60f46d73cab884e81fe518aba27
-MPFR.v4.2.0+1.x86_64-unknown-freebsd.tar.gz/md5/e402dceae753abbdd8b11f3c8d96e0dd
-MPFR.v4.2.0+1.x86_64-unknown-freebsd.tar.gz/sha512/235f001f3b0101a6bafaeb45fb49d2992549b6c2f42a4e7ba38e1fa8c59246fe7463598e7cfda5ead50c9805dda0b82a23b5ae2af4ec993bb771611163e58907
-MPFR.v4.2.0+1.x86_64-w64-mingw32.tar.gz/md5/c5bbd2217060491e2773bdd84b055e5c
-MPFR.v4.2.0+1.x86_64-w64-mingw32.tar.gz/sha512/74b059b22990ab79f243284687f571f47447457ac2c1cb4c4548ea1f3d8ea01b7466281f48429cb39e2d11394fb86650bfada7acab639c6537a143a95bd6e7ca
-mpfr-4.2.0.tar.bz2/md5/f8c66d737283fd35f9fe433fb419b05f
-mpfr-4.2.0.tar.bz2/sha512/cb2a9314b94e34a4ea49ce2619802e9420c982e55258a4bc423f802740632646a3d420e7fcf373b19618385b8b2b412abfa127e8f473053863424cac233893c0
+MPFR.v4.2.1+0.aarch64-apple-darwin.tar.gz/md5/816f9ff59070f21f1df2f310e2606c06
+MPFR.v4.2.1+0.aarch64-apple-darwin.tar.gz/sha512/dad9adba7a8867d1ce26d77efb5c33b602b920a2cdbec84ea58a054cfab3ab7df54d2bda101de72b71604e7844993f1e216b002ba092e69277d0764040216c81
+MPFR.v4.2.1+0.aarch64-linux-gnu.tar.gz/md5/c1e3c9619af6454d8adae9bcbd911dba
+MPFR.v4.2.1+0.aarch64-linux-gnu.tar.gz/sha512/5d916492aa73d11e022a7ca3f31940ceb8f8667bdf878ba29d6256736a380a2f6a11ac90cd8de3f1d3454a79165db240a1b971b9794fd21692ed64502ec34b9a
+MPFR.v4.2.1+0.aarch64-linux-musl.tar.gz/md5/8ada267e2d23eb0c65ab2d2df02362d5
+MPFR.v4.2.1+0.aarch64-linux-musl.tar.gz/sha512/0c7f18e6d0f3e2052541e3279dfa9a74eb34067ac4fea0b17ab805cd73010cc83f8d7cb4eda8f4a904da398268d1c0d638c35521a9f339f8c7c3b5f159f27277
+MPFR.v4.2.1+0.armv6l-linux-gnueabihf.tar.gz/md5/42bdb78eee83f496d7da699ad9603913
+MPFR.v4.2.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/edaa9ece1404a606d6b635406ad5e721c8d094ffa1c73ce19222afc2b4ea7b3b9e23e7c5589ae10fd9f4c4aefa265773bcfce6c510efbca57782115d43daeb13
+MPFR.v4.2.1+0.armv6l-linux-musleabihf.tar.gz/md5/2213207772b8a50de4768816fdc20e2f
+MPFR.v4.2.1+0.armv6l-linux-musleabihf.tar.gz/sha512/d24debc38b8135ac5c10c4ea19de0c69126b6881940b4e182118e12cc2c7cf0aca2db065620f0cca636742da32eddec5bda3b4f449a035274f05120c977ed449
+MPFR.v4.2.1+0.armv7l-linux-gnueabihf.tar.gz/md5/a0d9fe20c9ff0027b6816ee0102b1f9a
+MPFR.v4.2.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/97ce02898dc0d29a616048fd7ecee3100a710f7a30a21f2276c01675749034a5241be88bd46dff3dbf9ea0adca98a4357bd16e43fa9520e7a02477494c2d072e
+MPFR.v4.2.1+0.armv7l-linux-musleabihf.tar.gz/md5/7898b9047c914b290b5928af5df63030
+MPFR.v4.2.1+0.armv7l-linux-musleabihf.tar.gz/sha512/cbefa9588752c65751630832417c1c42e4819d49ff9a505f61c2567ef4271097e585542fa898efd61409a43e439d827bb79f693a0937d0a3a427b39535979588
+MPFR.v4.2.1+0.i686-linux-gnu.tar.gz/md5/15fa598e5c1c723ff6cd2ad3ea51e437
+MPFR.v4.2.1+0.i686-linux-gnu.tar.gz/sha512/2ec4cf0c88363bc9fb39522bbcd6a9c2311c38efb166f604aab118fed39712beea68367ff5c4cabb2b7b3f5a53469414b8552fd22a70a637cbbfc936f0c4851b
+MPFR.v4.2.1+0.i686-linux-musl.tar.gz/md5/6dc6a00d3ea22e2c60374d49926598d6
+MPFR.v4.2.1+0.i686-linux-musl.tar.gz/sha512/4a90356091b53d7238dda59f6e9c5c420614f16460dc67310e581611ad46a2dd3324d6164cfecf1bcd660b8f2e473f0afe137aac954c608b11be3acbda648e14
+MPFR.v4.2.1+0.i686-w64-mingw32.tar.gz/md5/bda99a916573607716c61473153a1927
+MPFR.v4.2.1+0.i686-w64-mingw32.tar.gz/sha512/ed3f45ff5ac8f4588584dd80036d9f3623651c87276a9b624955c831009dc33f8804c2845bd187ba750515725c29d65ac5d70c71db1b953c618cd771d2b066d0
+MPFR.v4.2.1+0.powerpc64le-linux-gnu.tar.gz/md5/ac70f716bddd5323b4add663b473b52d
+MPFR.v4.2.1+0.powerpc64le-linux-gnu.tar.gz/sha512/ebb0f5ea76c892b7a4e4636706e71f476aaea58bb88e1734a7966c44495fda8c81318e0e8629e208185f0fc8d0c73b6f3463034cd831dfb5fbbd493a0689bc06
+MPFR.v4.2.1+0.x86_64-apple-darwin.tar.gz/md5/ff13e865e3be717b0fffc16296cb2f56
+MPFR.v4.2.1+0.x86_64-apple-darwin.tar.gz/sha512/98479210910945714da0285a40803674242581894a731ba4709c70dc1341849e736a88aa4914df0ff536c15f8848c417e712ff6abeb25047d300f8b215fd131f
+MPFR.v4.2.1+0.x86_64-linux-gnu.tar.gz/md5/48194b9f92ad01b168e8b9612f4c9559
+MPFR.v4.2.1+0.x86_64-linux-gnu.tar.gz/sha512/638eb40d23fd492972809cdc3326ad4c2c99d3eae1ca5f7c0da6e0e335bb596de2899da5b3e65153225654b2cd9a805298e7241a21395e07d0b333eb1f101b5d
+MPFR.v4.2.1+0.x86_64-linux-musl.tar.gz/md5/0babbb823964ccebf63b42fd07f08936
+MPFR.v4.2.1+0.x86_64-linux-musl.tar.gz/sha512/880b685d9b456fa2bf78e707273783423f9ff00791b529eba00c5e1b94ff96f4ba01e680152a4d6b45b695e3c1169d07f793db42c5a4120861813d5458dfc828
+MPFR.v4.2.1+0.x86_64-unknown-freebsd.tar.gz/md5/f11d634e5a19177fe36b2b2f6f5727ca
+MPFR.v4.2.1+0.x86_64-unknown-freebsd.tar.gz/sha512/291245c06edf31b2e39b6774359ebd4f95b924f19d2a7e8581822a5bf908426d00f0452c061a027da0d7d4bb2fa1bb7ef8ab6d8e49bc848d6d7450a8d5c8a9c4
+MPFR.v4.2.1+0.x86_64-w64-mingw32.tar.gz/md5/e6d1347d5da312f7301d578ce9d7c4d9
+MPFR.v4.2.1+0.x86_64-w64-mingw32.tar.gz/sha512/3ea4b944172be250677ef271f1e10c2b95861755f203795a50b8d0f76f72498897059271e44e038625c3b73cccbd0165685d60afa994180d42e912bffbe86729
+mpfr-4.2.1.tar.bz2/md5/7765afa036e4ce7fb0e02bce0fef894b
+mpfr-4.2.1.tar.bz2/sha512/c81842532ecc663348deb7400d911ad71933d3b525a2f9e5adcd04265c9c0fdd1f22eca229f482703ac7f222ef209fc9e339dd1fa47d72ae57f7f70b2336a76f
diff --git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2
index cfe5dbbab17ee..f8226d4f68b3d 100644
--- a/deps/checksums/nghttp2
+++ b/deps/checksums/nghttp2
@@ -1,34 +1,34 @@
-nghttp2-1.59.0.tar.bz2/md5/ed847a9f718b0bed4491a51344e8e6c8
-nghttp2-1.59.0.tar.bz2/sha512/63c497afdebb98a3a38fe41ff9ef0027d670231fbd118cef22b6340829df5de91df3d4f503135d54e54f9ee056f71efbda640b78f4fbb9ff56ba085177bb41f8
-nghttp2.v1.59.0+0.aarch64-apple-darwin.tar.gz/md5/531f9c56fe3d2d4e4457cf4245fc8a81
-nghttp2.v1.59.0+0.aarch64-apple-darwin.tar.gz/sha512/497b26a66da86ad8d0a28323a18b4ee9cd9682b6ff8ef681d760cd17bc3e82ea98385e2d621e8060a706b50531afdf6f63fa3717005446c4159780f73d17f8da
-nghttp2.v1.59.0+0.aarch64-linux-gnu.tar.gz/md5/9e853f884f638534b5ce88112c7b1fbd
-nghttp2.v1.59.0+0.aarch64-linux-gnu.tar.gz/sha512/72da4843e3b2b01f568d61894a7c83ace1f175667e0b64663f8bcca9a7315509f5b91f96b21508aba6009d7d33ccdf75f4aed8a8dc45ed0934654512ef2200f0
-nghttp2.v1.59.0+0.aarch64-linux-musl.tar.gz/md5/25f1489b3cc3a5bef92af5bf136e62b5
-nghttp2.v1.59.0+0.aarch64-linux-musl.tar.gz/sha512/abe1f8c5faa2a1ae6308d1c13850cc6288b13f744576de574fa6e0871f372da9121059814b467b9d5763c2bdd54ef4a38ba842a394e23735a2cc5b9c7fec13fe
-nghttp2.v1.59.0+0.armv6l-linux-gnueabihf.tar.gz/md5/33448bd8e19b63811f07e7f8f6d84fbd
-nghttp2.v1.59.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/b327d63e3742528f2d3b17dc97923429acd71156e4b688118295f7e7257c6313869e742b2475ee2540436d5cd68ac9224298c9ca0be4ec7c8a8b9741b65d210e
-nghttp2.v1.59.0+0.armv6l-linux-musleabihf.tar.gz/md5/8e6836af6f1e7532b1ada585a31bc9ad
-nghttp2.v1.59.0+0.armv6l-linux-musleabihf.tar.gz/sha512/db17877b4bafd608fe844959e1640060161a6e36ba762053b9f7153d075c3c3eadea61d72dd9f9683cec3c4b0564f284eb0b443d1715a5af77afa6f8abfa2ed4
-nghttp2.v1.59.0+0.armv7l-linux-gnueabihf.tar.gz/md5/f537e4776bbb1d9e85f8eea76df99679
-nghttp2.v1.59.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/60bb3612a8021c194a0766775583b137f5f48ed3c7d4e1afbbeeef617a5a959494c35df34d2e9a68e53b8bb676b67de5b0b47f6e98543ecccd8fe10cee2ea3dd
-nghttp2.v1.59.0+0.armv7l-linux-musleabihf.tar.gz/md5/ffecbf49f667cd0a2c09919299ffa54e
-nghttp2.v1.59.0+0.armv7l-linux-musleabihf.tar.gz/sha512/c34cdbe0074012c6889dfb9c93c44a4b1b0ed86197428f97b1d7b1bcd829f384121b8fc60f2c608e4206dc203739df9382a0653e75362eef282616c65251de24
-nghttp2.v1.59.0+0.i686-linux-gnu.tar.gz/md5/bb8b7668b5de15e3cdab955f905b6681
-nghttp2.v1.59.0+0.i686-linux-gnu.tar.gz/sha512/42be179bbe6a9b4c9a7f086177a23bfa0d99ec3b8ff4741a7656bcd302de9613cdb1bf556e483d924a3362c150d3286de30531d226ac402ca4a464fad3271175
-nghttp2.v1.59.0+0.i686-linux-musl.tar.gz/md5/298fb89147eb458a5f948ac6298213c1
-nghttp2.v1.59.0+0.i686-linux-musl.tar.gz/sha512/693b41d8e764f1475b79aff74c036697779dc82ac5dc79cd4d25c15ef9627e398c50cc43ad4296b96c3b6fdbb4c7514a041396570b82b3d35ef27c6c2286e8e1
-nghttp2.v1.59.0+0.i686-w64-mingw32.tar.gz/md5/b69df3b2a034b0309b5ae97ecf6e803a
-nghttp2.v1.59.0+0.i686-w64-mingw32.tar.gz/sha512/1c349ccb0799942f117f56f0e2f83f898b5913071651e4bb8d1750d7ab6fdc9b6e3e894ee760623b27ae23ba7910a6674b5bbda7db3a8388fef68c0908e2ec7d
-nghttp2.v1.59.0+0.powerpc64le-linux-gnu.tar.gz/md5/fc101f34e22f8885ed4141514572f2ea
-nghttp2.v1.59.0+0.powerpc64le-linux-gnu.tar.gz/sha512/c7377429fcf47a0a7e9c2a1ca78d4eeba819f6f7f2fb1191fa46b7ce516db6a489994993abe50e2848cc7ab600a747cbc13e8e6484077cb8ca8fe37775bd0016
-nghttp2.v1.59.0+0.x86_64-apple-darwin.tar.gz/md5/9f81d5bd0aa9b1b4e1536020f59496ee
-nghttp2.v1.59.0+0.x86_64-apple-darwin.tar.gz/sha512/c8a0641b5255a70e305b4baf5689914fc4abf795ade4ddb636243d40828195bdf2d2df394e4f8680beb70f0e497d5ceae90f6453b32ebf5d77ed03239bbb64c4
-nghttp2.v1.59.0+0.x86_64-linux-gnu.tar.gz/md5/954ef507adf403630588734a5c1ec42e
-nghttp2.v1.59.0+0.x86_64-linux-gnu.tar.gz/sha512/e4de84a962e2a9a4b9b61fa69d0816db5daa271fa3440f01eb116856e5f8c9cdd0cf0aa63ff9fffd51073548ee81e94d83b4d3fd096efa6c38983047ac8a556a
-nghttp2.v1.59.0+0.x86_64-linux-musl.tar.gz/md5/d14e89c0473ccbfa0ccb6291069f605b
-nghttp2.v1.59.0+0.x86_64-linux-musl.tar.gz/sha512/5255c8e9b501b636fd78b90b46c7e46d583f1ec3328a512153f2460c3f5136ac024e9c9f05c44be8efb8eee687b95080154c6c086358755c167ff9defac37146
-nghttp2.v1.59.0+0.x86_64-unknown-freebsd.tar.gz/md5/3a396ec020b3e9f067ffbe36315f250a
-nghttp2.v1.59.0+0.x86_64-unknown-freebsd.tar.gz/sha512/bfc14de5851428548816f289032dce4adcfd17d3ffddd77785777f573df5950ae40df006164ef256eda19a9402101c05dcd994e8bf4f566ddb73a51b8cc1a641
-nghttp2.v1.59.0+0.x86_64-w64-mingw32.tar.gz/md5/300bf8563122e081a17a836fd0d62b6e
-nghttp2.v1.59.0+0.x86_64-w64-mingw32.tar.gz/sha512/7967faa9ff64897e4ea6cddff8e70c823472b9b0a814055b7032546cdada76904b28d4b3d0408377108e54b5aac67b83177c03afd71424bb2d4a5bd1d3f96827
+nghttp2-1.60.0.tar.bz2/md5/ec20d9a6df7cc006894f72f81f9f2b42
+nghttp2-1.60.0.tar.bz2/sha512/95b76dd492dad490640469c4b806dd1a446f11143bc990220ff106fe4bfb76cdc4dfb112e0297c543b2d828f2870aa09ba820d88e3e9dedb29c8f3d3c9741af8
+nghttp2.v1.60.0+0.aarch64-apple-darwin.tar.gz/md5/dbf9f8161a124dc88ba44b54094b96e4
+nghttp2.v1.60.0+0.aarch64-apple-darwin.tar.gz/sha512/1997f473ea802afb09d7e13feb4eec9c11ad1d161cf83659ef6059a7c81639e00f8a3461c3538c81ea025e359b0927c3a362cef4a57e6544ad27588683142203
+nghttp2.v1.60.0+0.aarch64-linux-gnu.tar.gz/md5/d2e821a693d7d0720f0158b3e19ef7fa
+nghttp2.v1.60.0+0.aarch64-linux-gnu.tar.gz/sha512/4165a1282d125b461d670d7d953c8a06b6508d1b97383a4126bc2fa9641454a9e0be749dbbaf772f2c2e6ea8cc3e64eb980cb0e09ac3d2fe5533eb3e6f7fa9e8
+nghttp2.v1.60.0+0.aarch64-linux-musl.tar.gz/md5/61ecc91336fcddb0f58af6af167e9a81
+nghttp2.v1.60.0+0.aarch64-linux-musl.tar.gz/sha512/802c7455e8f1ddfea74d3de3ceb937d1d10312f51594257cd406aedd67c181ada6ee5115bca00f8ee340a1471e2903bbe0159a0c08b80c556188647345e2c85b
+nghttp2.v1.60.0+0.armv6l-linux-gnueabihf.tar.gz/md5/2998ae8d24d1bd540a29e0c6054bfcc8
+nghttp2.v1.60.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/5b2235a0c8bded57adcbab11dbe97b85a7d6d8a083c155bd74b0ac5546aa861730e88b615f1cbfa1071fcc2eb252aae8508e926ad3d5a1ddf0374536c260217e
+nghttp2.v1.60.0+0.armv6l-linux-musleabihf.tar.gz/md5/7ebec92e3b340e25b952ccc4e714aa2e
+nghttp2.v1.60.0+0.armv6l-linux-musleabihf.tar.gz/sha512/eb0e5c584527182816203ce9bfc35688a969803104ffd17dd4ac3720c27a4fcde3b3b471bf66fda8ac83ec8a56aa82d6d40f492ce06cbf6af39fafc60f35574d
+nghttp2.v1.60.0+0.armv7l-linux-gnueabihf.tar.gz/md5/8c124c0daf59c622aedc7b9f1423d522
+nghttp2.v1.60.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/6e03246d1bfef7f184da68ac0eacc975dcb99172f2f352ce4ea5f5ae77536812163874d7ffc4fcb2df65dc51079880fdb83afc8988b73edb241cb641dc72f1fb
+nghttp2.v1.60.0+0.armv7l-linux-musleabihf.tar.gz/md5/79968e1cb68c3b0518db528933251b0e
+nghttp2.v1.60.0+0.armv7l-linux-musleabihf.tar.gz/sha512/f40790aa9a86fa2f44072c36a33416a7a8b4778881233989f8ed64ccb84f59ccdf3632b7a9d48d3e434e26cbd48c020e5d208da8fcb96e4e4ad41757e050213a
+nghttp2.v1.60.0+0.i686-linux-gnu.tar.gz/md5/1580bf21084fa62ec26020f5c89430a1
+nghttp2.v1.60.0+0.i686-linux-gnu.tar.gz/sha512/cf83afe1bb796c57e220c0ba32a6990830df50cd91f82c781f2722d7b0ca5e5fbd8b708a0204be65bb8838c85b548f867c97e85941d124b81c67e01738f1db1a
+nghttp2.v1.60.0+0.i686-linux-musl.tar.gz/md5/605eb6cd67b6fe3a1ba2d95413635831
+nghttp2.v1.60.0+0.i686-linux-musl.tar.gz/sha512/2c626b76898b4d782038661601fe34580c3cd560a519a46c4f6bc62d94ab987c7f2984350fc65933c00850cd2fe0b942fc64fcb23d2fb7db29bfed5866291b1a
+nghttp2.v1.60.0+0.i686-w64-mingw32.tar.gz/md5/5b5694f36addbc503bc1e78e57159e5a
+nghttp2.v1.60.0+0.i686-w64-mingw32.tar.gz/sha512/e70069b1dde2cf4dd041c4cc1c1ff40f67c20a8954b88d997fd7bf03d925b08148bc55293380dffce8c3b550a0e5768c94066e1a3b881ce4109ee94076c9a8b8
+nghttp2.v1.60.0+0.powerpc64le-linux-gnu.tar.gz/md5/1d073bba8e90c970bf1325a3d150d129
+nghttp2.v1.60.0+0.powerpc64le-linux-gnu.tar.gz/sha512/7e6f3895316d47a701944e8ee192b56f66aa05bf212c41164d25a0507f0e54c4c58c856e1c464fe3ec3eae78e0fe09ba8cf8b595c246faa3300a797750677180
+nghttp2.v1.60.0+0.x86_64-apple-darwin.tar.gz/md5/27d405bf53d4d438f74f91176d638741
+nghttp2.v1.60.0+0.x86_64-apple-darwin.tar.gz/sha512/59c4b4cca09e9a99e2e9ccc765068870824b907577c385313568ea29cd395caa3352bda230238888412b625e4a428a24c9ae0e59d122730cbd8025c6edbf0196
+nghttp2.v1.60.0+0.x86_64-linux-gnu.tar.gz/md5/ed1fe996e4c3e51d9ea8f724883dd3bc
+nghttp2.v1.60.0+0.x86_64-linux-gnu.tar.gz/sha512/0b20db04ef7b2cc470b9abaab05d0e1e7ea3d674c1ed47c63e1cda00b98a6f10ce19ceb77ebd5ece28f6e4a2cf46227f5858f767ff0f04feed867c57941793ee
+nghttp2.v1.60.0+0.x86_64-linux-musl.tar.gz/md5/cf3fcdb5720633700e4f9a9d8cd0cfc0
+nghttp2.v1.60.0+0.x86_64-linux-musl.tar.gz/sha512/d8f87b354de0f47be21b8e3aab6c2b05ee2af377e4bcc7df692fc4dd361ee5b731a190a0d9b4d4fdedf9c3a6a8a300f43338b38ac096da39ec13d4b79b544144
+nghttp2.v1.60.0+0.x86_64-unknown-freebsd.tar.gz/md5/b83aba7b3bd97ed7de770a597d6ec374
+nghttp2.v1.60.0+0.x86_64-unknown-freebsd.tar.gz/sha512/e40e47835bb0d5d548fbcfb28a64124323422bcdab411bcee7d4288cea765c6c82d7f4586980ee28b6ff310c6c7313aa4185ede192cd94839fbe708ab1ed14a7
+nghttp2.v1.60.0+0.x86_64-w64-mingw32.tar.gz/md5/37ba862c196b4d7c063cddc87722f7ff
+nghttp2.v1.60.0+0.x86_64-w64-mingw32.tar.gz/sha512/ce0b70b4ad5cb30b83e672c3875fac7bcc8fc039506f05fef552a9d9cb53f053187dd02da4550dd7e5ef9aaaf8d587cee331eace0335f663d4190bacbc4ff9a2
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
new file mode 100644
index 0000000000000..a834d041324c4
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
@@ -0,0 +1 @@
+c866a3ff71f0640c47cda5d31f76c8e0
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
new file mode 100644
index 0000000000000..31eafabe3a66b
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
@@ -0,0 +1 @@
+a2db523b0068cb727db7e3a2210578f6d1de46493c5e3a9114ad961ed1553d10b646d11486fe4f987e43a9e2ea289d1923a63861f5fc56cada94bcf0b96b7dc8
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 34eca34c4ec2c..08bd98646c24b 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,94 +1,94 @@
-OpenBLAS.v0.3.26+2.aarch64-apple-darwin-libgfortran5.tar.gz/md5/62b6e0f8591668113a4985d88f386a16
-OpenBLAS.v0.3.26+2.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/fa5a15d853701818604c48bd152b599438eeeb1da67db552beb21b284027c313a9c7d8779b7236831a5f2631912baf881b0d2165aa77cb7ffec975a5c113a631
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran3.tar.gz/md5/bc83bc702eb292c4491229cd6142fe06
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/d4e872be87cc450e1547e51ba15540f5f0082a874788c6ba414cab839147e69f7f37b2c4080a79bdcd411efec829e3510b83df8a9c319a3b132a117614d205cc
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran4.tar.gz/md5/436ae51ef0b2f7a053ed8fb19fd55fd1
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/686c9c04a11d2f8a4879876ae34c410ad9e5cb0fddecd44aea9d642e662af9766a07bf4846405685835968253f2cda004a7d7ca1a0a04b2799984489294ed4c5
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran5.tar.gz/md5/fa57f3598b2c24ec3111c60f84bb6ed9
-OpenBLAS.v0.3.26+2.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/d6111ab03a8940892e8023af548de52da7b8c3c76ad2cfcf98a78cadf11b95ef4c39830dedc5a3c6a20e8d95c64fb24a4ed655971ea83ba11f69ff7a3ba34292
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran3.tar.gz/md5/184444acaf2f975b4bf5168a32577863
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran3.tar.gz/sha512/419da1e017208596e89482ac42005ed2adebfc917419cf900fad3b45ec40f7da46acffc3e6461f46862e3c4642588cd629f6dde151fd02edd6642c8d98d77ea6
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran4.tar.gz/md5/2aef4330a9995541bae287a3c2583ea6
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran4.tar.gz/sha512/caf7ae4ec0c4fddfe522409d1a5c3be5a376620baa9ddce63f62e4856cea4b1ab2818ef54f3fb5ffbcc4ae73247459d0de2ac7b93eba4bcc1ca744f4fe69695e
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran5.tar.gz/md5/308471abda2c5eba60976d227f6d1c1e
-OpenBLAS.v0.3.26+2.aarch64-linux-musl-libgfortran5.tar.gz/sha512/225242f309d343994176664f590471ec2e56254ebec877d97291c1334517993510422954774749d23da4c52f32a41e06aaba072bf2e3cbb3b2a8dac6c01dba8c
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/69618373222d15ede34de1f913a93dac
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/585fa3233d6c88c7ee5571da4dce7b83c53643937a0077d816c24c556b471926c808bcbd46c9ddddafbe24567f91039525b3a260505854e62e9b9b09acaede54
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/874b639fcce8fb077d1a9cdecc916a6c
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/0a065ccd197822b028da36e157aed19fbe440d7eedc94fe1955cd8980423d477bdb73db7e0f99a6ea683701a2c8fc649e3967a781694a7219d82915d1c22767c
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/03683c3d2310870905bc386c5fc89eb5
-OpenBLAS.v0.3.26+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/2e0f4ae61623cf094cb8396522dfb6e2a7cf7de23a4dd25fb3fb2f0a414fd0b8d6397ce1c74d3a88296fe4fe76d1a61e1df87b2814bf87852564c097aa1f45e2
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/9c9dbe9afb8fc5d1f2fae971e1dfce1d
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/e592f91a2c0a303a059a9694565555247b76ab20ecfcf9c9f7235e0b67b458d530b35e9a2263f3c37b7ecc884c02124fac134f6061788348475fe1d546e07f1e
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/ede329cb3e37e7c9008fec5273e38ce2
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/bbb081f217c977e77d77c2091918aeff99855dbe99f4396059954e057edf41c63e926769afc3a2360eb87cef5612a2c0de519852182dba51d9306be10c4508fe
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/c884cc9c752857a6a020891040c1bbd4
-OpenBLAS.v0.3.26+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/67e671c05964b2a1456b9bddfe300cf29f88f4712cbe0c11cae01955abb31e70d9ad9a31175d2e29e4c5dd02d22febcafac8abe47d78dc179aa1e40776c51883
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/69618373222d15ede34de1f913a93dac
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/585fa3233d6c88c7ee5571da4dce7b83c53643937a0077d816c24c556b471926c808bcbd46c9ddddafbe24567f91039525b3a260505854e62e9b9b09acaede54
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/874b639fcce8fb077d1a9cdecc916a6c
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/0a065ccd197822b028da36e157aed19fbe440d7eedc94fe1955cd8980423d477bdb73db7e0f99a6ea683701a2c8fc649e3967a781694a7219d82915d1c22767c
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/03683c3d2310870905bc386c5fc89eb5
-OpenBLAS.v0.3.26+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/2e0f4ae61623cf094cb8396522dfb6e2a7cf7de23a4dd25fb3fb2f0a414fd0b8d6397ce1c74d3a88296fe4fe76d1a61e1df87b2814bf87852564c097aa1f45e2
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/9c9dbe9afb8fc5d1f2fae971e1dfce1d
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/e592f91a2c0a303a059a9694565555247b76ab20ecfcf9c9f7235e0b67b458d530b35e9a2263f3c37b7ecc884c02124fac134f6061788348475fe1d546e07f1e
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/ede329cb3e37e7c9008fec5273e38ce2
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/bbb081f217c977e77d77c2091918aeff99855dbe99f4396059954e057edf41c63e926769afc3a2360eb87cef5612a2c0de519852182dba51d9306be10c4508fe
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/c884cc9c752857a6a020891040c1bbd4
-OpenBLAS.v0.3.26+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/67e671c05964b2a1456b9bddfe300cf29f88f4712cbe0c11cae01955abb31e70d9ad9a31175d2e29e4c5dd02d22febcafac8abe47d78dc179aa1e40776c51883
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran3.tar.gz/md5/030e3714c2544175861a411fc2520dc2
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran3.tar.gz/sha512/5ea2318d786b423770e77cd820fef4bd995da7f6aaea70f73f55eb9210508e89f61fb0c6a728421c6fd3859ab11774f3111979c53a6c48ddf7a528a04295c4ba
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran4.tar.gz/md5/ef935e9cf567efa381de917a0cf66fbc
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran4.tar.gz/sha512/81a463ca756d4d363fd32f735d32d7b6e8c6f325401e943a8210fcf258ecd25507beac17b9b24ecddef0eb8521a58fdfcf6633794724f86e6977b162e099c70e
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran5.tar.gz/md5/ce1a4b1dc5e3c001b5c12f9cd81cb3fb
-OpenBLAS.v0.3.26+2.i686-linux-gnu-libgfortran5.tar.gz/sha512/8f82012c0e9f955be2ab87551742a334f865cbdec95f0a62103c5127d6176838653884a6790234c5548940cec63e73ce95095bfcfaf241dc445020ab449c6687
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran3.tar.gz/md5/acfe348c1c1f33376469dd9106603222
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran3.tar.gz/sha512/2048723ce88ef28b2b7fa42cc73eccae8b663466a496709931ccecf06b4f4a9455fdfa3b5bd05bba4d847d3c0489251ecc3a34e7e3fb49bcdb40aacd4b53c140
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran4.tar.gz/md5/d82eb058ea705286058230f68b43c9fe
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran4.tar.gz/sha512/c97ca8dd352c3067ec437b1a8b2b9ee06bd92157f0806dca55fcc3107a94e784cdf8f6d5986aae93ccb608fb90ba2ab9bba4acf7562c1fd519804897ab158c10
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran5.tar.gz/md5/014aa93d275d56e6176ab50f4040ede2
-OpenBLAS.v0.3.26+2.i686-linux-musl-libgfortran5.tar.gz/sha512/cd3d1dd1aab373fe4e1135042dee3fc959371fcbebe70f6c60327df35e389442a2f47351757db84cdc1d181b659ce2d6a0892508de6af27b01324e3119d9dabb
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran3.tar.gz/md5/0b3f11710883a3cac2551cac0d72aad5
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran3.tar.gz/sha512/7d86f74c585dff33ed9384dc1c77b600a6893ceca335340232536f6e2cec1bbc86e85763c932c4b81c4012f12d68d7f37634e2f685aff6a999b875eae596cd9c
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran4.tar.gz/md5/64d7806e859c804161a59641db744f7f
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran4.tar.gz/sha512/7ebefd3ee2abd284c82e15d2497fd08bd4eccb264af7aacc1378c75d4b1dffb11ab95eafd98029c513ee7c667749a4917e26e4d57c65ccb77c5ba52cd8770009
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran5.tar.gz/md5/2a91e979a37722549d95cbd607e822fa
-OpenBLAS.v0.3.26+2.i686-w64-mingw32-libgfortran5.tar.gz/sha512/f08880e18e6a361458a495c8860382a00001fe5d2989852b034ff9623b4517c8b75cd3f03986f62d72222e3cf07f8f2eb123785577500c7cf0e79109b9eed960
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/a417db847b1d610e3f52a891f98ae2e0
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/16da5454b0987ecae76b6fe547d28f699684aa5fbf05a0e579960b71b3ab75dc1f3602fc60182fea3556eda8d905a870f03dd976009ea1972e9693a584e584f3
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/c63388278f7e037c232d9e7b29d3c725
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/bedbb30539530e16314c97b87df59caf38c75de7b68616cdca78d27e241dce06527d7265924902d557394a14a6348dc2b5e45c4a006158a6b9182382183bd0ff
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/5785f320f43600a791b543fb0bd8b00b
-OpenBLAS.v0.3.26+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/37ae82c2f57ec7c6ce0cd3f93b33870602f44bffdbeb2cb9bceef4515f5851aba73214d4a1fd24606fe4fa9bf0bf0a5552e1aebd65027ae57f76f3379a04344e
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran3.tar.gz/md5/ebd368ef0d4fd4f39534bb52b506e109
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/a81bf2244b160786530ae449be1e81981d17146ccf833ea5afcea84eb24644002d5824591c96b7e89fe9642911a865a292be062d325ff2307d850b4cfbd0e7d6
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran4.tar.gz/md5/f453ba9786d0916c9fc8c46281588c5d
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/7dea0549e3fec7e18043b8b4f93491ddbb4e041325330a2e6e28d137c25431c12371b534bae5170b22c5077d285ee23fda1579bae54c894a46a885b4571daf84
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran5.tar.gz/md5/3be8449cb3046322f21b1d17be41bd1f
-OpenBLAS.v0.3.26+2.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/89bb94d645879bd2daa8e83fc9b391b84f32b8bc1f633caacc5eda138c8034b726bc5c7ddb931040f24b77482743cd2038fed6d1d17698f287c09f176a6085c8
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran3.tar.gz/md5/2410734a7fc47705361267023c001272
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/e9b239a75ee7521145f30caa52ec4e8b42d99f5246df47c8e203497c577f161ec4f3e7df7d7fa8135b4f947f941314bbbb00fa56ed813bbbefd6f887af1a768b
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran4.tar.gz/md5/06212c9081e7d355bce7e7960e17371d
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/aeb9913167f6e860c4f2082dc15310fa4fb2a0edcf70a6aa31626c59e132d2af9265544f1b18161d994405649f2923abd4cb21521466768f1ed4fe0ef8243bcd
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran5.tar.gz/md5/e21eb44972f00ff7c054e3fade50602d
-OpenBLAS.v0.3.26+2.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/8d9c1bb8c300e2523fd7a42ef4d4bcfbaba94e29e69c630008a194a62e575648c9f89a42301d1cbd0e74613ba41e5282d69bd8e24376c64fedb1d2657dd1e4a4
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran3.tar.gz/md5/aa1501555419f9bc6c243e77d1a413c8
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8a4c5166e76c14bf6c65953bdcb8a4b222dee61123cfdb243876ee5af2d5d04e3125149936ce71c25cd90b0af5208d733ed9a1944b3134634de6a2ddfd1796a8
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran4.tar.gz/md5/597ff5c5a516278129a3077daeb4ffd0
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0b4b3e58d046e7a5f3eac36e967025983575df1979c5bdc73330481982e229f832735d1a7bce373009e666b9094ba5877d290bd06d9baf5f8a9efef0d3b0b4ac
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran5.tar.gz/md5/410507ba0176c16180163556294d359e
-OpenBLAS.v0.3.26+2.x86_64-linux-musl-libgfortran5.tar.gz/sha512/659167ca4b307c00afbfbc08a8bade4c144a0567c13354c81ef17dbd210f2a98b40289bb9f166a70cd447193c236561c7a6df2603ebcd0dec501529d4e976508
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/e12abdd6fa5a9211cdf3472ec9e2bf2c
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/cc68370a2e2c76303272e0d91a71ae1a9408233d7f39292682fc901b8474d8272ac42f233c2d0c3cdc07c481968bee2486831e8df48d9f34877b45e3af694b62
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/a02fcf5127ae14a00204c1b43b1c6807
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/e502418902d1af01efa1c9d6db94659b1472db8af812cd2507fb324836e5bab3364e54bca9a2ee809114947b4210f5fe3ec640d7bb741114a66e110d2668ddab
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/f4d49963b37686671d1d6f1f5b4b680e
-OpenBLAS.v0.3.26+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/0c395e956243b40bc29a820ac755740efd932f56eaba85a554041c223238300f0370ef2d051cb67357bf9a740950bbc78de1fba52522bd53a9d3a8f24799f975
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/fecab0026d2b7b7e0261ccf1f9aeb282
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/a9032b79cbc18448b141bc61b404e156af2ccaccd3919f0d38d5f2963e9e89c93f4992ed58217f969a7f5201b4489e0bdb91761fe507ee7f07e65e712f9ed584
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/28d440488310d3730db018612078bad7
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/a24e5e8f8da4734ce10b6d959b566df0ece3f3ba30737584ad3e7795d07b950e2bd57fb13d4a4f21eb92b29c38cf40fd846b5f771eb820a769e50f4074e36142
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/7202113787553198c32b0b6964630b5a
-OpenBLAS.v0.3.26+2.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/9553d90d0580bd6f1e6676be85029db615564bbada470ee8751619b5888e79ee23e1e699f1aba1e770a29c5ef1e4d13eb8c24cfa787ef22167c9100ed6af478e
-openblas-6c77e5e314474773a7749357b153caba4ec3817d.tar.gz/md5/4971eeb7adadee085d7c991db416fe7a
-openblas-6c77e5e314474773a7749357b153caba4ec3817d.tar.gz/sha512/7b85c9fb7be54407ba627d77897f40de4395d6d307230aa7df83cf8e0a41f545e4af4ae0576abb40cc9e0c385e1c6a488100dff292ea307439a89587c07ba66f
+OpenBLAS.v0.3.28+2.aarch64-apple-darwin-libgfortran5.tar.gz/md5/312aa603d089d680205dad7d5da58195
+OpenBLAS.v0.3.28+2.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/ffb0069561f52f8ac2f8affe937a00592e0c5d75c6d64bb0d5c93d1c925c93a46b763638031c88818b9dcef4a7b149ee3f15792a812e87f57a8ad086604164c4
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran3.tar.gz/md5/7c43d9e9ac07820130a3d5faefdef882
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/3ade0f098796148c37b118f9c052bad4e40431b4792f001043f040f8b1e4b7c3bae512f56ea21e6c0111246b2200e7720fe720a56a19dd11d1fba789344f29e3
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran4.tar.gz/md5/cd2fe87dac703c8bfa25406aa732b88a
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/2aea68bd8f1db2ac920951c8d9a47ce8c071f3736ee8aad8d185a09be25234a0ffd11b9f9640015b82770ba3b3fad9aa511cc43501c1bb5a3a44f1fb7ccd5692
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran5.tar.gz/md5/e3db2bf2f1f38aeee8530c78f3ec049a
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/a0ccb92e818650ac3cbc292d5af1a000ee9b123953cc3eb16e2479e926af3f2be0ed9858e3c0c1075b1b9dd70ec1e51b9dce2c9d45b999d296aa050d257a3cb1
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran3.tar.gz/md5/5bb605738930037259e773ebdb4a7041
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran3.tar.gz/sha512/967e0f33be7b743d9617627a947a802286962a46c7c3b2418aaa1504cffc5f311b01e1702b35ded18ae3686b1914c6085213b03fa8a51e0a7ca16dc4cfee8504
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran4.tar.gz/md5/ce175e82b9c6597c546552e79a43f934
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran4.tar.gz/sha512/8ff5dff293d9786fc4f541b209b35afcbe325c13ddd0f9c8f9bfca8ba5c318c7890152260a5441b9e9088751ce03b1ff8f0f5d6fd4f142fae34bdb7390d1952c
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran5.tar.gz/md5/cae6aabbdccf31fb78b234785b52d48a
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran5.tar.gz/sha512/ac842023e5db243fcfada22adca051bd2ffa04fca496454539931eede159e5d0490d444c338684c2d178c3367b23b8f3d76c544e30f1897bbed181f56237619f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/5d1f45f53dd1730051095fb8e027b14f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/0b1f91e86b5078b7cd6b64bc429a0e63bb5adf28df1baa336e67819fbd2c09f59b643c39e580f63e3bbccdc631c5d5e14c7d8afa6af94250453ce5286958f90f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/8b3e3ea928975c575798d47466aafb82
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ebac0f7047dd8b97d85e4251953a23824701af02754afd6808f13eb276326b30eb292c85fa717fbd2f21b929e6a9816a012b8ea378a0fa27e671f81435f5d3b9
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/5aacfce96d5673b4d8341cb097d22c4a
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b84dc2b8cbe5453555182c3fcd8624d7a2b28fe3826d54fde3b77ad2c33e60309317d150f07554dd85e168b0ac1f91537a5c2c17fff9c02dd9216f01161e4965
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/dfeac22ee204868cf254dab5ae79382b
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/710117eb7400a0aacf69d6053730eb3b3ff4767f8d38defb2aaad94aebf1646a794489e78a8f46b469901159cdca73dd2b9460fff11e95daa4a2642cab721a25
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/13ff2a40bc55839bdef76b796db1eb76
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/eb61fe6c0221e8f9d7a626b8d088ae1497155341dafb69835e7d53af76689ae212e1e4621e0729df5d896888c0b2d7354a24f7b57fe1d68f0b35c26bcf096699
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/aa7349724ba1d47256705777e755289a
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/25ab56c44b7d0d5de17344f39071e6894e878e89b5e35412a3c9fe345abd2eef76d7816cabb6407c7c521c3bf67a4741b37ad7e580962ead9275273e431f1fb3
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/5d1f45f53dd1730051095fb8e027b14f
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/0b1f91e86b5078b7cd6b64bc429a0e63bb5adf28df1baa336e67819fbd2c09f59b643c39e580f63e3bbccdc631c5d5e14c7d8afa6af94250453ce5286958f90f
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/8b3e3ea928975c575798d47466aafb82
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ebac0f7047dd8b97d85e4251953a23824701af02754afd6808f13eb276326b30eb292c85fa717fbd2f21b929e6a9816a012b8ea378a0fa27e671f81435f5d3b9
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/5aacfce96d5673b4d8341cb097d22c4a
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b84dc2b8cbe5453555182c3fcd8624d7a2b28fe3826d54fde3b77ad2c33e60309317d150f07554dd85e168b0ac1f91537a5c2c17fff9c02dd9216f01161e4965
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/dfeac22ee204868cf254dab5ae79382b
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/710117eb7400a0aacf69d6053730eb3b3ff4767f8d38defb2aaad94aebf1646a794489e78a8f46b469901159cdca73dd2b9460fff11e95daa4a2642cab721a25
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/13ff2a40bc55839bdef76b796db1eb76
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/eb61fe6c0221e8f9d7a626b8d088ae1497155341dafb69835e7d53af76689ae212e1e4621e0729df5d896888c0b2d7354a24f7b57fe1d68f0b35c26bcf096699
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/aa7349724ba1d47256705777e755289a
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/25ab56c44b7d0d5de17344f39071e6894e878e89b5e35412a3c9fe345abd2eef76d7816cabb6407c7c521c3bf67a4741b37ad7e580962ead9275273e431f1fb3
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran3.tar.gz/md5/53087cc770708c57d2654fd0095b64df
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran3.tar.gz/sha512/90961448ae40b0445bf881d0815aec54d2096ad235dc8e3db8d698a72961ef9a97e7fcd08f79c83cd1f7c5a341464f52a90351d927d5f1c3e9c8ee32b17970db
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran4.tar.gz/md5/ee910e19faa961bde11fdf90c211df9d
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran4.tar.gz/sha512/f5cfecfe965991cfd7843eff71efa71d6842058565bb63657e909b2942e58a8c7506aa66335308961e59f392da16e1177d79542ae509795566a14122f67a1782
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran5.tar.gz/md5/fe52ba7ca8e16f37aa04b79248e0471d
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran5.tar.gz/sha512/79b5108886d60f12424709a841e359dc1cf23cef21bb0ee6d1a48043ac48a35dac1637e43c8ebf3f2e10dd34721993a7a12c5776f2975dd5bd7b6e29e1a9adc3
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran3.tar.gz/md5/88d8ff421d29456f1d7670ceaf8867ca
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran3.tar.gz/sha512/91c1bd8142845d11fecba87a719315a14218e3863955ddd2ed82cecd4a2c177a48c660b6aac374ee9a11008245c0ced1bae70eaf5a1a6e3114db02e09a96396f
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran4.tar.gz/md5/3035066a53032b551e49f56b323e941d
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran4.tar.gz/sha512/f218e152a1c92bd374599814612add8010aedc78113cbe06465e8a1ee7f66892bb654cad687aa55555e74f3a65d74608692d41c9f0ce6c0bc63475ef62ab55b7
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran5.tar.gz/md5/f7cf36ac9a0cbb535952ec73f2e6c9ea
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran5.tar.gz/sha512/00ab052d9fa4a72a640545782019f24ed6017b36aa89c5e659ce73b1e821817f560c09f71b26c027c0a05bd13567c71a6d7f5995d1c39ab233bec56cd3a7fd9e
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran3.tar.gz/md5/b65414bb15539e5aa2f5f1c7984edb94
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran3.tar.gz/sha512/847ada020bb92fe6ea81dfffaf855707a529c9c0f7e246e802b9521e5c7d4aa36104d04279c09a905a797184cdf05a6fabf84711b7661ecb14e9ac2fba251f61
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran4.tar.gz/md5/0b626ebb8b3fc49b946723a9a2a21a91
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran4.tar.gz/sha512/b5bba23878399fc1ff20abc2e2eb4acb9691ce982f290e33384732452774a0b447bd0fb01ee696d10ad8b03d99eec905662af92bd3b499d9fe6db419e05d2573
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran5.tar.gz/md5/cb99d7d4944c5283a1a0142683e1d377
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran5.tar.gz/sha512/b77d3225e60f49506917bfff78c187df7157dbc834eccda2fa03d03eef8214b225682888a411a8b6e4b29a8d7e2b0ca625ea8c56b84ecc39e1f4f1012523c096
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/c6e5d4867a068e08b3f56f474e498b81
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/de6249439758a501bfd27d3ef04ec04cc06edf64de73f0709a6a40a2eaf40bd3d5d77dfd54b7b19e2f6bf6c104b4416d3e225faa0cff4cb631785c08d90b8614
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/32e70466cfa3cfec65ab4cad3abc5f03
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/2642385a5e9fc8e9c3839a5a44f9753b21b5078725f7d0c3e1ebe96b76129a3b8e2627d92629dee4f6fd7e8e51e86a7fbedc80cbe4d1a6812cea363559950da0
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/e2332831bd88d57132241697952819e7
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/ad03edf9ac56bf6311f0ca70a1bc359242accfe82cba9e42f39f6cb1c3006226179ff9be8218847889cae10fac13bc33f60837e1e3249e309172da7fbc25400f
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran3.tar.gz/md5/27c24775af446a44a72a28ffd197696d
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/2af8caa33bee88efff84653f3932b04e8fd4aabb1bf16d49fa73657b0ec13c9457fde7ab3f953fc9b01da5c2841c3c9b588e3b0f559b89df0e6268468d1f7cc8
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran4.tar.gz/md5/414e701d918d5fba08a12de6979db4b5
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/949886d388b80e19b944d102852f2bb58ffa03c42e624986dd9dc076797c996634d4a8fc0f04544451d6848c2079969816979e1f68a999b2747e9dd5472be7a6
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran5.tar.gz/md5/29fcf62c0280cc10f91d22189a2e8de8
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/02e75d4ecf9cd922157a72c0ca2e713cf336b125df3982cd5f7cc4f2a04367ad4c2b1190ca2a0a9df8b639c7ebcfc9783066e99dd0b13acde7b02038391e8567
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran3.tar.gz/md5/147d5e8eb2ec78fc8a31bdb091fab001
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/2319eda568800c0b1f2d96a8a36c59b1bbd792c06de1d740aea3f1e49798242426ea8d10c100c42c3c281702e2b4f5b673b6ab5252b276d48542e875bcaa3094
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran4.tar.gz/md5/448857d9c4b2e95afc12a14c75b24055
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/3e7c8cd55e0b15a30992b1e0b48a6e2ae36fd9babf689fa5595c7de94aec401de1d7821d45a22bf14cd5c45c708bc8fa3511d34d732dadd4daaca3f49e200bdb
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran5.tar.gz/md5/3aaf417685b44e0e505208f7b31b981a
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/f7b1d123e48ede93fe624a79d9535a8915bfa3441d7a6f9c6643467027414c9f2538e299858ea98bbb49d4e6d385a6a491063cb1878ac3b0b3d6a8f7ff0a48df
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran3.tar.gz/md5/5723136deaaf4b2e5960fb0774943288
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran3.tar.gz/sha512/127ea8b2b0d8d4586a23a2b8ecbf148d512efe68626e89b0688c3c9e29ed9420b45ae86755c1467313c565f9f3835762051d7086a815b813dbe6e9eb05fb4be1
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran4.tar.gz/md5/80b1b9cf5346916edda653174a987aa2
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran4.tar.gz/sha512/77e1387ec969bbed4945d2a598a1cd04d258265c4b2d5c43af92118eb32e0c69e40619a20ea1835f277febcfea068b241343d44932afef832bdcfd2e9f618f0a
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran5.tar.gz/md5/44dcedf01c938d1a1c67dd3bc90ab61d
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran5.tar.gz/sha512/e490d49b8d41d73ab3e71aca8c691ca58704f0fc6930cbfcc203f97b8db8d83144bad597a2c53ff0c0c4f7c40316d975a1b589a3603873d508f6beeb75970c5b
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/0e8a7e88b54cb836292c289d1c456fa9
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/0e9b3af6839b9c41c950bb4d8b739f0243a890af7092ef9f3a00e4931f2acc3820afb78e40c7bfef716dcd3230c1d0acc7b0b37f30eb47441b476bd7540745e6
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/5fc47ad55780c99ef9cab7ef1b26d9c0
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/c531201e4abddd652efeb5801658f5c1e4891578f181e99d6e41fc0d3bc6347b82e5e928ff8a717ee1e75bb0a6a765260bf7c99fce44aa24c21f1c5a5e3c1e3b
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/dc127f3ab984b5d47b325d5701ab73cd
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/50850911703320894a2e1e996c5de4613b5f9e3012f5cbf591f3677799599c45d9cc4c42cf310bdc6ba91ef550e52f6424bbbabdf47f96748d4669d94e6b46a4
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/937847e2ad00539f3422d1ecb9d26d55
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/751d889661ddd46cd5718b49e34f826a4fb34b1b992251a5a975bc0af15b74a75d8a56f403e8fae570223477b2b8927d9cb36764e4b9e466045d5f317b8e7196
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/180c54c50362d05696589b270693ee8f
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/2e3b76be5b7c4a7dc45f07e17493abd7ef9185e92429d8fa4d38766e0da96dd0777b619a9e420d2e1142bdab2ae1f755f9bc9ad97ee9a7927741778f89b9135f
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/2f0fac7c96af66ea63fce26e409f4db6
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/141522971447c38b4908342f3ad09ffb18142d2e79b44f66fd80047b44c09216c9b94c39f776e3093f9ceb6bc4d6270cbbfb4209b2fc0debfe93e7145cb4dbff
+openblas-5ef8b1964658f9cb6a6324a06f6a1a022609b0c5.tar.gz/md5/f7a1fe86cefbf7d4f2608843c7833ca7
+openblas-5ef8b1964658f9cb6a6324a06f6a1a022609b0c5.tar.gz/sha512/5f6020e958967a12a3c5b18bde13331f9c0602bd073563f35cd7cec848c92b45f30ca362819b12cd16989c0e4641ee3e63db8322d1092f61b31ba2e4068dd7a7
diff --git a/deps/checksums/p7zip b/deps/checksums/p7zip
index 3a3986977e3cf..272f1d768161f 100644
--- a/deps/checksums/p7zip
+++ b/deps/checksums/p7zip
@@ -1,34 +1,34 @@
-p7zip-17.04.tar.gz/md5/00acfd6be87848231722d2d53f89e4a5
-p7zip-17.04.tar.gz/sha512/ad176db5b657b1c39584f6792c47978d94f2f1ccb1cf5bdb0f52ab31a7356b3822f4a922152c4253f4aa7e79166ba052b6592530b7a38f548cd555fe9c008be3
-p7zip.v17.4.0+2.aarch64-apple-darwin.tar.gz/md5/b418adbae6512a13e04407c120ba78e0
-p7zip.v17.4.0+2.aarch64-apple-darwin.tar.gz/sha512/818f7afb0d3ffbff8079f5f4b8e9745a847148ac9cb5a261b6ca2f2f3a1dd722fa93f798645129bc9bc4a48f756bf2e55605791abb394a32635dfaef31f21e70
-p7zip.v17.4.0+2.aarch64-linux-gnu.tar.gz/md5/3f976d6514e6327a9aee4a3f21a25a64
-p7zip.v17.4.0+2.aarch64-linux-gnu.tar.gz/sha512/a4dd8be97c53b864e81aae40e248759f97249fbd6d8c5b91f0ac115a84126cbfc4825ffa3876f5e8b66652b014a78ba04e3ffc1ba1d9c96786b914b1279682c0
-p7zip.v17.4.0+2.aarch64-linux-musl.tar.gz/md5/b31699d7ea671c689fa9194913fbe7ee
-p7zip.v17.4.0+2.aarch64-linux-musl.tar.gz/sha512/5c8d95df66055ab8027b047b23534743ac929befd37dc8a8e591deece22006209f94524f7951de580a5ded9530ead2ce7ec3370c482865554830b53d09f41bf1
-p7zip.v17.4.0+2.armv6l-linux-gnueabihf.tar.gz/md5/fbe2ebeeaa6e5b33dcb71662fb7040f1
-p7zip.v17.4.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/6ca1d7eb1d3f6a7c4dc9860ac3d5a835abce92cddcda015a93086ecde44ed1b3d9f83a3c1e1eddc510af429ec269716dde6bc5fae4aa6bbbc3dcfc9a51326786
-p7zip.v17.4.0+2.armv6l-linux-musleabihf.tar.gz/md5/dae4b1e6060bf4431d3ead53e6b3e167
-p7zip.v17.4.0+2.armv6l-linux-musleabihf.tar.gz/sha512/856c2283c63728d8c542ce5a3d58e38c985f190774c407fc421dd30f05e0ae3467e2844cb7d535aa8a6b8fb24b21b29af75b736fbd9af67c24340609ad6b5841
-p7zip.v17.4.0+2.armv7l-linux-gnueabihf.tar.gz/md5/42b6b9b19158303c4399d651ee5b14cf
-p7zip.v17.4.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/4f8792639db8599af026f592496a8c594c0fd6a62dc949965add55b1b85a95d4edc2f99960cf5b21e7beeb8e1bca1d9c1a1a34600103df04dc20d0509410c486
-p7zip.v17.4.0+2.armv7l-linux-musleabihf.tar.gz/md5/e34f5585a50c0cfce29c79e9ece60cf4
-p7zip.v17.4.0+2.armv7l-linux-musleabihf.tar.gz/sha512/87f72568f5a877008d3a8a032a85f69c29d3af4293d7b42d70419bb4c9ca7e99dc13c6b22621ca83886d07f765118451ee9f2a3aee63979d8070910887bf7cdd
-p7zip.v17.4.0+2.i686-linux-gnu.tar.gz/md5/d917247133b1c62663334b6a908e30e9
-p7zip.v17.4.0+2.i686-linux-gnu.tar.gz/sha512/4839bec129b7fbd68c61d35fd3b3af9863c757d9fec0220926d45f1f58174d88d0bbb4a472d259d1d77775b906e9c58ba707fc20e2a4a060ca9030722609182d
-p7zip.v17.4.0+2.i686-linux-musl.tar.gz/md5/951614fc7597de8c12e0109cbd81bfa9
-p7zip.v17.4.0+2.i686-linux-musl.tar.gz/sha512/f0420ddd6df82d2b3e1ece9cc5cf537cb0803d291d274a495bb9a575bb253a4241cdae38a88e43ddafaab7f6911b310a30c1b874b0a0a9bc447f8c42c5a24652
-p7zip.v17.4.0+2.i686-w64-mingw32.tar.gz/md5/cc81daf0e40990c48db178cb53a95d08
-p7zip.v17.4.0+2.i686-w64-mingw32.tar.gz/sha512/ae5bcbcf32dad20db95319c3c2f874fdbb0cd41054d6c192f2ab106e0aece1b4b0b591055b37c2c909b07b303204a75dec5c4b3c224243c2041da811f99cd7e5
-p7zip.v17.4.0+2.powerpc64le-linux-gnu.tar.gz/md5/e97d74ac4dacfaa215c3119e055a2df0
-p7zip.v17.4.0+2.powerpc64le-linux-gnu.tar.gz/sha512/8b0596ebd84fa9947e8f15f63c426339980e08c81eb4c1474b4a66af6329f0a2fe1bd31eef964d147bf9cf0213e85bdc143fab1a4f1dbfa09da5ebd9e73a3d8d
-p7zip.v17.4.0+2.x86_64-apple-darwin.tar.gz/md5/4d9a26dbfc0a02a812c8f7de20ea5440
-p7zip.v17.4.0+2.x86_64-apple-darwin.tar.gz/sha512/3cba51ba9742b616afec13a14e8e3bd3c73c835256af8f6a49d4abf32f5ddf3f86ac8ae08ffd9bc331caa8a711dd1b63f4cd082443a7863e3d512f6ca2152bcd
-p7zip.v17.4.0+2.x86_64-linux-gnu.tar.gz/md5/37b7570712ecb8677059f4280a346201
-p7zip.v17.4.0+2.x86_64-linux-gnu.tar.gz/sha512/9445add6a475bdfc2924dc52c07917c2746b07a41a2dbfdab8ad4b4e5b87b0192c13f4da5da64e5d3544bbf9c79fda3c633664eecb372e8475031789770c41ee
-p7zip.v17.4.0+2.x86_64-linux-musl.tar.gz/md5/04d6ae950d05c81c6b165721de2ba7e7
-p7zip.v17.4.0+2.x86_64-linux-musl.tar.gz/sha512/524d8ed80a1af903b572d5e32710b384702175cacc83ce2305d7f7a35d45aae7d08e2afc14a9e40c934ba4eb578787afa9bece4f820e96e4b624869cb2bcec26
-p7zip.v17.4.0+2.x86_64-unknown-freebsd.tar.gz/md5/e2a3361e91258e39db541c9dec5a73fe
-p7zip.v17.4.0+2.x86_64-unknown-freebsd.tar.gz/sha512/ecc1db9a1823ebdac290548f6e001688b5d111caede4cbfab4e2ef492dbb31844690e9b69360ed9c6ebb2affded7f352d57c0e5cfe67be951876d1fc5e87d92d
-p7zip.v17.4.0+2.x86_64-w64-mingw32.tar.gz/md5/2b5f77bb31526c469e0fd48399d0cf9a
-p7zip.v17.4.0+2.x86_64-w64-mingw32.tar.gz/sha512/a3a17af4db98b82b71c8d4d09e5315dc4fa77b38cc19f0593654b63744bc7489383d40032e48c2141d6b55e330d1538c527819378a2575a245de436bc6daf532
+p7zip-17.05.tar.gz/md5/de921a08f37242a8eed8e4a758fbcb58
+p7zip-17.05.tar.gz/sha512/97a7cfd15287998eb049c320548477be496c4ddf6b45c833c42adca4ab88719b07a442ae2e71cf2dc3b30a0777a3acab0a1a30f01fd85bacffa3fa9bd22c3f7d
+p7zip.v17.5.0+0.aarch64-apple-darwin.tar.gz/md5/2a254e251901b3d1ddfd7aff23a6e5eb
+p7zip.v17.5.0+0.aarch64-apple-darwin.tar.gz/sha512/8efb9a2c9bcab388e523adba3dc0b876e8ae34e2440c3eee01fd780eb87c8619c7a7bbdc46d703ccefff6aa6ad64c4e4b45b723136ab1f6fd6de4f52e75ebbbf
+p7zip.v17.5.0+0.aarch64-linux-gnu.tar.gz/md5/bb1f3773fd409dbb91a10f7d9d2e99b5
+p7zip.v17.5.0+0.aarch64-linux-gnu.tar.gz/sha512/e95ccc342be644570d218d25403b91a7db9ee983fbf8cce3deff453355d68d426f9301eaac865a98691025b596b8cd77ebebf6184c0eaf8b2f294bc6763b9a4b
+p7zip.v17.5.0+0.aarch64-linux-musl.tar.gz/md5/3fac518a6a70412294d71ca510958cf2
+p7zip.v17.5.0+0.aarch64-linux-musl.tar.gz/sha512/fc127790739bf8a8b918b2e83753d86f5e79ee8706bde4cc79d74d9f7d846aae99a109da4b2b3cc92ccedc1eef4d52a555a65a95f588e173e0fecc11f2ca21e6
+p7zip.v17.5.0+0.armv6l-linux-gnueabihf.tar.gz/md5/355410848192de3b02d12fd663867f4b
+p7zip.v17.5.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/8f103b41e755d157d70dacca89a0ef4610bea109686b4005e8edd5f79ed2e6419c00c2625d0ab90e6e33fa389e670490d8de263c0bdae952cc34cbbf440e275f
+p7zip.v17.5.0+0.armv6l-linux-musleabihf.tar.gz/md5/34363b227306fce34a728af54b71064f
+p7zip.v17.5.0+0.armv6l-linux-musleabihf.tar.gz/sha512/8dd7b37ce6223c9fedcaa999eb806eb6dec8c4a3133d3c07e2456cb8543b8e4f5b881c1bff2d2e25f19b1312b18673e9013aeff87d6a274eec6c451b1ba0d6b9
+p7zip.v17.5.0+0.armv7l-linux-gnueabihf.tar.gz/md5/dbb1fc0cf3bea674442ff8cc932a94cd
+p7zip.v17.5.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/c4d71d905fa420391417786ed206a0c334475dd0df8baa1fc3f6560ce548db11805003d0d0b35bb622fe818c761f2b0abe0796d1cbfce2a922da69e697f056a2
+p7zip.v17.5.0+0.armv7l-linux-musleabihf.tar.gz/md5/d188b5dd453faedb616ba9c48fdeab6b
+p7zip.v17.5.0+0.armv7l-linux-musleabihf.tar.gz/sha512/ea30a775370502ca9e271b87cbda528d0c51d63ce0df41883d4dbc1527a32f251d797f3692fcf9b883b5fbaaad80515b971a8f8fe09ba102978b19a0ecb58528
+p7zip.v17.5.0+0.i686-linux-gnu.tar.gz/md5/dc02bdde045a0b6b22cf14d6960e63ed
+p7zip.v17.5.0+0.i686-linux-gnu.tar.gz/sha512/d2d0dd14a5fc1163fea2276e0925bfa8d075d5dba1d8018e4e3160977d3b09642b2e521d8e57d049abaf0e2ea391a846f0b0136b3c59e8b476c8c52ac5210447
+p7zip.v17.5.0+0.i686-linux-musl.tar.gz/md5/0b8658147938a8ec109ee2b3b0a0665f
+p7zip.v17.5.0+0.i686-linux-musl.tar.gz/sha512/411b2950f5928c537b87ba0651c09c08e57afed765db9fee89eda8b12939ef0da94c8ba38c0a24ba46b4513a0e4cca798eb09f2b20a011099ed3cf14455dd19e
+p7zip.v17.5.0+0.i686-w64-mingw32.tar.gz/md5/98bdd8767c77a35f71303ff490a3d363
+p7zip.v17.5.0+0.i686-w64-mingw32.tar.gz/sha512/14f08071af74297df8bfe1d9f7efa3c0212e62ace573848f17b729e4c36dc3861110f3c5cc9315364c318e5b040736443a24492e86d76161993653a309996eb3
+p7zip.v17.5.0+0.powerpc64le-linux-gnu.tar.gz/md5/b18c917b9852898a9b9d6d24bcc6863e
+p7zip.v17.5.0+0.powerpc64le-linux-gnu.tar.gz/sha512/0148dc8a0bc9c95212d7f8e2f92ee24e968eb7290fe72c7ae02e286bf5c05dd6b1f10b32350a7ff37777ed5a8cc21f3303f464620f3394c7a4690ae98bf77299
+p7zip.v17.5.0+0.x86_64-apple-darwin.tar.gz/md5/da31752a2556644d39e48649bb0111de
+p7zip.v17.5.0+0.x86_64-apple-darwin.tar.gz/sha512/0695ad111263d2fadfdf9a46ce7ee80def0bf60db7d1c2585ed2af6fc945fb169311a9f1ffc6f95fb43b0b03694d2d1be9136d3d78ba2ef2b19228987883a385
+p7zip.v17.5.0+0.x86_64-linux-gnu.tar.gz/md5/2fb55d86e4eaccb0488bd637d088b996
+p7zip.v17.5.0+0.x86_64-linux-gnu.tar.gz/sha512/38ac355157d59c09f308fc29964d0e9c1466c9633efd8d3c6ff3c738abce2af45ebc6b92a29f56d5e7baa4871f9f39b14ecfcbedd4e2f4ca7c0fe6627c6b13e7
+p7zip.v17.5.0+0.x86_64-linux-musl.tar.gz/md5/f0bd567a851d2dd9d306552ffafbca3a
+p7zip.v17.5.0+0.x86_64-linux-musl.tar.gz/sha512/e60047a6e7e3496cb6658f87c8c88676f399cd9f3d0d7daa880b6be09cd5525f7f22776896f1375722b47555514ff8c018f02ce800ec3fd0ed922e16e8a6d657
+p7zip.v17.5.0+0.x86_64-unknown-freebsd.tar.gz/md5/d37bd26e39a3ec84f262636f70624341
+p7zip.v17.5.0+0.x86_64-unknown-freebsd.tar.gz/sha512/0604a880c19f9d72d5828edd75be641625c29f230b3a5e7d70ec3812c014c96b76ee7b45e0e80f49be63f109a48700e75d1e5be01b5ae7b46d42dafda9885e8c
+p7zip.v17.5.0+0.x86_64-w64-mingw32.tar.gz/md5/f02c7b2481dee880b096340a8735350f
+p7zip.v17.5.0+0.x86_64-w64-mingw32.tar.gz/sha512/08b717c1b072d1309f6af8973eb09b1a482abb7ae7d01fba79873d4310a7c11292e2e8779029f99cc60627ed0d064224bc87782e587c520f970b840b7b838052
diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf
index a7122c400749a..6392e44d8f2e8 100644
--- a/deps/checksums/patchelf
+++ b/deps/checksums/patchelf
@@ -1,2 +1,2 @@
-patchelf-0.13.tar.bz2/md5/d387eee9325414be0b1a80c8fbd2745f
-patchelf-0.13.tar.bz2/sha512/43c3f99fe922e2f34d860389165bcc2b0f3f3317e124eb8443017f71b1f223d96a7c815dc81f51b14958b7dc316f75c4ab367ccc287cd99c82abe890b09a478d
+patchelf-0.17.2.tar.bz2/md5/d76db4f1a27b0934d0b0d0585b081c0f
+patchelf-0.17.2.tar.bz2/sha512/8277adf95513f88fb190536a38bdfdf438a4cc7685d8a130bdffbe064441f0f25095b6c83bbb190133e1a138963776d15b46c247dd2f1a073a1bfe1d1dbdd503
diff --git a/deps/checksums/pcre b/deps/checksums/pcre
index 9b9717b61688b..744d16540d6c8 100644
--- a/deps/checksums/pcre
+++ b/deps/checksums/pcre
@@ -1,34 +1,34 @@
-PCRE2.v10.42.0+1.aarch64-apple-darwin.tar.gz/md5/667a570d341396c3213749ee1e5b5fda
-PCRE2.v10.42.0+1.aarch64-apple-darwin.tar.gz/sha512/c1bb99e8928efded9b0ea3f294ceb41daea7254204ca30c0ff88686110ccd58138d8ea8b20b9a9d6d16a6d8d3f34e27e74e7b57d3c8fe6b051c9d8fa6f86431a
-PCRE2.v10.42.0+1.aarch64-linux-gnu.tar.gz/md5/1a758f275ff3306fbad7698df7b9b7be
-PCRE2.v10.42.0+1.aarch64-linux-gnu.tar.gz/sha512/d09508c0b255366d01f1b4d1ae6748a8e47f18c451498d30715f5f968784990949dab7540cd086396abd912f61b5f7c44c8c72a27efaba0a7fc08b71a167c057
-PCRE2.v10.42.0+1.aarch64-linux-musl.tar.gz/md5/e61147579fdc9b57a61b814bdf9c84bb
-PCRE2.v10.42.0+1.aarch64-linux-musl.tar.gz/sha512/eecaf4c1937fc04210b910ac65318524c02d690e8c4894c38e74eaba36d26c87a1fd9e1cc36f4307a11ff3552a79f081fa8f05085435eb34872dc2fdecce2d18
-PCRE2.v10.42.0+1.armv6l-linux-gnueabihf.tar.gz/md5/b4c484a3b87923c0e2e4d9cc5f140eb7
-PCRE2.v10.42.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/5931cf13d340971356a9b87f62c9efdb3656ba649e7b25f1722127a3fd70973d94c815a37b43cecab8eb0ed8d1ae02ef1a0c0a12051852c1b9242c3eaa01c496
-PCRE2.v10.42.0+1.armv6l-linux-musleabihf.tar.gz/md5/bc7b5bb1c5b0b99c121bad5a89299ca7
-PCRE2.v10.42.0+1.armv6l-linux-musleabihf.tar.gz/sha512/86b5ad4fa6f4b5bd1a76ad68ddff4b39916d0ed0acc03a3fee8eab5256aaed53abc0ff4ce9d9d9f8b9203c087211684da92fe6aa06ff5bc331ba1b3da2cba57e
-PCRE2.v10.42.0+1.armv7l-linux-gnueabihf.tar.gz/md5/3541eb26fa5a4d13e2c7d063dbd900d8
-PCRE2.v10.42.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/872181f931662edaf653351486c5e2a700e94cfa0966ca90eca893fdc75dd46eb40d9d45737c198aa4b9ad8ebab33fd78697ef35906985e4e1c9748ddf58d363
-PCRE2.v10.42.0+1.armv7l-linux-musleabihf.tar.gz/md5/fe059feb18fcc9312f1033362070fe34
-PCRE2.v10.42.0+1.armv7l-linux-musleabihf.tar.gz/sha512/5a96acf3908c964ccb4f296c449499388ed447d9a094c2760c979e02ef656fa710ede3926b9626e89fb5b0545c111e6eedff21e48416e923c17fc9ff129d0519
-PCRE2.v10.42.0+1.i686-linux-gnu.tar.gz/md5/d6c804ae6cc661d039ee3edd2f1dbcb6
-PCRE2.v10.42.0+1.i686-linux-gnu.tar.gz/sha512/256ca677b169854686ca34cf30af5a6709758b41b65f2c66d497c552858770a69a49834fd16daa2f7d481964b21a2e3ec68ff1b1fbd08f4e2257ec46b85c0063
-PCRE2.v10.42.0+1.i686-linux-musl.tar.gz/md5/092af10d8182cb4240cdd975efce4d7c
-PCRE2.v10.42.0+1.i686-linux-musl.tar.gz/sha512/79a48f4fd50ffdf49c8d57581e01ace38c1b3d7edd86d44db44b8efd93074d16faf035131a0d60c6631b8bf22f0fd8296acedba45908da56e8096c296122f047
-PCRE2.v10.42.0+1.i686-w64-mingw32.tar.gz/md5/bafc5fc1621d0f4fb2b7b271e2f66db1
-PCRE2.v10.42.0+1.i686-w64-mingw32.tar.gz/sha512/a5ac2b788fb2e4baf129fb339f28da04e9a5c9b7dbba0a1f43da2a7193917d361d961ba48abd0aeec30d2352ebaa401d667c8eec81c5f40859ef8adf8487edca
-PCRE2.v10.42.0+1.powerpc64le-linux-gnu.tar.gz/md5/0de1215b2a1e9c0efd131355e9fbf2c1
-PCRE2.v10.42.0+1.powerpc64le-linux-gnu.tar.gz/sha512/69dae12627685ae665db8c91264a79aba7c60ae97eccdc79ef889f2a5f69b465fa333aba298fc90bbb95710cfc324e3630bc427a97577855e8fb6c8fe227cfec
-PCRE2.v10.42.0+1.x86_64-apple-darwin.tar.gz/md5/c5c52b399921c5ab81a5f598b350d2ca
-PCRE2.v10.42.0+1.x86_64-apple-darwin.tar.gz/sha512/e6c8ba3aa3fbf54b37079301ab317104c6852812b23835f52ca40f31f0831678172d32e077fbaa712a8a2cb16d62bb97d475827004353e7807922a2d6e049b28
-PCRE2.v10.42.0+1.x86_64-linux-gnu.tar.gz/md5/b074dd1f85e24e723349e566350e2c78
-PCRE2.v10.42.0+1.x86_64-linux-gnu.tar.gz/sha512/236017e02c9f32b913b772dbf22897c8460e5791f196c86f8a073e329ad8925f6859afe48f3bf18ca057c265f08fedbde255360d8f859e2303c6569ab1b0e1bb
-PCRE2.v10.42.0+1.x86_64-linux-musl.tar.gz/md5/9f32ca77e79843fc9c4b5fc8ed336d11
-PCRE2.v10.42.0+1.x86_64-linux-musl.tar.gz/sha512/334a31724e9d69c6517568d922717ce76d85cf87dbc863b7262b25ab43c79734b457833cd42674eb6a004864e5c74da3ae1d0a45794b4cd459eea24d9669fac5
-PCRE2.v10.42.0+1.x86_64-unknown-freebsd.tar.gz/md5/76cde3c509ed39ca67a18fe58e728821
-PCRE2.v10.42.0+1.x86_64-unknown-freebsd.tar.gz/sha512/219c82067a242554c523be5be2b5561cd955609eac1addc336004df64a2a12e815ea40ff94d3f610970f7d0215b410f098d4baaa2c722f5cf21dab175b288b7e
-PCRE2.v10.42.0+1.x86_64-w64-mingw32.tar.gz/md5/b0771d5b0132b554776e7cee0e1374e6
-PCRE2.v10.42.0+1.x86_64-w64-mingw32.tar.gz/sha512/d4435ff703e51c88df7764a732d6b67b1ee4d3b09b915ac822af05a33347642691837818d4c389226ef1d70cd69dbac792ebe1e7de1d8900443fe162051916ae
-pcre2-10.42.tar.bz2/md5/a8e9ab2935d428a4807461f183034abe
-pcre2-10.42.tar.bz2/sha512/72fbde87fecec3aa4b47225dd919ea1d55e97f2cbcf02aba26e5a0d3b1ffb58c25a80a9ef069eb99f9cf4e41ba9604ad06a7ec159870e1e875d86820e12256d3
+PCRE2.v10.43.0+0.aarch64-apple-darwin.tar.gz/md5/f1bee27b8d9465c14eaf9362701fb795
+PCRE2.v10.43.0+0.aarch64-apple-darwin.tar.gz/sha512/33b8f6e3703f0a52cd2d57897c28e35fb3c63af459296a2fef4e414dc99239617833b2ab176068d6aab690122a34a9ab9b6042dfff54b5a30ad60429a809818d
+PCRE2.v10.43.0+0.aarch64-linux-gnu.tar.gz/md5/c55a569260e302f315f4a1bd185346ab
+PCRE2.v10.43.0+0.aarch64-linux-gnu.tar.gz/sha512/be4d2883e69d562898a157424b2baa146fe79545a8c10935cf25b54e498ca2c14fae026fa0d958d175895fe2cb695d0f96ef7f09fecbf54e1cee4a55b81a382b
+PCRE2.v10.43.0+0.aarch64-linux-musl.tar.gz/md5/fb041ccace415ccc26263968c6435a47
+PCRE2.v10.43.0+0.aarch64-linux-musl.tar.gz/sha512/06672ebe18e0f6bfa1dd2d5c02e10d9fd67236a73fd38ee2e8f4496d98f297f7866760f0be3b9cebeca348a5d748a3719e416b84cec96a90c71eac55afbbd905
+PCRE2.v10.43.0+0.armv6l-linux-gnueabihf.tar.gz/md5/4f303a4cbf26abb7bf4ffb8bfe3d636d
+PCRE2.v10.43.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/dddb3b227ee48d8329f6c65c5d0fce9f460eccaec98594a05bf28d1d9af01397cf7ef86c96e88b0e96030a7f6d8406461f78dd5fa558db8fc8f7bfb3b522ed54
+PCRE2.v10.43.0+0.armv6l-linux-musleabihf.tar.gz/md5/eade1fff90404bf3584fd15b62be0cfa
+PCRE2.v10.43.0+0.armv6l-linux-musleabihf.tar.gz/sha512/351f6fa11c39b90fcc4086bd00b1b1126ed92272595f0b745757ca4e7e360c84d244446a871029245c3bcf838b23f42d908f858e44fae7deb9002a36cb76753c
+PCRE2.v10.43.0+0.armv7l-linux-gnueabihf.tar.gz/md5/daa0a34b2cf0b71a6f8e1f9456cd4b06
+PCRE2.v10.43.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/ae72956ae7a9a5f315bfc816fdbb500937a170dfea306a28289ec9eac57d883cf2fa5a467ce9406eea80546b632a272c63bbb48b89ebe6d9f69d30366fd84180
+PCRE2.v10.43.0+0.armv7l-linux-musleabihf.tar.gz/md5/90bfb9e4efd7c92a2bb6a1a48fd88ecb
+PCRE2.v10.43.0+0.armv7l-linux-musleabihf.tar.gz/sha512/147ac98d82fec4695de0c43c87d3d9242b9c024bc6df7ad7504d17ef6a12a029ed703c4deade0e2b24faf5283d66309f880d62f8c4834f27b2cc8889587d7abe
+PCRE2.v10.43.0+0.i686-linux-gnu.tar.gz/md5/6fde649bf449c4122438fff32c0706ab
+PCRE2.v10.43.0+0.i686-linux-gnu.tar.gz/sha512/edfaa15490497723c095eaa5df26194637b0606e9dce7b89b400024ef8ac42e21f010bb31c2cee5c735ce82fc8de0c42bf2b35b095a1e70a9a111d3bfba6da64
+PCRE2.v10.43.0+0.i686-linux-musl.tar.gz/md5/73aa8d13cc48338a5071e30b3a899109
+PCRE2.v10.43.0+0.i686-linux-musl.tar.gz/sha512/200e2d3ffd68f49b76c70a5be80cb0ae9703049214674485a2ab24abaaea7aefd6dec2042a14bd48cc52b04379f57322ec1e1788dc8c00896e1074921725d9cc
+PCRE2.v10.43.0+0.i686-w64-mingw32.tar.gz/md5/4ddf0f31c97463e5216ed71afc4fb014
+PCRE2.v10.43.0+0.i686-w64-mingw32.tar.gz/sha512/75903d81668a66a5c4d830e31657391d507883943d86245998f224655406dcc6a95ba4f5fad20dcf608a98d6ccf49abe50107993448669b03c42a878d8466611
+PCRE2.v10.43.0+0.powerpc64le-linux-gnu.tar.gz/md5/64cb71080da1c97eba3a440ff53d298c
+PCRE2.v10.43.0+0.powerpc64le-linux-gnu.tar.gz/sha512/16348b96a45c7a7d86775cb1d082b4d1c060e5a8acfb37554885d8da0db87430d8a40f834f008a90f4a7b1c07b8329df96836ba0430ecec506a143b7347bb101
+PCRE2.v10.43.0+0.x86_64-apple-darwin.tar.gz/md5/31bbb2485f5e06c3616fb061ffb2f022
+PCRE2.v10.43.0+0.x86_64-apple-darwin.tar.gz/sha512/3284ee63ed1e5631267efacb354a1d90bd1b7db0bc81d7233c9580eee4a9af06093c1c4f240786c34299df89a36a17ed92598fc302074f5a200c56cc96081bf1
+PCRE2.v10.43.0+0.x86_64-linux-gnu.tar.gz/md5/2fb7e0e9bbc32dddf543f4d395b50d3f
+PCRE2.v10.43.0+0.x86_64-linux-gnu.tar.gz/sha512/5a533a3a01f817689077377835dc88edf914459ed0df7323f8f4dba602a47fd6af700075feb1f448221366b1cf7e2d717c615a5c506eb4ca2db9c600fd290fb0
+PCRE2.v10.43.0+0.x86_64-linux-musl.tar.gz/md5/b432063c93aa477dd0883428191041f8
+PCRE2.v10.43.0+0.x86_64-linux-musl.tar.gz/sha512/36475e90e29d7324046fe1da669fb37f667245a680df23f3978394964e14eb9bda3fd56703ad62cd56e27a5af77d8b6b9612516457ae803cef0627bd919e4628
+PCRE2.v10.43.0+0.x86_64-unknown-freebsd.tar.gz/md5/6124870a991e70c2ed8a64d8f3258760
+PCRE2.v10.43.0+0.x86_64-unknown-freebsd.tar.gz/sha512/4645a2d05af149467f2e4ce5e48853b57c585d6a5950c70726d04bc71a5d82f50809af141ad98e99671e764ac74965651ecad1c49a849caa8fd077c7f4911c7c
+PCRE2.v10.43.0+0.x86_64-w64-mingw32.tar.gz/md5/cc4e9f45471f538c1fefa657ab99b878
+PCRE2.v10.43.0+0.x86_64-w64-mingw32.tar.gz/sha512/eed45e621263cb307b6e8ab42e2c12cf9e1d61ad523760fd721a85765c359b74d580752ca7c3d222e0cba26a74e872a6d43dbf2dbf08e4733a3e709417e48651
+pcre2-10.43.tar.bz2/md5/c8e2043cbc4abb80e76dba323f7c409f
+pcre2-10.43.tar.bz2/sha512/8ac1520c32e9e5672404aaf6104e23c9ee5c3c28ad28ff101435599d813cbb20e0491a3fd34e012b4411b3e0366a4c6dfa3f02d093acaa6ff0ab25478bb7ade9
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
index 88f5195bb021a..acec99b39879c 100644
--- a/deps/checksums/suitesparse
+++ b/deps/checksums/suitesparse
@@ -1,34 +1,34 @@
-SuiteSparse-7.6.0.tar.gz/md5/79a68c2eca2d4b213a304bd08180c630
-SuiteSparse-7.6.0.tar.gz/sha512/6cffb283e7620afd31c6f7517ca9d28c2927515af582680d346e0e8a4ee1289daec0b4d83660f88f309e53e54a87403291fc4b0b45fe50dffd59781ec0129a8f
-SuiteSparse.v7.6.0+0.aarch64-apple-darwin.tar.gz/md5/dd7cb2ded54840648d9bb28099dfbc66
-SuiteSparse.v7.6.0+0.aarch64-apple-darwin.tar.gz/sha512/64edba78b463dffebf7a1dbcc21665a8229fc795f22ea669e648b03595bb2bc646d0e5d8d9326adca525f325523f50bb8aeb1f1d6da12ba2c465f16f87ed6726
-SuiteSparse.v7.6.0+0.aarch64-linux-gnu.tar.gz/md5/f7e30ff16e597cae91031dd84fcc790a
-SuiteSparse.v7.6.0+0.aarch64-linux-gnu.tar.gz/sha512/a3bfdaef6a2108d0179de3a3fa56c58af4a6cc36d12f987baa6ce15842bff2473c5ea92e5ce0e28d97744c5b8ffae5fb178c14a2e504f8d0291d5dc67370836d
-SuiteSparse.v7.6.0+0.aarch64-linux-musl.tar.gz/md5/763d9208eb9a8d134d8a0fcdf01745c2
-SuiteSparse.v7.6.0+0.aarch64-linux-musl.tar.gz/sha512/657d78d577dade8ddb6b863208c3baf9e15e4cae83853f3730dce49a5f227eb48164f60b39cfd4a22f1d4bcf6d9c6e4b3fcf340bdc6c36345c60fb235bc2cfd7
-SuiteSparse.v7.6.0+0.armv6l-linux-gnueabihf.tar.gz/md5/7c71369a2edfe3eca0061339b8a30e3a
-SuiteSparse.v7.6.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/bc197a50a1031e17721a63405438c2705da456c5ad97dcae0ccc538b0a5d46fa474b49e14a8d3db3d32402e7234e4d9523659a0ec2318901b5e6cd8a5fe7ea8f
-SuiteSparse.v7.6.0+0.armv6l-linux-musleabihf.tar.gz/md5/2933e936ec3cdbbbd015206124d2a98e
-SuiteSparse.v7.6.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d3e3382b982198b5d37928ca0e2354bb110b327069b9dfeb72e9aef1d82e79e5a1536679112c0c82c20acf7e005269f0168c84d498b4ef87c5521bd18b133134
-SuiteSparse.v7.6.0+0.armv7l-linux-gnueabihf.tar.gz/md5/df109c03141c3c34dcbcee180c255539
-SuiteSparse.v7.6.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/cbffe6e5c023f3efbb60a78d6c403feb2b2ca0ff4cac4586c82ffba65d87a4bd1cff63370abfff91882d64e6cf2bb2e10e792ab21f4b7380f413c6a78b89f242
-SuiteSparse.v7.6.0+0.armv7l-linux-musleabihf.tar.gz/md5/5a82f651831d0e83214791156637c38b
-SuiteSparse.v7.6.0+0.armv7l-linux-musleabihf.tar.gz/sha512/0436c0a83d2fb40f21cdd49817182a12824b903050fb47a717a55040445393e5312596e27654a0dabe059059b8dabf82c37e4754a71f189ee1561ddeb11098dc
-SuiteSparse.v7.6.0+0.i686-linux-gnu.tar.gz/md5/b58d6186f54c4f10d1e37ac1b82ebd1c
-SuiteSparse.v7.6.0+0.i686-linux-gnu.tar.gz/sha512/7d3f1cdd2f9786fa072d8820e4ec424d282595051aa2355c84de0530c957459408f7a4d7eb213863fe91a78c03b96346f8c6f8c088ee6e2bad91c576a5e7e39c
-SuiteSparse.v7.6.0+0.i686-linux-musl.tar.gz/md5/c43f347674f1d983d76d453a1ce26c63
-SuiteSparse.v7.6.0+0.i686-linux-musl.tar.gz/sha512/467a4e3af813c7a550b097699ca45083fcdad4ad04689bf53c302b2cf769962c23b62bc39a7afa851c4a1226cd6fbb37e1e8590fd1dbca3c680f7138eb029a8b
-SuiteSparse.v7.6.0+0.i686-w64-mingw32.tar.gz/md5/cd92ad4bef9109ab60357b78ae3c444b
-SuiteSparse.v7.6.0+0.i686-w64-mingw32.tar.gz/sha512/41557b4e4254469bd1f264f0bea4335761ee18328e22d3626dd9ddb69ab8501c40d31c2848ac3c28b361454d80b8d97514478b8db96a335aa4b8580eba22de93
-SuiteSparse.v7.6.0+0.powerpc64le-linux-gnu.tar.gz/md5/8ba38d83ee767ef1a3108ad2fbdc9edf
-SuiteSparse.v7.6.0+0.powerpc64le-linux-gnu.tar.gz/sha512/37ae98d9557ae8d2ce6edbcf2b18f49690b98daa5a19fab2cda747d2f19c735430031414fc8f23bca35899cb21995170e6fe72623dc6dae25b207870b9396698
-SuiteSparse.v7.6.0+0.x86_64-apple-darwin.tar.gz/md5/21cedd18cf9f5aa83b1bdaaf2d4253af
-SuiteSparse.v7.6.0+0.x86_64-apple-darwin.tar.gz/sha512/e86ea34ac3d1d1123be7d6989893948d26a98ae28198740272c108c434650d1616f89c9176ea51c0dbc3550a1a4f0611935eec29b2aafd96d80717c99916a2b1
-SuiteSparse.v7.6.0+0.x86_64-linux-gnu.tar.gz/md5/10674b26e56d1cb4345012b965cac926
-SuiteSparse.v7.6.0+0.x86_64-linux-gnu.tar.gz/sha512/d1dba73e5449768ac728923edf665f1c11e6d54654ab14190fa3c7cd128642b2dac5b21f3c2d91313ab35e5054e6406542e074b6de2b8c0bce49269a9ab89dd4
-SuiteSparse.v7.6.0+0.x86_64-linux-musl.tar.gz/md5/9f2b78216d7474b064c710f6966b15d9
-SuiteSparse.v7.6.0+0.x86_64-linux-musl.tar.gz/sha512/32a3c124ca8041390b0e6d4ebcfe536d25c88fee28e178fb78279e52428613cb6e109ec99b3801331740985226dd40684bf20f75dd5c99c4c74335177d3a8307
-SuiteSparse.v7.6.0+0.x86_64-unknown-freebsd.tar.gz/md5/724ee53946f949618be56ded74c15d29
-SuiteSparse.v7.6.0+0.x86_64-unknown-freebsd.tar.gz/sha512/2fc9b2fdcabfe8e98a14f74ad1f94689cbde704e9f5635f6e5677af9090920627e46e903790231edb4de998eea3a732ad88b0c3ebc2ae5a158433b487e739340
-SuiteSparse.v7.6.0+0.x86_64-w64-mingw32.tar.gz/md5/57c4bd66dfd1bdf824017150d798f8aa
-SuiteSparse.v7.6.0+0.x86_64-w64-mingw32.tar.gz/sha512/26d9c15de1b5b68b9f01775a758f322b5a9f8600c66fa713b5de1009a90cec8e6db755305c19485406f9a9e88d752871b70fff5e336d09fca9bdf61bcb4de3ba
+SuiteSparse-7.8.0.tar.gz/md5/ad42a80d28bb56a1fce15f6e7332e04e
+SuiteSparse-7.8.0.tar.gz/sha512/91aff0aee26e938ba88a8f92db15b0db0ecc6ada3b60153bb299f53a45ccda131db4bc66f890c220034c900180d0bb3a5fb3e2686fec7d6174f5900a3ee64424
+SuiteSparse.v7.8.0+0.aarch64-apple-darwin.tar.gz/md5/38379e14a53663a9c23f32ed56801676
+SuiteSparse.v7.8.0+0.aarch64-apple-darwin.tar.gz/sha512/3f2a7aa7778a22d150bad9ecb8d03edfa75707a07545e65660c8ccc4b0a9fb058ccab29e21e4728741d40d390d28922d521d3841e16258cf8e26acacadfc1fbd
+SuiteSparse.v7.8.0+0.aarch64-linux-gnu.tar.gz/md5/bc52c7df0a442c0fb9aafb83d60878f4
+SuiteSparse.v7.8.0+0.aarch64-linux-gnu.tar.gz/sha512/436e79ea0774d6ffb571b513e385ef48d9cc70b72010cffdc23d606ad6c8984c8b49e2422ce8881def0722f3f608e4ecb87e6752dd80cf7988addd330c5ded13
+SuiteSparse.v7.8.0+0.aarch64-linux-musl.tar.gz/md5/87e4c2588efc39723621ac5010ddf2e5
+SuiteSparse.v7.8.0+0.aarch64-linux-musl.tar.gz/sha512/17115826716bb48f16e4593941be275d47012d112e54d8826c75fde119ffc9f66accd02353b309365b59779d7af3ac220f31ab7cf7eea165b209a93ecdc4102f
+SuiteSparse.v7.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/b1490603aa129942d8e4c9581853cd0a
+SuiteSparse.v7.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/e23c3532784e295ae72b811d285c3729c3f8ac1b5ee1621e831b6b2824a5b357e4bfa49e09174de7763fc3ebcab6b84ef16536bc1cf6f4bc0543b1b229209178
+SuiteSparse.v7.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/f8199358882f76dd30bcce741b837de1
+SuiteSparse.v7.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/2c8d4ec21bfe253d3d32a5f5f09601b9b2864149f63f53067b157f5f7315fb04236bf5b19a1e5b4569e2c73127dcbb1703d56c7d06fc3ab9ae155902b7a1c2a9
+SuiteSparse.v7.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/cc3aa1a013cc91e7076dddf20fba9f60
+SuiteSparse.v7.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/a6b8cfbc345a089f12e55d8d44061dcce30f94c2d79fc520d6c5dfe433ac2e362d049fac72278cb59d4b3760ca08d5e350b7e2658fa5e8c77ce8608f67c2c4c4
+SuiteSparse.v7.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/0d7797d31c30c53bf219cdc0a48e64dc
+SuiteSparse.v7.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a7df8938ee6a04f62169bedd29c8408951cf33a43e0f529fb4d1e360bdad6462a50b2af297adb5f51fd726e1ced1fc8fcda7feeeafbeb44000bfe02a8e29c29e
+SuiteSparse.v7.8.0+0.i686-linux-gnu.tar.gz/md5/e48fa3d2e00f210e964c21e4ff27efae
+SuiteSparse.v7.8.0+0.i686-linux-gnu.tar.gz/sha512/3088c2af476285eb8549cf6aa56381156d49513a274348f86fbf01aa9ce0712961471f83fa50b261f3f365a302b88eb20ef0bb35b58c07a2cfb5dc337fdb72c1
+SuiteSparse.v7.8.0+0.i686-linux-musl.tar.gz/md5/e55202dbeca107a0c25a4f09d5d68915
+SuiteSparse.v7.8.0+0.i686-linux-musl.tar.gz/sha512/0f4de2e62016914b4d1bcb9b13bd8cb2bebefc5f0a532e103948b9aae79a20462ac7b74a3e968d4f99076c37dbbafb747699cd151e831ff89d297f78478fb84f
+SuiteSparse.v7.8.0+0.i686-w64-mingw32.tar.gz/md5/cb971bc1042196e527f95015c8bc5ef8
+SuiteSparse.v7.8.0+0.i686-w64-mingw32.tar.gz/sha512/d445a7790e3ac5392f75c9f4ec30cd1c812354b04388b4c6c6cea2423d2f0dac7173b17a8a2b7a7f4af10321601f96819a7702f9beac0397d85916d99493bc39
+SuiteSparse.v7.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/12058f122b548a37070770d1847f3ce9
+SuiteSparse.v7.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/f375feeb8448ea90ce8d9f31c7e1230f6868316f06094ba0155069dded4f8da2e1b54d462ef9cfc77abd76147740d4066236dcf1fcea91f8a7141819962ad0ae
+SuiteSparse.v7.8.0+0.x86_64-apple-darwin.tar.gz/md5/1bd473f2a25f1ebcea8acc858e2594b4
+SuiteSparse.v7.8.0+0.x86_64-apple-darwin.tar.gz/sha512/034af137deee5bf0ebf3746745d09ad50ce135cd4768a2049bb9811478ff90e6ed8e2c990e277b4c3b38a3a5e9eaa856938eb86239ca445fa64b6dab6af7e996
+SuiteSparse.v7.8.0+0.x86_64-linux-gnu.tar.gz/md5/c58a86d9f25e6705941105d9e41f084c
+SuiteSparse.v7.8.0+0.x86_64-linux-gnu.tar.gz/sha512/56447062802f01815ffb014624423c6fd3ab6e16b642b2fe37972a151b02865965c95ca3d1a455c6d51cd31633aea8a732b235b55d68e6779c17b293c488fa43
+SuiteSparse.v7.8.0+0.x86_64-linux-musl.tar.gz/md5/ba6e10ba61c209df94f18ab51fe2dd90
+SuiteSparse.v7.8.0+0.x86_64-linux-musl.tar.gz/sha512/3b8fc504cfb4a3b628d5b955a482bad08c85e09e529f833855a84b847721247aaa469f96adef6b218a1ba5896cde91664cc819ba33115e3cc309e72140841ca3
+SuiteSparse.v7.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/a50c69142a42c14edac4ce94b86b138a
+SuiteSparse.v7.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/963be0dccd1a594df08fe5135ef4ac13e1d707841c3e97d31ba5477d0d6ec26bad9be1c52d9fd78f199740a53950353adbdd767469f3bf01ea1e3ee843eb6c1a
+SuiteSparse.v7.8.0+0.x86_64-w64-mingw32.tar.gz/md5/7ca11ba89bd09183cc5a9320d6e8a4a7
+SuiteSparse.v7.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/e1d5def1103bbf0bb29c08cdd3bf21ba60456353694985c66f8e55a31d54a32c5b891e56e1ffe30f9e1223c49283d267e483e2f1b999f566099c239b3eed1d78
diff --git a/deps/checksums/terminfo b/deps/checksums/terminfo
new file mode 100644
index 0000000000000..bd971e72b1be8
--- /dev/null
+++ b/deps/checksums/terminfo
@@ -0,0 +1,2 @@
+TermInfoDB-v2023.12.9.any.tar.gz/md5/573d9b5adaf6af500e3dfae6e3d15ebf
+TermInfoDB-v2023.12.9.any.tar.gz/sha512/e0a5bfe54346f9d5690a840628b329f6fac7375b0d29337bc70813ae3553a72bb397f8034d221c544289e40c4cfc685d5805777b7528f05bbe0123b5905c24a4
diff --git a/deps/checksums/unwind b/deps/checksums/unwind
index 637605677359d..317809053abeb 100644
--- a/deps/checksums/unwind
+++ b/deps/checksums/unwind
@@ -1,26 +1,26 @@
-LibUnwind.v1.7.2+2.aarch64-linux-gnu.tar.gz/md5/2c8bdfc891b817cdcebad5cec3fe03ba
-LibUnwind.v1.7.2+2.aarch64-linux-gnu.tar.gz/sha512/c06eb08411c9759b7d0043258e239620eebac36b573c58d91f5cd3c6b801a33fb42833c21e0badcd958085adb1eddcf3d37386143b7584fd2c0631efa225acce
-LibUnwind.v1.7.2+2.aarch64-linux-musl.tar.gz/md5/3233eec783f73a9723f5c9c06a4085e9
-LibUnwind.v1.7.2+2.aarch64-linux-musl.tar.gz/sha512/91b5f6eba193b6c2abaac30ad805003142dd48b8bbda36c9d50e7802a17abf04f95b5d21c3d842697653834ee2c21c222f0b96d8f142a99f3819a58493817474
-LibUnwind.v1.7.2+2.armv6l-linux-gnueabihf.tar.gz/md5/75dc5ea25f5055959d348523ed6f1bdb
-LibUnwind.v1.7.2+2.armv6l-linux-gnueabihf.tar.gz/sha512/315e3e05caec95a81fef570aee3328f4f8b617a9567249b7e535a3b4246f9a39c6bc45dc0f3c061e6a796b94d839c308f0bba083a06093df4a9932328e1c6dee
-LibUnwind.v1.7.2+2.armv6l-linux-musleabihf.tar.gz/md5/99b02885777f3954cb9569849304f6fc
-LibUnwind.v1.7.2+2.armv6l-linux-musleabihf.tar.gz/sha512/b73ab8472cf60fb9e88d391543e5349246d05b90fa6b0ea793fca0e1a67ce91cc8f12a09c9e4594add5264c0363b157b879b89ce881ae33f0f7e3315b56aec5e
-LibUnwind.v1.7.2+2.armv7l-linux-gnueabihf.tar.gz/md5/ce447b3e3a9e8f990f914faa6cb37db0
-LibUnwind.v1.7.2+2.armv7l-linux-gnueabihf.tar.gz/sha512/efb8aff3a23dbbc8b65414f7f39af4a664311120f2da61b7b70dd8c7da30230582d8256c2b5421e00b2d70062e767694e358dafbbae83a69f550a5d0ac6a62b9
-LibUnwind.v1.7.2+2.armv7l-linux-musleabihf.tar.gz/md5/535b71f32d85b18208b911a13a9a0672
-LibUnwind.v1.7.2+2.armv7l-linux-musleabihf.tar.gz/sha512/267b72cdb3fe1fc63031fe76fd3ffc3afc97775a541dc2357df4f849d585064c0492b82ef3c16efce0f0573778ee56ed7886e97c6591f7a8cda27f705b0b65e4
-LibUnwind.v1.7.2+2.i686-linux-gnu.tar.gz/md5/97b6867d8d20abef8b96f5fc17e932b9
-LibUnwind.v1.7.2+2.i686-linux-gnu.tar.gz/sha512/ff48995503447288945dab6f32f85801b81be98ec9f5bcc933305230befb51a9c0aeb588e722ebde3694cc9e73ee6b443ae5eedec352c5c166603dca8c23c559
-LibUnwind.v1.7.2+2.i686-linux-musl.tar.gz/md5/3864ac880d38adb276e353904bf2b18d
-LibUnwind.v1.7.2+2.i686-linux-musl.tar.gz/sha512/36198cf175d6db1dfbb43ed498ad25eab2392958f21fc8a83aafdb4f54aacf8c3a473d313b117a835c81e56b7a20ff2bf152dee8f4280cdf7ef0e751d736a362
-LibUnwind.v1.7.2+2.powerpc64le-linux-gnu.tar.gz/md5/b4e6b56f160be18abbaac5f36d161753
-LibUnwind.v1.7.2+2.powerpc64le-linux-gnu.tar.gz/sha512/354f92c8658b465545b2ae77826f0ae4580084ec3e7723900e064245bdb0c38bc8dc9dc04bc505e83879d8bb3d2291493c091e83929aba23d42c99dc622a2bf0
-LibUnwind.v1.7.2+2.x86_64-linux-gnu.tar.gz/md5/1d82791f522deadb3bc3232997d094be
-LibUnwind.v1.7.2+2.x86_64-linux-gnu.tar.gz/sha512/9603731e4f40c89b6a157c754c1e89ca7c38349e2099e072957123967e6d9c8bd7df1725afc9ffe1c9137ada6e364deb19b4b91fc19d099d7acf1bced3caf2e0
-LibUnwind.v1.7.2+2.x86_64-linux-musl.tar.gz/md5/278dd4a9d36169b3c7719ac83f3d3fed
-LibUnwind.v1.7.2+2.x86_64-linux-musl.tar.gz/sha512/68952cbc9ac3e379412bc8482379621809bdd1d77095a44f18b96ff5f58bc338e502305941db0cfae0b68d4ee3719d7db898b64ccd815f1b71110cbc49a4b11f
-LibUnwind.v1.7.2+2.x86_64-unknown-freebsd.tar.gz/md5/d043be8787b39d5e9a467bd8ff90be1d
-LibUnwind.v1.7.2+2.x86_64-unknown-freebsd.tar.gz/sha512/44fda1ffe4a3f4d442dcccb414efe6c0d6ab895f619d74f4aa8cdcef26d9388d7b6975cb43fc18b207df1f2e61faf3555d65db8597f85e3ab4b053be5ce72e66
-libunwind-1.7.2.tar.gz/md5/35799cd8e475d3e157230ad2590c10f1
-libunwind-1.7.2.tar.gz/sha512/903f7e26c7d4c22e6ef4fe8954ca0f153fdf346cec40e1e8f7ab966d251110f4deb0a84d1fd150aee194ed966b5c1e01ee27c821cd043859852da33a94faae1f
+LibUnwind.v1.8.1+1.aarch64-linux-gnu.tar.gz/md5/0f789b9e5b2604a39cc363c4c513a808
+LibUnwind.v1.8.1+1.aarch64-linux-gnu.tar.gz/sha512/4c9c8250bfd84a96135a5e9ecdd4500214996c39852609d3a3983c2c5de44a728d9ce6b71bd649c1725e186db077f74df93a99f07452a31d344c17315eedb33d
+LibUnwind.v1.8.1+1.aarch64-linux-musl.tar.gz/md5/356deb10e57d4c7e7bf7dbc728d6628d
+LibUnwind.v1.8.1+1.aarch64-linux-musl.tar.gz/sha512/a998eebe7a4928bd417620bef0de9728c080f5d9714f15314ac190b333efa1bd7a21207156d56c132515bd3f7154d60204f1fac2dac5468560a7017682527c78
+LibUnwind.v1.8.1+1.armv6l-linux-gnueabihf.tar.gz/md5/b0ff12f5f0c801e5e280a142a1b7a188
+LibUnwind.v1.8.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/68003f39eaf55c8742e821a228889590e8673cbafb74013a5b4f6a0c08ee372cb6b102a574e89ce9f46a38dd3d31ef75de95762f72a31a8ec9d7f495affaeb77
+LibUnwind.v1.8.1+1.armv6l-linux-musleabihf.tar.gz/md5/b04c77d707875989777ecfed66bd2dad
+LibUnwind.v1.8.1+1.armv6l-linux-musleabihf.tar.gz/sha512/fb20586a0cbc998a0482d4102d8b8e5b2f802af519e25c440a64f67554468b29c6999a9ec5509ba375714beb93a4b48e8dbf71e6089c25ecd63b11eead844041
+LibUnwind.v1.8.1+1.armv7l-linux-gnueabihf.tar.gz/md5/e948016b4179d34727b456bc768cd8e1
+LibUnwind.v1.8.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/6fc64e8ac7248540b95c321103d234f2c8633087f261e368251fe2cf6ea4e0654325716ac7017ae966edc4ddbb004a0f808d6e25cca766faaf505ca1f8f4aee7
+LibUnwind.v1.8.1+1.armv7l-linux-musleabihf.tar.gz/md5/660cf49c34a2ead1afbdcb44491e174a
+LibUnwind.v1.8.1+1.armv7l-linux-musleabihf.tar.gz/sha512/edf337d176440c210f5860e90771758335256fe9d2f179d506656bccf92a9f9aa478d176d4b0db2213945ae847dad5bb88265110c92cfcd538d5740858b6a3f0
+LibUnwind.v1.8.1+1.i686-linux-gnu.tar.gz/md5/7032a70cfecb88cdd49cc3a4879456c6
+LibUnwind.v1.8.1+1.i686-linux-gnu.tar.gz/sha512/e34acc8f270c5156ede3ac3377d0f428c672daed869570734351c6b5a8946d65b5c0c041b713dddefedef81e55c65f5683aed0fec0d366e2d0207d8b902b0e33
+LibUnwind.v1.8.1+1.i686-linux-musl.tar.gz/md5/0541c3419020334173d299cf3482ff85
+LibUnwind.v1.8.1+1.i686-linux-musl.tar.gz/sha512/0b57745d280fb9893772936cd4872b0e04f41d86379e772b889e75baffe9324ef8dd168bb4c9761c1b8372f387ce99721dd6086b1d52b9a91215f40e8113968d
+LibUnwind.v1.8.1+1.powerpc64le-linux-gnu.tar.gz/md5/fee37734fe95d1e96ebc77316df64192
+LibUnwind.v1.8.1+1.powerpc64le-linux-gnu.tar.gz/sha512/953ef70fb203db73764eeab0a37521b94e79ce70644ae16fe3157ca8d1011a0319d1928d094a3e2ed1e0489fdc0ca7dda33722095fd3aa40ed1fde150cf44c2a
+LibUnwind.v1.8.1+1.x86_64-linux-gnu.tar.gz/md5/bbb201e7455fd13b805b0a96dc16183b
+LibUnwind.v1.8.1+1.x86_64-linux-gnu.tar.gz/sha512/b1e21f7d772bd15bada17d287e1876ae586a97c6a8669e714347e7bf8a9b202fe53e8559cf19358f88bc458b2fe15ccbd616b64163cc715ce253f43f5133a8cd
+LibUnwind.v1.8.1+1.x86_64-linux-musl.tar.gz/md5/72156f9d6da9a2742d9152822e5525f5
+LibUnwind.v1.8.1+1.x86_64-linux-musl.tar.gz/sha512/53a3f1985c5ae4816693f292604810cbe948e6332aeb227fb900ba3730f4379e863b144ae87af2c0651c2b9633b35c45c7a0a6fa34958dc9f58e0f8baa2ea701
+LibUnwind.v1.8.1+1.x86_64-unknown-freebsd.tar.gz/md5/e4346df03246d847f2867df3ab5ac624
+LibUnwind.v1.8.1+1.x86_64-unknown-freebsd.tar.gz/sha512/ee01bc12726288ae091476c1bed44de224a9ef5355687fd6fd64742da6628450434d7f33d4daf81029263aa6d23549a0aa5c5ae656599c132051255d1d742d5d
+libunwind-1.8.1.tar.gz/md5/10c96118ff30b88c9eeb6eac8e75599d
+libunwind-1.8.1.tar.gz/sha512/aba7b578c1b8cbe78f05b64e154f3530525f8a34668b2a9f1ee6acb4b22c857befe34ad4e9e8cca99dbb66689d41bc72060a8f191bd8be232725d342809431b3
diff --git a/deps/checksums/zlib b/deps/checksums/zlib
index 72fd884183e47..b6fc106747c67 100644
--- a/deps/checksums/zlib
+++ b/deps/checksums/zlib
@@ -1,34 +1,34 @@
-Zlib.v1.2.13+1.aarch64-apple-darwin.tar.gz/md5/bc44b2016065fb20cbd639b3cd5dbb88
-Zlib.v1.2.13+1.aarch64-apple-darwin.tar.gz/sha512/9cfecc16a29b0a13282846ed7d4c17c420b3f62379777d3fac61a8c9c4eeaf4214b826cd9f7479f480e951617b22c96e6ca2976a709345e16fbe7f81e9bdd83f
-Zlib.v1.2.13+1.aarch64-linux-gnu.tar.gz/md5/a2d3265543017db03bc47b9d9778d99d
-Zlib.v1.2.13+1.aarch64-linux-gnu.tar.gz/sha512/c8143445222e151d7f522a98ee8f2742571542f4e71d515e88086c9d7f27b952662ced93f40c795e0de42e3a07c0cb5e1d9d8e792347f3c068cb07ccc144a640
-Zlib.v1.2.13+1.aarch64-linux-musl.tar.gz/md5/c1f2a1c562f72c7aa4b228f57c2346d4
-Zlib.v1.2.13+1.aarch64-linux-musl.tar.gz/sha512/7ed89bc7696690c03617c7413f5456ff5a1caa0dd600880ae67132f6c9190672ae451a06d23956a1969be00bf5c8f29bfa4f5bc4ab646b3b375c350f67c993e5
-Zlib.v1.2.13+1.armv6l-linux-gnueabihf.tar.gz/md5/7dff966f7bc5dd2902fa9ce20444235b
-Zlib.v1.2.13+1.armv6l-linux-gnueabihf.tar.gz/sha512/49e7b4a7c84996b697cf944b11ce06ce6064983a6a911c4539587385afa1e0119e3b1dbf816703a2c132acc90f7f114ec10631647638b59b14954382c1a82014
-Zlib.v1.2.13+1.armv6l-linux-musleabihf.tar.gz/md5/6982f19d2446559c0fd369afe84ebe4a
-Zlib.v1.2.13+1.armv6l-linux-musleabihf.tar.gz/sha512/8f69dfb7fb91cd6f7c934e1acddd83f77c2ebcc1732553f41ae1adcb7805a3304d16062133ce5094a8aea18ff5eca5f7a2df5724ae5a5cb9137caee732c1bf36
-Zlib.v1.2.13+1.armv7l-linux-gnueabihf.tar.gz/md5/30579a91f8f1c96752fe9a82bc053523
-Zlib.v1.2.13+1.armv7l-linux-gnueabihf.tar.gz/sha512/64f6a0e66ee13b086609e0d070c8742de20052e1ef43da201be0007e478c65b2f0a28a3c19ca5be6537b7c8bbeb6a4b2886c15a1e47bb2bd1cfe9d5e1590a620
-Zlib.v1.2.13+1.armv7l-linux-musleabihf.tar.gz/md5/b052ad151dbc3bad78762bc06164d667
-Zlib.v1.2.13+1.armv7l-linux-musleabihf.tar.gz/sha512/b5d2de09a4d65d898cf9ba0db34327c712f42a78cd1fd0f1d77fd8798910502049be63ccfed23de5fe3b499d9e0fe3d4cbb07c72765fd54db275e92f8f1e4dc4
-Zlib.v1.2.13+1.i686-linux-gnu.tar.gz/md5/3074702010889f586b43aa3dbbda4ceb
-Zlib.v1.2.13+1.i686-linux-gnu.tar.gz/sha512/92aa87c5aa3831155305276c2f0da091b5be4e8a396772e1a28650c2837ceb116dd2207329732b653a97c011abd7dd6ac1fc9574ac64cb3049ccd36fa6700748
-Zlib.v1.2.13+1.i686-linux-musl.tar.gz/md5/eff02476825ea7a53ab26b346d58f96e
-Zlib.v1.2.13+1.i686-linux-musl.tar.gz/sha512/14b72607d524948198e999e3919ee01046c049b3ec441bc581c77642cf37c3d28cc3c5500a3c073d62e9b8dc1efc9661b23bb925ed9c80b5e69abaddbcb59115
-Zlib.v1.2.13+1.i686-w64-mingw32.tar.gz/md5/138cb27334b8f6f9e818131ac394bf43
-Zlib.v1.2.13+1.i686-w64-mingw32.tar.gz/sha512/07fbf4a21f6cb5a6120be253e5769b8bbdf60658f9f3705222307fbe203d8710de59fd3dab7a35714ebe1a7385600d4e1b01eae0b1addca47f9d8d862173e667
-Zlib.v1.2.13+1.powerpc64le-linux-gnu.tar.gz/md5/bc69de101d9159b22b7a334e2700faa6
-Zlib.v1.2.13+1.powerpc64le-linux-gnu.tar.gz/sha512/174eb4f154594d268d970d23eb6144dd2f6be41ddcfb9bc756b2ff48f0781ad0ed6571e2ead64dab0967da91517a02cd8db2b0e33a0bde9400103b5204f78e85
-Zlib.v1.2.13+1.x86_64-apple-darwin.tar.gz/md5/60279d648bce4801cd0e311ea95a6481
-Zlib.v1.2.13+1.x86_64-apple-darwin.tar.gz/sha512/921fc557317f87012d76f5d2cb0a7bbed29cdfdb2274ed6d37577f8e99dda2afb2a8dd4725d720eb8fb0a93c0d3afe68dd54fdd3a6e7cb07c15607a8aed72f82
-Zlib.v1.2.13+1.x86_64-linux-gnu.tar.gz/md5/b192d547d56124262e2ae744f385efd6
-Zlib.v1.2.13+1.x86_64-linux-gnu.tar.gz/sha512/c6dca3c0a713ef2e2296bc9e9afa75e103a4cc4f00b5c905ebc5cff688904d6a454f83ab5ef3b6c66bdf425daa2fcd25825e50a3534c0ff109b13affbb686179
-Zlib.v1.2.13+1.x86_64-linux-musl.tar.gz/md5/f2a466b38b2ff1c895f630982147a950
-Zlib.v1.2.13+1.x86_64-linux-musl.tar.gz/sha512/191261d37fc501591005bf680d76bf518da261252456c4fef1c12bc572f9200a855fbd1b125bb8ad10d803eedbc53d4c9d7a2861e9a35d629fb40f87e5306f5f
-Zlib.v1.2.13+1.x86_64-unknown-freebsd.tar.gz/md5/36e53efdafdb8b8e1fb18817ea40c9ab
-Zlib.v1.2.13+1.x86_64-unknown-freebsd.tar.gz/sha512/3067eace2a46b45c071dd1d2c046ab21e3f4a34b87346905bf4c00ef4ea57f41c4c30e32cbd5d4b60a560fa45aeeba7b0ce95566c0889f06f00f7a25de771cb1
-Zlib.v1.2.13+1.x86_64-w64-mingw32.tar.gz/md5/4c14730c6e89a3b05dcf352007f9c1e5
-Zlib.v1.2.13+1.x86_64-w64-mingw32.tar.gz/sha512/b6fbfe93d0c4fc6ebb740dbe0aebaa31aa5ecf352589452f6baac3ee28514531a1d0de9795634f97774ebb492dd23dee9f7865c2b8ba3f70c7f03cdc5430e85a
-zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/md5/60a49c89b9409dd91c1b039266f7bd0c
-zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/sha512/83122539da9399ce5f51c2ecbc38a627405334a9a6d53a024341353c1263a1e3aef7498f30ee281a49b3022be70e992eae475691e33da7a9c6a59b83207bd688
+Zlib.v1.3.1+0.aarch64-apple-darwin.tar.gz/md5/50b48e14f0b3578e3f398d130749a25d
+Zlib.v1.3.1+0.aarch64-apple-darwin.tar.gz/sha512/d970e183035b3615b410f7b0da2c7a1d516234744491d65ed1ebc3800b55732f20bf00fcbb0cf91289b8b4660915282873fb23788896713cf8dfae2984a8fd85
+Zlib.v1.3.1+0.aarch64-linux-gnu.tar.gz/md5/ee42c0bae86fc39968c8cd6a77a801bf
+Zlib.v1.3.1+0.aarch64-linux-gnu.tar.gz/sha512/5d21cbeab03d44008c6cbad114d45c917ebee2fe98de6b19686f4f6ba1fc67eeedf968b94ed1c2d4efb89e93be9efa342bcc8a57cb8a505085d177abae14bc2d
+Zlib.v1.3.1+0.aarch64-linux-musl.tar.gz/md5/9091d1288736b218f7b016791dc1a9c8
+Zlib.v1.3.1+0.aarch64-linux-musl.tar.gz/sha512/b49cbfe734beb2af9ef8e847542d006765345cbb08aee0854779e35e03c98df25c93539b046547c6b66029987c49499ddf6cb207824b1e376900bfceaa79691a
+Zlib.v1.3.1+0.armv6l-linux-gnueabihf.tar.gz/md5/b686c85047b7dad2c2f08d1d16e7978a
+Zlib.v1.3.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/511fda619519dccedb264988e3b59a0e0fbf8f73d3ae290f238346209ebc0202a22f945257cea19afef64246574285e0322901a46bb48d7b48364c1e2eacd801
+Zlib.v1.3.1+0.armv6l-linux-musleabihf.tar.gz/md5/374be5cb926876f3f0492cfe0e193220
+Zlib.v1.3.1+0.armv6l-linux-musleabihf.tar.gz/sha512/4d3a2cc0c7c48146e63ed098da5a5acad75517197adc965550c123f7f8bcee0811a27be76fa37b6b0515eee4b5ba1c1a85c854e7b23bea36b5e21671805bedce
+Zlib.v1.3.1+0.armv7l-linux-gnueabihf.tar.gz/md5/9febbc6a3d492e34c9ed53c95f3b799f
+Zlib.v1.3.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/4cee0e2cf572eb91028a09ef356e1aa6360949e046ceec03bd37574295ddcc4a7cefca9276f7565f152697d55b35f62af2ab107cdbf402b42846818629fea9c7
+Zlib.v1.3.1+0.armv7l-linux-musleabihf.tar.gz/md5/5d0d59a6cbbd1e63193ba6f7dbb755f9
+Zlib.v1.3.1+0.armv7l-linux-musleabihf.tar.gz/sha512/ee3f48b354168342ef63509b19a26aca3301fb3e5f4f6898afe2d3b44ee3380515efd6ced5d4e06e69736d851d19352deb9595bad82c051caccaee8c55e629d8
+Zlib.v1.3.1+0.i686-linux-gnu.tar.gz/md5/834350a64b2302a9caf0250a8f6068e5
+Zlib.v1.3.1+0.i686-linux-gnu.tar.gz/sha512/63dc158c4dfc42db97875893fcdd9784d9487af855bd576dbe04d1b967ad64510222df74a4cfb1b7e67386329d2a5686d7931b81720883fc1924f0d706a0a711
+Zlib.v1.3.1+0.i686-linux-musl.tar.gz/md5/e4f96efdeafa3d74c7c348059a8dc46a
+Zlib.v1.3.1+0.i686-linux-musl.tar.gz/sha512/b47a571d94887ddcab8d7b50c6dce3afed3f56513a9d1859feaefebfad4a271d428b440df1d19ef3c2ed01ca4c8fd121ffc1572f5e252f27d0930f616cb47f18
+Zlib.v1.3.1+0.i686-w64-mingw32.tar.gz/md5/6bc27bd7dbbe17243dbbfaff225d3b23
+Zlib.v1.3.1+0.i686-w64-mingw32.tar.gz/sha512/5777661682831519875fffbb114c62596bf7bdb62011667c0f3dc5da9910e14de2285200a0a05187769b9c68c99b07024caafc16fef03c76e96e618f77149790
+Zlib.v1.3.1+0.powerpc64le-linux-gnu.tar.gz/md5/27dcad8557994cfd89d6fa7072bb843c
+Zlib.v1.3.1+0.powerpc64le-linux-gnu.tar.gz/sha512/3b388dd286b273881d4344cff61c7da316c2bd2bab93072bf47ce4cb1cf9662158351b8febb0d5b1f8dfd9bc73cd32f7cae37fdd19b0ca91531bd3375df104bb
+Zlib.v1.3.1+0.x86_64-apple-darwin.tar.gz/md5/9187319377191ae8b34162b375baa5db
+Zlib.v1.3.1+0.x86_64-apple-darwin.tar.gz/sha512/895203434f161926978be52a223dd49a99454651a79c1c5e0529fa064f3f7ac2d7a069fed47a577b32523df22afadd6eb97d564dbd59c5d67ed90083add13c00
+Zlib.v1.3.1+0.x86_64-linux-gnu.tar.gz/md5/55d4d982d60cb643aa8688eb031b07ee
+Zlib.v1.3.1+0.x86_64-linux-gnu.tar.gz/sha512/d8f94d22ffc37df027de23b2408c2000014c8b7b6c8539feca669ac1f2dbbe1679ca534c3be4d32c90fe38bbba27c795689226962fb067346b5ca213e64b9c4b
+Zlib.v1.3.1+0.x86_64-linux-musl.tar.gz/md5/95d735bba178da4b8bee23903419919c
+Zlib.v1.3.1+0.x86_64-linux-musl.tar.gz/sha512/370370f08133a720e3fbedcc434f102dc95225fda3ec8a399e782851bd4be57fb2b64a3ed62dc0559fb0c58d2e28db9b9e960efafd940982e4cb6652be0e81f1
+Zlib.v1.3.1+0.x86_64-unknown-freebsd.tar.gz/md5/df158f50fdb8ac1179fe6dad3bc62713
+Zlib.v1.3.1+0.x86_64-unknown-freebsd.tar.gz/sha512/f4ba4ccfeaf3fd2e172a2d5b3b1ae083ee9854022e71e062e29423e4179cb1fc49b2b99df49b3f5f231e2a0c5becc59b89644e9dcaf0fda9c97e83af7ea1c25d
+Zlib.v1.3.1+0.x86_64-w64-mingw32.tar.gz/md5/9cc735c54ddf5d1ea0db60e05d6631ea
+Zlib.v1.3.1+0.x86_64-w64-mingw32.tar.gz/sha512/8a2fd20944866cb7f717517ea0b80a134466e063f85bec87ffba56ca844f983f91060dfdc65f8faee1981d7329348c827b723aaad4fea36041e710b9e35c43de
+zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/md5/7ce1b2766499af7d948130113b649028
+zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/sha512/79d032b8c93260ce6b9806f2289cdccce67e9d80865b5bb39ac46dadffc8ee009da51c551eead59c56249c7adfa164c1d5ebcf2b10a8645e0b11b5650176cb24
diff --git a/deps/clang.version b/deps/clang.version
index a950bc32aca6b..fcd55b72de5ff 100644
--- a/deps/clang.version
+++ b/deps/clang.version
@@ -1,4 +1,6 @@
+# -*- makefile -*-
+
 ## jll artifact
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 16.0.6+4
+CLANG_JLL_VER  := 18.1.7+2
diff --git a/deps/curl.mk b/deps/curl.mk
index 8454ec5f38b1e..ae2830c3cd4f2 100644
--- a/deps/curl.mk
+++ b/deps/curl.mk
@@ -14,7 +14,7 @@ $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/nghttp
 endif
 
 ifneq ($(USE_BINARYBUILDER_CURL),1)
-CURL_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
+CURL_LDFLAGS := $(RPATH_ESCAPED_ORIGIN) -Wl,-rpath,$(build_shlibdir)
 
 # On older Linuces (those that use OpenSSL < 1.1) we include `libpthread` explicitly.
 # It doesn't hurt to include it explicitly elsewhere, so we do so.
@@ -37,7 +37,7 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 # Disable....almost everything
 CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \
 	--without-gnutls --without-libidn2 --without-librtmp \
-	--without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \
+	--without-libpsl --without-libgsasl --without-fish-functions-dir \
 	--disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \
 	--without-gssapi --without-brotli
 # A few things we actually enable
@@ -57,13 +57,12 @@ CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix)
 endif
 CURL_CONFIGURE_FLAGS += $(CURL_TLS_CONFIGURE_FLAGS)
 
-$(BUILDDIR)/curl-$(CURL_VER)/source-extracted/curl-memdup.patch-applied: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
-	mkdir -p $(dir $@)
-	cd $(SRCCACHE)/curl-$(CURL_VER) && \
-		patch -p1 -f < $(SRCDIR)/patches/curl-memdup.patch
+$(SRCCACHE)/curl-$(CURL_VER)/curl-8.6.0-build.patch-applied: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
+	cd $(dir $@) && \
+		patch -p1 -f < $(SRCDIR)/patches/curl-8.6.0-build.patch
 	echo 1 > $@
 
-$(SRCCACHE)/curl-$(CURL_VER)/source-patched: $(BUILDDIR)/curl-$(CURL_VER)/source-extracted/curl-memdup.patch-applied
+$(SRCCACHE)/curl-$(CURL_VER)/source-patched: $(SRCCACHE)/curl-$(CURL_VER)/curl-8.6.0-build.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/curl-$(CURL_VER)/build-configured: $(SRCCACHE)/curl-$(CURL_VER)/source-patched
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index d68a7a80d6d5b..b65ac022885a3 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -33,8 +33,12 @@ LIBGIT2_OPTS += -DBUILD_TESTS=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool`
 LIBGIT2_OPTS += -DCMAKE_FIND_ROOT_PATH=/usr/$(XC_HOST) -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY
 endif
 endif
+ifeq ($(OS),OpenBSD)
+# iconv.h is third-party
+LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-I/usr/local/include"
+endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 LIBGIT2_OPTS += -DUSE_HTTPS="mbedTLS" -DUSE_SHA1="CollisionDetection" -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
diff --git a/deps/libgit2.version b/deps/libgit2.version
index 65f3e7670e4af..d51beb34c27f5 100644
--- a/deps/libgit2.version
+++ b/deps/libgit2.version
@@ -3,12 +3,12 @@
 LIBGIT2_JLL_NAME := LibGit2
 
 ## source build
-LIBGIT2_BRANCH=v1.7.2
-LIBGIT2_SHA1=a418d9d4ab87bae16b87d8f37143a4687ae0e4b2
+LIBGIT2_BRANCH=v1.8.0
+LIBGIT2_SHA1=d74d491481831ddcd23575d376e56d2197e95910
 
 ## Other deps
 # Specify the version of the Mozilla CA Certificate Store to obtain.
 # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes.
 # See https://curl.haxx.se/docs/caextract.html for more details.
 # Keep in sync with `stdlib/MozillaCACerts_jll/Project.toml`.
-MOZILLA_CACERT_VERSION := 2023-12-12
+MOZILLA_CACERT_VERSION := 2024-03-11
diff --git a/deps/libssh2.mk b/deps/libssh2.mk
index 3f9738515e4a1..c293d8309d2bc 100644
--- a/deps/libssh2.mk
+++ b/deps/libssh2.mk
@@ -11,6 +11,10 @@ endif
 LIBSSH2_OPTS := $(CMAKE_COMMON) -DBUILD_SHARED_LIBS=ON -DBUILD_EXAMPLES=OFF \
 		-DCMAKE_BUILD_TYPE=Release
 
+ifneq ($(fPIC),)
+LIBSSH2_OPTS += -DCMAKE_C_FLAGS="-fPIC"
+endif
+
 ifeq ($(OS),WINNT)
 LIBSSH2_OPTS += -DCRYPTO_BACKEND=WinCNG -DENABLE_ZLIB_COMPRESSION=OFF
 ifeq ($(BUILD_OS),WINNT)
@@ -20,7 +24,7 @@ else
 LIBSSH2_OPTS += -DCRYPTO_BACKEND=mbedTLS -DENABLE_ZLIB_COMPRESSION=OFF
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 LIBSSH2_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
index 7e36bce8f4f9d..85b2c23473a18 100644
--- a/deps/libsuitesparse.mk
+++ b/deps/libsuitesparse.mk
@@ -36,7 +36,7 @@ else
 LIBSUITESPARSE_CMAKE_FLAGS += -DSUITESPARSE_USE_64BIT_BLAS=NO
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 LIBSUITESPARSE_CMAKE_FLAGS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
@@ -59,8 +59,8 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/
 
 $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched
 	cd $(dir $<) && $(CMAKE) . $(LIBSUITESPARSE_CMAKE_FLAGS)
-	make -C $(dir $<)
-	make -C $(dir $<) install
+	$(MAKE) -C $(dir $<)
+	$(MAKE) -C $(dir $<) install
 	echo 1 > $@
 
 ifeq ($(OS),WINNT)
@@ -95,7 +95,7 @@ configure-libsuitesparse: extract-libsuitesparse
 compile-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
 fastcheck-libsuitesparse: #none
 check-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked
-install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse
+install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse remove-libsuitesparse-gpl-lib
 
 else # USE_BINARYBUILDER_LIBSUITESPARSE
 
@@ -103,6 +103,7 @@ $(eval $(call bb-install,libsuitesparse,LIBSUITESPARSE,false))
 
 # libsuitesparse depends on blastrampoline
 compile-libsuitesparse: | $(build_prefix)/manifest/blastrampoline
+install-libsuitesparse: | remove-libsuitesparse-gpl-lib
 endif
 
 define manual_libsuitesparse
@@ -110,3 +111,13 @@ uninstall-libsuitesparse:
 	-rm -f $(build_prefix)/manifest/libsuitesparse
 	-rm -f $(addprefix $(build_shlibdir)/lib,$3)
 endef
+
+remove-libsuitesparse-gpl-lib:
+ifeq ($(USE_GPL_LIBS),0)
+	@echo Removing GPL libs...
+	-rm -f $(build_bindir)/libcholmod*
+	-rm -f $(build_bindir)/libklu_cholmod*
+	-rm -f $(build_bindir)/librbio*
+	-rm -f $(build_bindir)/libspqr*
+	-rm -f $(build_bindir)/libumfpack*
+endif
diff --git a/deps/libsuitesparse.version b/deps/libsuitesparse.version
index 2602a449e3a4e..6f841190cebc7 100644
--- a/deps/libsuitesparse.version
+++ b/deps/libsuitesparse.version
@@ -4,5 +4,5 @@
 LIBSUITESPARSE_JLL_NAME := SuiteSparse
 
 ## source build
-LIBSUITESPARSE_VER := 7.6.0
-LIBSUITESPARSE_SHA1=1a4d4fb0c399b261f4ed11aa980c6bab754aefa6
+LIBSUITESPARSE_VER := 7.8.0
+LIBSUITESPARSE_SHA1=58e6558408f6a51c08e35a5557d5e68cae32147e
diff --git a/deps/libuv.version b/deps/libuv.version
index 9ae54aa7be91a..ebfc63927d9db 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,7 +1,9 @@
+# -*- makefile -*-
+
 ## jll artifact
 LIBUV_JLL_NAME := LibUV
 
 ## source build
 LIBUV_VER := 2
 LIBUV_BRANCH=julia-uv2-1.48.0
-LIBUV_SHA1=afa1c67fa496eb49ade1e520f76fd018a1409eaa
+LIBUV_SHA1=af4172ec713ee986ba1a989b9e33993a07c60c9e
diff --git a/deps/lld.version b/deps/lld.version
index 949577bdacae2..3ca9960164e27 100644
--- a/deps/lld.version
+++ b/deps/lld.version
@@ -1,3 +1,5 @@
+# -*- makefile -*-
+
 ## jll artifact
 LLD_JLL_NAME := LLD
-LLD_JLL_VER := 16.0.6+4
+LLD_JLL_VER := 18.1.7+2
diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version
index 8191c8742dcbb..1fcc8944dc769 100644
--- a/deps/llvm-tools.version
+++ b/deps/llvm-tools.version
@@ -1,5 +1,7 @@
+# -*- makefile -*-
+
 ## jll artifact
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 16.0.6+4
-LLVM_TOOLS_ASSERT_JLL_VER := 16.0.6+4
+LLVM_TOOLS_JLL_VER := 18.1.7+2
+LLVM_TOOLS_ASSERT_JLL_VER := 18.1.7+2
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 3e3ea4e79c24e..73697069a4fac 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -86,22 +86,23 @@ endif
 LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
 
 # Allow adding LLVM specific flags
-LLVM_CFLAGS += $(CFLAGS)
-LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CFLAGS += $(CFLAGS) $(BOLT_CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS) $(BOLT_CFLAGS)
 LLVM_CPPFLAGS += $(CPPFLAGS)
 LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(BOLT_LDFLAGS)
 LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)"
 LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)"
 LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
-LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib"
-LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+LLVM_CMAKE += -DLLVM_ENABLE_ZSTD=OFF
 ifeq ($(USE_POLLY_ACC),1)
 LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON
 endif
 LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON
-LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off
 ifeq ($(LLVM_ASSERTIONS), 1)
 LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON
 endif # LLVM_ASSERTIONS
@@ -291,6 +292,9 @@ ifeq ($(OS),Darwin)
 # https://github.com/JuliaLang/julia/issues/29981
 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib
 endif
+ifeq ($(BUILD_LLD), 1)
+LLVM_INSTALL += && cp $2$$(build_bindir)/lld$$(EXE) $2$$(build_depsbindir)
+endif
 
 $(eval $(call staged-install, \
 	llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \
diff --git a/deps/llvm.version b/deps/llvm.version
index 7e74cf2e4fd65..8e4180ef5a277 100644
--- a/deps/llvm.version
+++ b/deps/llvm.version
@@ -2,14 +2,14 @@
 
 ## jll artifact
 LLVM_JLL_NAME := libLLVM
-LLVM_ASSERT_JLL_VER := 16.0.6+4
+LLVM_ASSERT_JLL_VER := 18.1.7+2
 ## source build
 # Version number of LLVM
-LLVM_VER := 16.0.6
+LLVM_VER := 18.1.7
 # Git branch name in `LLVM_GIT_URL` repository
-LLVM_BRANCH=julia-16.0.6-2
+LLVM_BRANCH=julia-18.1.7-2
 # Git ref in `LLVM_GIT_URL` repository
-LLVM_SHA1=julia-16.0.6-2
+LLVM_SHA1=julia-18.1.7-2
 
 ## Following options are used to automatically fetch patchset from Julia's fork.  This is
 ## useful if you want to build an external LLVM while still applying Julia's patches.
@@ -18,6 +18,6 @@ LLVM_APPLY_JULIA_PATCHES := 0
 # GitHub repository to use for fetching the Julia patches to apply to LLVM source code.
 LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project
 # Base GitHub ref for generating the diff.
-LLVM_BASE_REF := llvm:llvmorg-16.0.6
+LLVM_BASE_REF := llvm:llvmorg-18.1.7
 # Julia fork's GitHub ref for generating the diff.
-LLVM_JULIA_REF := JuliaLang:julia-16.0.6-0
+LLVM_JULIA_REF := JuliaLang:julia-18.1.7-2
diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version
index 7d13af9a158f7..9c2a91c566ba2 100644
--- a/deps/llvmunwind.version
+++ b/deps/llvmunwind.version
@@ -2,4 +2,4 @@
 LLVMUNWIND_JLL_NAME := LLVMLibUnwind
 
 ## source build
-LLVMUNWIND_VER := 12.0.1
+LLVMUNWIND_VER := 14.0.6
diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk
index b4147c2c2684e..39cf817d70658 100644
--- a/deps/mbedtls.mk
+++ b/deps/mbedtls.mk
@@ -13,7 +13,7 @@ ifeq ($(BUILD_OS),WINNT)
 MBEDTLS_OPTS += -G"MSYS Makefiles"
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 MBEDTLS_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
diff --git a/deps/mpfr.version b/deps/mpfr.version
index e4f1c8a45aeb0..ec109e181ecdc 100644
--- a/deps/mpfr.version
+++ b/deps/mpfr.version
@@ -2,4 +2,4 @@
 MPFR_JLL_NAME := MPFR
 
 ## source build
-MPFR_VER := 4.2.0
+MPFR_VER := 4.2.1
diff --git a/deps/nghttp2.version b/deps/nghttp2.version
index a893c5ee8085e..e9587297d0e32 100644
--- a/deps/nghttp2.version
+++ b/deps/nghttp2.version
@@ -3,4 +3,4 @@
 NGHTTP2_JLL_NAME := nghttp2
 
 ## source build
-NGHTTP2_VER := 1.59.0
+NGHTTP2_VER := 1.60.0
diff --git a/deps/nvtx.mk b/deps/nvtx.mk
new file mode 100644
index 0000000000000..c4d4db2deba65
--- /dev/null
+++ b/deps/nvtx.mk
@@ -0,0 +1,31 @@
+## nvtx ##
+include $(SRCDIR)/nvtx.version
+
+NVTX_GIT_URL := https://github.com/NVIDIA/NVTX.git
+NVTX_TAR_URL = https://api.github.com/repos/NVIDIA/NVTX/tarball/$1
+$(eval $(call git-external,nvtx,NVTX,,,$(SRCCACHE)))
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	echo 1 > $@
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+	echo 1 > $@
+
+define NVTX_INSTALL
+	cp -a $(SRCCACHE)/$(NVTX_SRC_DIR)/c/include $2/$$(build_includedir)/
+endef
+
+$(eval $(call staged-install, \
+	nvtx,$(NVTX_SRC_DIR), \
+	NVTX_INSTALL,,,))
+
+get-nvtx: $(NVTX_SRC_FILE)
+extract-nvtx: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+configure-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+compile-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
+fastcheck-nvtx: #none
+check-nvtx: #none
+
+clean-nvtx:
+	-rm -f $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
diff --git a/deps/nvtx.version b/deps/nvtx.version
new file mode 100644
index 0000000000000..e26c55cae095e
--- /dev/null
+++ b/deps/nvtx.version
@@ -0,0 +1,4 @@
+# -*- makefile -*-
+## source build
+NVTX_BRANCH=dev
+NVTX_SHA1=733fb419540bc1d152bc682d2ca066c7bb79da29
diff --git a/deps/openblas.mk b/deps/openblas.mk
index d1e0c03aabb1c..affd1c7a7aa55 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -90,27 +90,17 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied: $(BUILDDIR)/$(OP
 		patch -p1 -f < $(SRCDIR)/patches/openblas-winexit.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-memory-buffer-multi-threading.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-avx512bf-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-avx512bf-kernels.patch
+		patch -p1 -f < $(SRCDIR)/patches/openblas-memory-buffer-multi-threading.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-gemv-multithreading.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-avx512bf-kernels.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-memory-buffer-multi-threading.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-gemv-multithreading.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-darwin-sve.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-gemv-multithreading.patch-applied
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-darwin-sve.patch
+		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-darwin-sve.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
diff --git a/deps/openblas.version b/deps/openblas.version
index f62282b7ea8cc..09dcdc45af1ef 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -3,9 +3,9 @@
 OPENBLAS_JLL_NAME := OpenBLAS
 
 ## source build
-OPENBLAS_VER := 0.3.26
-OPENBLAS_BRANCH=v0.3.26
-OPENBLAS_SHA1=6c77e5e314474773a7749357b153caba4ec3817d
+OPENBLAS_VER := 0.3.28
+OPENBLAS_BRANCH=v0.3.28
+OPENBLAS_SHA1=5ef8b1964658f9cb6a6324a06f6a1a022609b0c5
 
 # LAPACK, source-only
 LAPACK_VER := 3.9.0
diff --git a/deps/p7zip.version b/deps/p7zip.version
index d4a13155d9162..0fcde938eeb95 100644
--- a/deps/p7zip.version
+++ b/deps/p7zip.version
@@ -2,4 +2,4 @@
 P7ZIP_JLL_NAME := p7zip
 
 ## source build
-P7ZIP_VER := 17.04
+P7ZIP_VER := 17.05
diff --git a/deps/patchelf.mk b/deps/patchelf.mk
index 9b4947f183117..c019892058d0e 100644
--- a/deps/patchelf.mk
+++ b/deps/patchelf.mk
@@ -20,7 +20,7 @@ $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: XC_HOST:=$(BUILD_MACHINE)
 $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)"
+	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)" MAKE=$(MAKE)
 	echo 1 > $@
 
 $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured
diff --git a/deps/patchelf.version b/deps/patchelf.version
index bbeaa87d25136..6e4f32a0c2fe4 100644
--- a/deps/patchelf.version
+++ b/deps/patchelf.version
@@ -1,3 +1,4 @@
 ## source build
 # Patchelf (we don't ship this or even use a JLL, we just always build it)
-PATCHELF_VER := 0.13
+# NOTE: Do not upgrade this to 0.18+ until https://github.com/NixOS/patchelf/issues/492 is fixed
+PATCHELF_VER := 0.17.2
diff --git a/deps/patches/curl-memdup.patch b/deps/patches/curl-8.6.0-build.patch
similarity index 98%
rename from deps/patches/curl-memdup.patch
rename to deps/patches/curl-8.6.0-build.patch
index 596606027c447..827b02808d505 100644
--- a/deps/patches/curl-memdup.patch
+++ b/deps/patches/curl-8.6.0-build.patch
@@ -15,10 +15,9 @@ index 067c211e420afd..58dd1166cf924f 100644
 --- a/lib/md4.c
 +++ b/lib/md4.c
 @@ -28,6 +28,7 @@
- 
+
  #include <string.h>
- 
+
 +#include "strdup.h"
  #include "curl_md4.h"
  #include "warnless.h"
- 
diff --git a/deps/patches/libunwind-aarch64-inline-asm.patch b/deps/patches/libunwind-aarch64-inline-asm.patch
new file mode 100644
index 0000000000000..123643e30cdeb
--- /dev/null
+++ b/deps/patches/libunwind-aarch64-inline-asm.patch
@@ -0,0 +1,157 @@
+From 6ae71b3ea71bff0f38c7a6a05beda30b7dce1ef6 Mon Sep 17 00:00:00 2001
+From: Stephen Webb <swebb@blackberry.com>
+Date: Mon, 22 Apr 2024 15:56:54 -0400
+Subject: [PATCH] Rework inline aarch64 as for setcontext
+
+Modern GC and clang were barfing on the inline asm constraints for the
+aarch64-linux setcontext() replacement. Reformulated the asm code to
+reduce the required constraints.
+---
+ src/aarch64/Gos-linux.c | 115 +++++++++++++++++++++-------------------
+ 1 file changed, 61 insertions(+), 54 deletions(-)
+
+diff --git a/src/aarch64/Gos-linux.c b/src/aarch64/Gos-linux.c
+index 7cd8c879f..1e4949623 100644
+--- a/src/aarch64/Gos-linux.c
++++ b/src/aarch64/Gos-linux.c
+@@ -2,6 +2,7 @@
+    Copyright (C) 2008 CodeSourcery
+    Copyright (C) 2011-2013 Linaro Limited
+    Copyright (C) 2012 Tommi Rantala <tt.rantala@gmail.com>
++   Copyright 2024 Stephen M. Webb  <swebb@blackberry.com>
+ 
+ This file is part of libunwind.
+ 
+@@ -28,6 +29,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+ 
+ #ifndef UNW_REMOTE_ONLY
+ 
++/* Magic constants generated from gen-offsets.c */
++#define SC_R0_OFF   "8"
++#define SC_R2_OFF   "24"
++#define SC_R18_OFF  "152"
++#define SC_R20_OFF  "168"
++#define SC_R22_OFF  "184"
++#define SC_R24_OFF  "200"
++#define SC_R26_OFF  "216"
++#define SC_R28_OFF  "232"
++#define SC_R30_OFF  "248"
++
++#define FP_R08_OFF "80"
++#define FP_R09_OFF "88"
++#define FP_R10_OFF "96"
++#define FP_R11_OFF "104"
++#define FP_R12_OFF "112"
++#define FP_R13_OFF "120"
++#define FP_R14_OFF "128"
++#define FP_R15_OFF "136"
++
++#define SC_SP_OFF   "0x100"
++
+ HIDDEN int
+ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg)
+ {
+@@ -36,65 +59,49 @@ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg)
+ 
+   if (c->sigcontext_format == AARCH64_SCF_NONE)
+     {
++
++      /*
++       * This is effectively the old POSIX setcontext().
++       *
++       * This inline asm is broken up to use local scratch registers for the
++       * uc_mcontext.regs and FPCTX base addresses because newer versions of GCC
++       * and clang barf on too many constraints (gh-702) when the C array
++       * elements are used directly.
++       *
++       * Clobbers aren't required for the inline asm because they just convince
++       * the compiler to save those registers and they never get restored
++       * becauise the asm ends with a plain ol' ret.
++       */
++      register void* uc_mcontext __asm__ ("x5") = (void*) &uc->uc_mcontext;
++      register void* fpctx __asm__ ("x4") = (void*) GET_FPCTX(uc);
++
+       /* Since there are no signals involved here we restore EH and non scratch
+          registers only.  */
+       __asm__ __volatile__ (
+-        "ldr x0,  %[x0]\n\t"
+-        "ldr x1,  %[x1]\n\t"
+-        "ldr x2,  %[x2]\n\t"
+-        "ldr x3,  %[x3]\n\t"
+-        "ldr x19, %[x19]\n\t"
+-        "ldr x20, %[x20]\n\t"
+-        "ldr x21, %[x21]\n\t"
+-        "ldr x22, %[x22]\n\t"
+-        "ldr x23, %[x23]\n\t"
+-        "ldr x24, %[x24]\n\t"
+-        "ldr x25, %[x25]\n\t"
+-        "ldr x26, %[x26]\n\t"
+-        "ldr x27, %[x27]\n\t"
+-        "ldr x28, %[x28]\n\t"
+-        "ldr x29, %[x29]\n\t"
+-        "ldr x30, %[x30]\n\t"
+-        "ldr d8,  %[d8]\n\t"
+-        "ldr d9,  %[d9]\n\t"
+-        "ldr d10, %[d10]\n\t"
+-        "ldr d11, %[d11]\n\t"
+-        "ldr d12, %[d12]\n\t"
+-        "ldr d13, %[d13]\n\t"
+-        "ldr d14, %[d14]\n\t"
+-        "ldr d15, %[d15]\n\t"
+-        "ldr x5,  %[sp]\n\t"
++        "ldp x0,  x1,  [x5, " SC_R0_OFF  "]\n\t"
++        "ldp x2,  x3,  [x5, " SC_R2_OFF  "]\n\t"
++        "ldp x18, x19, [x5, " SC_R18_OFF "]\n\t"
++        "ldp x20, x21, [x5, " SC_R20_OFF "]\n\t"
++        "ldp x22, x23, [x5, " SC_R22_OFF "]\n\t"
++        "ldp x24, x25, [x5, " SC_R24_OFF "]\n\t"
++        "ldp x26, x27, [x5, " SC_R26_OFF "]\n\t"
++        "ldp x28, x29, [x5, " SC_R28_OFF "]\n\t"
++        "ldr x30, [x5, " SC_R30_OFF "]\n\t"
++        "ldr d8,  [x4, " FP_R08_OFF "]\n\t"
++        "ldr d9,  [x4, " FP_R09_OFF "]\n\t"
++        "ldr d10, [x4, " FP_R10_OFF "]\n\t"
++        "ldr d11, [x4, " FP_R11_OFF "]\n\t"
++        "ldr d12, [x4, " FP_R12_OFF "]\n\t"
++        "ldr d13, [x4, " FP_R13_OFF "]\n\t"
++        "ldr d14, [x4, " FP_R14_OFF "]\n\t"
++        "ldr d15, [x4, " FP_R15_OFF "]\n\t"
++        "ldr x5,  [x5, " SC_SP_OFF "]\n\t"
+         "mov sp, x5\n\t"
+         "ret\n"
+-        :
+-        : [x0]  "m"(uc->uc_mcontext.regs[0]),
+-		  [x1]  "m"(uc->uc_mcontext.regs[1]),
+-		  [x2]  "m"(uc->uc_mcontext.regs[2]),
+-		  [x3]  "m"(uc->uc_mcontext.regs[3]),
+-		  [x19] "m"(uc->uc_mcontext.regs[19]),
+-		  [x20] "m"(uc->uc_mcontext.regs[20]),
+-		  [x21] "m"(uc->uc_mcontext.regs[21]),
+-		  [x22] "m"(uc->uc_mcontext.regs[22]),
+-		  [x23] "m"(uc->uc_mcontext.regs[23]),
+-		  [x24] "m"(uc->uc_mcontext.regs[24]),
+-		  [x25] "m"(uc->uc_mcontext.regs[25]),
+-		  [x26] "m"(uc->uc_mcontext.regs[26]),
+-		  [x27] "m"(uc->uc_mcontext.regs[27]),
+-		  [x28] "m"(uc->uc_mcontext.regs[28]),
+-		  [x29] "m"(uc->uc_mcontext.regs[29]), /* FP */
+-		  [x30] "m"(uc->uc_mcontext.regs[30]), /* LR */
+-		  [d8]  "m"(GET_FPCTX(uc)->vregs[8]),
+-		  [d9]  "m"(GET_FPCTX(uc)->vregs[9]),
+-		  [d10] "m"(GET_FPCTX(uc)->vregs[10]),
+-		  [d11] "m"(GET_FPCTX(uc)->vregs[11]),
+-		  [d12] "m"(GET_FPCTX(uc)->vregs[12]),
+-		  [d13] "m"(GET_FPCTX(uc)->vregs[13]),
+-		  [d14] "m"(GET_FPCTX(uc)->vregs[14]),
+-		  [d15] "m"(GET_FPCTX(uc)->vregs[15]),
+-          [sp]  "m"(uc->uc_mcontext.sp)
+-		: "x0",   "x1",  "x2",  "x3", "x19", "x20", "x21", "x22", "x23", "x24",
+-		  "x25", "x26", "x27", "x28", "x29", "x30"
+-	  );
++        : 
++        : [uc_mcontext] "r"(uc_mcontext),
++          [fpctx] "r"(fpctx)
++      );
+     }
+   else
+     {
diff --git a/deps/patches/libunwind-configure-static-lzma.patch b/deps/patches/libunwind-configure-static-lzma.patch
new file mode 100644
index 0000000000000..f8b428f60550b
--- /dev/null
+++ b/deps/patches/libunwind-configure-static-lzma.patch
@@ -0,0 +1,20 @@
+--- configure.orig	2023-06-04 05:19:04
++++ configure	2023-06-07 08:35:11
+@@ -18117,7 +18117,7 @@
+   $as_echo_n "(cached) " >&6
+ else
+   ac_check_lib_save_LIBS=$LIBS
+-LIBS="-llzma  $LIBS"
++LIBS="-L${libdir} -l:liblzma.a $LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+ 
+@@ -18148,7 +18148,7 @@
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzma_lzma_mf_is_supported" >&5
+ $as_echo "$ac_cv_lib_lzma_lzma_mf_is_supported" >&6; }
+ if test "x$ac_cv_lib_lzma_lzma_mf_is_supported" = xyes; then :
+-  LIBLZMA=-llzma
++  LIBLZMA="-L${libdir} -l:liblzma.a"
+ 
+ $as_echo "#define HAVE_LZMA 1" >>confdefs.h
+ 
diff --git a/deps/patches/libunwind-disable-initial-exec-tls.patch b/deps/patches/libunwind-disable-initial-exec-tls.patch
new file mode 100644
index 0000000000000..c6718ac2db98f
--- /dev/null
+++ b/deps/patches/libunwind-disable-initial-exec-tls.patch
@@ -0,0 +1,44 @@
+diff --git a/include/libunwind-common.h.in b/include/libunwind-common.h.in
+index 893fdd69..80ab9648 100644
+--- a/include/libunwind-common.h.in
++++ b/include/libunwind-common.h.in
+@@ -340,5 +340,6 @@ extern int unw_get_elf_filename_by_ip (unw_addr_space_t, unw_word_t, char *,
+ extern const char *unw_strerror (int);
+ extern int unw_backtrace (void **, int);
+ extern int unw_backtrace2 (void **, int, unw_context_t*, int);
++extern int unw_ensure_tls (void);
+ 
+ extern unw_addr_space_t unw_local_addr_space;
+diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
+index 7a5d7e1f..8453ffb0 100644
+--- a/src/dwarf/Gparser.c
++++ b/src/dwarf/Gparser.c
+@@ -623,7 +623,7 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp)
+ #if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
+   if (likely (caching == UNW_CACHE_PER_THREAD))
+     {
+-      static _Thread_local struct dwarf_rs_cache tls_cache __attribute__((tls_model("initial-exec")));
++      static _Thread_local struct dwarf_rs_cache tls_cache;
+       Debug (16, "using TLS cache\n");
+       cache = &tls_cache;
+     }
+diff --git a/src/mi/init.c b/src/mi/init.c
+index e4431eeb..07cae852 100644
+--- a/src/mi/init.c
++++ b/src/mi/init.c
+@@ -82,3 +82,15 @@ mi_init (void)
+   unw_init_page_size();
+   assert(sizeof(struct cursor) <= sizeof(unw_cursor_t));
+ }
++
++int
++unw_ensure_tls (void)
++{
++#if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
++  static _Thread_local int alloc_trigger;
++  alloc_trigger = 1;
++  return alloc_trigger;
++#else
++  return 0;
++#endif
++}
diff --git a/deps/patches/libunwind-revert_prelink_unwind.patch b/deps/patches/libunwind-revert_prelink_unwind.patch
index 603db03362759..80de3c9ce4571 100644
--- a/deps/patches/libunwind-revert_prelink_unwind.patch
+++ b/deps/patches/libunwind-revert_prelink_unwind.patch
@@ -1,6 +1,6 @@
-From 7ae19a08467254f0b3d7a513ef82651b283f38a9 Mon Sep 17 00:00:00 2001
+From 3af39d34f576890e7f1f3e97cc1cb45b4b76aa47 Mon Sep 17 00:00:00 2001
 From: Tim Besard <tim.besard@gmail.com>
-Date: Wed, 27 Sep 2023 12:51:59 +0000
+Date: Tue, 16 Jan 2024 09:49:21 -0800
 Subject: [PATCH] Revert "Fix unwinding of pre-linked libraries"
 
 This reverts commit a4014f33775321b4106a1134b89020a7774902dd,
@@ -13,7 +13,7 @@ which regresses unwinding on FreeBSD (JuliaLang/julia#51467).
  4 files changed, 8 insertions(+), 38 deletions(-)
 
 diff --git a/include/dwarf.h b/include/dwarf.h
-index dd9014b7..312166cd 100644
+index 4fd1dba0..3fc6bce2 100644
 --- a/include/dwarf.h
 +++ b/include/dwarf.h
 @@ -371,8 +371,6 @@ struct unw_debug_frame_list
@@ -38,7 +38,7 @@ index a26f2c99..c902ccd9 100644
        {
          unw_dyn_proc_info_t pi;
 diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c
-index 8ead48f0..154e9b5a 100644
+index c11345e8..c701ccfb 100644
 --- a/src/dwarf/Gfind_proc_info-lsb.c
 +++ b/src/dwarf/Gfind_proc_info-lsb.c
 @@ -108,17 +108,13 @@ linear_search (unw_addr_space_t as, unw_word_t ip,
@@ -58,7 +58,7 @@ index 8ead48f0..154e9b5a 100644
    ei.image = NULL;
 -  *load_offset = 0;
  
-   ret = elf_w (load_debuglink) (file, &ei, is_local);
+   ret = elf_w (load_debuginfo) (file, &ei, is_local);
    if (ret != 0)
 @@ -193,20 +189,6 @@ load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local,
  #if defined(SHF_COMPRESSED)
@@ -78,7 +78,7 @@ index 8ead48f0..154e9b5a 100644
 -        break;
 -      }
 -
-   munmap(ei.image, ei.size);
+   mi_munmap(ei.image, ei.size);
    return 0;
  }
 @@ -259,7 +241,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase,
@@ -125,7 +125,7 @@ index 8ead48f0..154e9b5a 100644
      {
 -      e = lookup (table, table_len, ip - ip_base - di->load_offset);
 +      e = lookup (table, table_len, ip - ip_base);
-       if (e && &e[1] < &table[table_len / sizeof (unw_word_t)])
+       if (e && &e[1] < &table[table_len / sizeof (struct table_entry)])
 -	last_ip = e[1].start_ip_offset + ip_base + di->load_offset;
 +	last_ip = e[1].start_ip_offset + ip_base;
        else
@@ -171,10 +171,10 @@ index 8ead48f0..154e9b5a 100644
    pi->last_ip = last_ip;
  #else
 diff --git a/src/dwarf/Gfind_unwind_table.c b/src/dwarf/Gfind_unwind_table.c
-index fb20fea0..62feb26c 100644
+index a7c4dfd3..2b503ea9 100644
 --- a/src/dwarf/Gfind_unwind_table.c
 +++ b/src/dwarf/Gfind_unwind_table.c
-@@ -193,7 +193,6 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi, unw_addr_space_t as,
+@@ -197,7 +197,6 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi,
  
        edi->di_cache.start_ip = start_ip;
        edi->di_cache.end_ip = end_ip;
@@ -183,5 +183,5 @@ index fb20fea0..62feb26c 100644
        edi->di_cache.u.rti.name_ptr = 0;
        /* two 32-bit values (ip_offset/fde_offset) per table-entry: */
 -- 
-2.41.0
+2.43.0
 
diff --git a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch b/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
deleted file mode 100644
index 4e3897dfb9801..0000000000000
--- a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
+++ /dev/null
@@ -1,156 +0,0 @@
-Upstream commit 8c03fdf34a659925a3f09c8f54016e47ea1c7519 changed the build such
-that it requires living inside the monorepo with libcxx available, only so that
-it can reuse a CMake file to simplify some build steps. This patch is a revert
-of that commit applied only to libunwind.
-
----
-diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
-index 570b8db90653..a383d7d77d6f 100644
---- a/libunwind/CMakeLists.txt
-+++ b/libunwind/CMakeLists.txt
-@@ -1,7 +1,3 @@
--if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/../libcxx")
--  message(FATAL_ERROR "libunwind requires being built in a monorepo layout with libcxx available")
--endif()
--
- #===============================================================================
- # Setup Project
- #===============================================================================
-@@ -15,31 +11,103 @@ set(CMAKE_MODULE_PATH
-   ${CMAKE_MODULE_PATH}
-   )
- 
--set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
--set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
--set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH
--        "Specify path to libc++ source.")
--
- if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD)
-   project(libunwind LANGUAGES C CXX ASM)
- 
-+  # Rely on llvm-config.
-+  set(CONFIG_OUTPUT)
-+  if(NOT LLVM_CONFIG_PATH)
-+    find_program(LLVM_CONFIG_PATH "llvm-config")
-+  endif()
-+  if (DEFINED LLVM_PATH)
-+    set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_MAIN_SRC_DIR ${LLVM_PATH})
-+    set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules")
-+  elseif(LLVM_CONFIG_PATH)
-+    message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}")
-+    set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root")
-+    execute_process(COMMAND ${CONFIG_COMMAND}
-+                    RESULT_VARIABLE HAD_ERROR
-+                    OUTPUT_VARIABLE CONFIG_OUTPUT)
-+    if (NOT HAD_ERROR)
-+      string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";"
-+             CONFIG_OUTPUT ${CONFIG_OUTPUT})
-+    else()
-+      string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}")
-+      message(STATUS "${CONFIG_COMMAND_STR}")
-+      message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
-+    endif()
-+
-+    list(GET CONFIG_OUTPUT 0 INCLUDE_DIR)
-+    list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT)
-+    list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR)
-+
-+    set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree")
-+    set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py")
-+
-+    # --cmakedir is supported since llvm r291218 (4.0 release)
-+    execute_process(
-+      COMMAND ${LLVM_CONFIG_PATH} --cmakedir
-+      RESULT_VARIABLE HAD_ERROR
-+      OUTPUT_VARIABLE CONFIG_OUTPUT
-+      ERROR_QUIET)
-+    if(NOT HAD_ERROR)
-+      string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG)
-+      file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH)
-+    else()
-+      file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE)
-+      set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
-+    endif()
-+  else()
-+    message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: "
-+                    "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. "
-+                    "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config "
-+                    "or -DLLVM_PATH=path/to/llvm-source-root.")
-+  endif()
-+
-+  if (EXISTS ${LLVM_CMAKE_PATH})
-+    list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
-+    include("${LLVM_CMAKE_PATH}/AddLLVM.cmake")
-+    include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake")
-+  else()
-+    message(WARNING "Not found: ${LLVM_CMAKE_PATH}")
-+  endif()
-+
-   set(PACKAGE_NAME libunwind)
-   set(PACKAGE_VERSION 12.0.1)
-   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
-   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
- 
--  # Add the CMake module path of libcxx so we can reuse HandleOutOfTreeLLVM.cmake
--  set(LIBUNWIND_LIBCXX_CMAKE_PATH "${LIBUNWIND_LIBCXX_PATH}/cmake/Modules")
--  list(APPEND CMAKE_MODULE_PATH "${LIBUNWIND_LIBCXX_CMAKE_PATH}")
-+  if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+    set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+  else()
-+    # Seek installed Lit.
-+    find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit
-+                 DOC "Path to lit.py")
-+  endif()
- 
--  # In a standalone build, we don't have llvm to automatically generate the
--  # llvm-lit script for us.  So we need to provide an explicit directory that
--  # the configurator should write the script into.
--  set(LIBUNWIND_STANDALONE_BUILD 1)
--  set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin")
-+  if (LLVM_LIT)
-+    # Define the default arguments to use with 'lit', and an option for the user
-+    # to override.
-+    set(LIT_ARGS_DEFAULT "-sv")
-+    if (MSVC OR XCODE)
-+      set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
-+    endif()
-+    set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
-+
-+    # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
-+    if (WIN32 AND NOT CYGWIN)
-+      set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
-+    endif()
-+  else()
-+    set(LLVM_INCLUDE_TESTS OFF)
-+  endif()
- 
--  # Find the LLVM sources and simulate LLVM CMake options.
--  include(HandleOutOfTreeLLVM)
-+  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-+  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
- else()
-   set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py")
- endif()
-@@ -85,8 +153,6 @@ set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING
-     "Additional compiler flags for test programs.")
- set(LIBUNWIND_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING
-     "The Lit testing configuration to use when running the tests.")
--set(LIBUNWIND_TEST_PARAMS "" CACHE STRING
--    "A list of parameters to run the Lit test suite with.")
- 
- if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC)
-   message(FATAL_ERROR "libunwind must be built as either a shared or static library.")
-@@ -113,6 +179,9 @@ set(CMAKE_MODULE_PATH
-     "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
-     ${CMAKE_MODULE_PATH})
- 
-+set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
-+set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
-+
- if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
-   set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
-   set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
diff --git a/deps/patches/openblas-avx512bf-kernels.patch b/deps/patches/openblas-avx512bf-kernels.patch
deleted file mode 100644
index 7e99cdf7c53ee..0000000000000
--- a/deps/patches/openblas-avx512bf-kernels.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 1dada6d65d89d19b2cf89b12169f6b2196c90f1d Mon Sep 17 00:00:00 2001
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Fri, 12 Jan 2024 00:10:56 +0100
-Subject: [PATCH 1/2] Add compiler test and flag for AVX512BF16 capability
-
----
- c_check | 22 ++++++++++++++++++++++
- 1 file changed, 22 insertions(+)
-
-diff --git a/c_check b/c_check
-index b5e4a9ad00..3e507be818 100755
---- a/c_check
-+++ b/c_check
-@@ -244,6 +244,7 @@ case "$data" in
- esac
- 
- no_avx512=0
-+no_avx512bf=0
- if [ "$architecture" = "x86" ] || [ "$architecture" = "x86_64" ]; then
-     tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
-     tmpf="$tmpd/a.c"
-@@ -262,6 +263,25 @@ if [ "$architecture" = "x86" ] || [ "$architecture" = "x86_64" ]; then
-     }
- 
-     rm -rf "$tmpd"
-+    if [ "$no_avx512" -eq 0 ]; then
-+    tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
-+    tmpf="$tmpd/a.c"
-+    code='"__m512 a= _mm512_dpbf16_ps(a, (__m512bh) _mm512_loadu_si512(%1]), (__m512bh) _mm512_loadu_si512(%2]));"'
-+    printf "#include <immintrin.h>\n\nint main(void){ %s; }\n" "$code" >> "$tmpf"
-+    if [ "$compiler" = "PGI" ]; then
-+        args=" -tp cooperlake -c -o $tmpf.o $tmpf"
-+    else
-+        args=" -march=cooperlake -c -o $tmpf.o $tmpf"
-+    fi
-+    no_avx512bf=0
-+    {
-+        $compiler_name $flags $args >/dev/null 2>&1
-+    } || {
-+        no_avx512bf=1
-+    }
-+
-+    rm -rf "$tmpd"
-+  fi
- fi
- 
- no_rv64gv=0
-@@ -409,6 +429,7 @@ done
-  [ "$makefile" = "-" ] && {
-     [ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
-     [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
-+    [ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
-     [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
-     [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
-     exit 0
-@@ -437,6 +458,7 @@ done
-     [ "$no_sve" -eq 1 ] && printf "NO_SVE=1\n"
-     [ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
-     [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
-+    [ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
-     [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
-     [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
-     [ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n"
-
-From 995a990e24fdcc8080128a8abc17b4ccc66bd4fd Mon Sep 17 00:00:00 2001
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Fri, 12 Jan 2024 00:12:46 +0100
-Subject: [PATCH 2/2] Make AVX512 BFLOAT16 kernels conditional on compiler
- capability
-
----
- kernel/x86_64/KERNEL.COOPERLAKE     | 3 ++-
- kernel/x86_64/KERNEL.SAPPHIRERAPIDS | 2 ++
- 2 files changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/kernel/x86_64/KERNEL.COOPERLAKE b/kernel/x86_64/KERNEL.COOPERLAKE
-index dba94aea86..22b042029f 100644
---- a/kernel/x86_64/KERNEL.COOPERLAKE
-+++ b/kernel/x86_64/KERNEL.COOPERLAKE
-@@ -1,5 +1,5 @@
- include $(KERNELDIR)/KERNEL.SKYLAKEX
--
-+ifneq ($(NO_AVX512BF16), 1)
- SBGEMM_SMALL_M_PERMIT = sbgemm_small_kernel_permit_cooperlake.c
- SBGEMM_SMALL_K_NN = sbgemm_small_kernel_nn_cooperlake.c
- SBGEMM_SMALL_K_B0_NN = sbgemm_small_kernel_nn_cooperlake.c
-@@ -20,3 +20,4 @@ SBGEMMINCOPYOBJ =  sbgemm_incopy$(TSUFFIX).$(SUFFIX)
- SBGEMMITCOPYOBJ =  sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
- SBGEMMONCOPYOBJ =  sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
- SBGEMMOTCOPYOBJ =  sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
-+endif
-diff --git a/kernel/x86_64/KERNEL.SAPPHIRERAPIDS b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
-index 3a832e9174..0ab2b4ddcf 100644
---- a/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
-+++ b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
-@@ -1,5 +1,6 @@
- include $(KERNELDIR)/KERNEL.COOPERLAKE
- 
-+ifneq ($(NO_AVX512BF16), 1)
- SBGEMM_SMALL_M_PERMIT =
- SBGEMM_SMALL_K_NN     =
- SBGEMM_SMALL_K_B0_NN  =
-@@ -20,3 +21,4 @@ SBGEMMINCOPYOBJ =  sbgemm_incopy$(TSUFFIX).$(SUFFIX)
- SBGEMMITCOPYOBJ =  sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
- SBGEMMONCOPYOBJ =  sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
- SBGEMMOTCOPYOBJ =  sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
-+endif
diff --git a/deps/patches/openblas-darwin-sve.patch b/deps/patches/openblas-darwin-sve.patch
deleted file mode 100644
index a2166db9379f1..0000000000000
--- a/deps/patches/openblas-darwin-sve.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 03688a42622cf76e696859ce384e45aa26d927fc Mon Sep 17 00:00:00 2001
-From: Ian McInerney <i.mcinerney17@imperial.ac.uk>
-Date: Tue, 23 Jan 2024 10:29:57 +0000
-Subject: [PATCH] Build with proper aarch64 flags on Neoverse Darwin
-
-We aren't affected by the problems in AppleClang that prompted this
-fallback to an older architecture.
----
- Makefile.arm64 | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/Makefile.arm64 b/Makefile.arm64
-index ed52a9424..a8f3cb0f0 100644
---- a/Makefile.arm64
-+++ b/Makefile.arm64
-@@ -135,11 +135,11 @@ ifeq ($(CORE), NEOVERSEN2)
- ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
- ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
- ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
--ifneq ($(OSNAME), Darwin)
-+#ifneq ($(OSNAME), Darwin)
- CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
--else
--CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
--endif
-+#else
-+#CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
-+#endif
- ifneq ($(F_COMPILER), NAG)
- FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
- endif
--- 
-2.43.0
-
diff --git a/deps/patches/openblas-gemv-multithreading.patch b/deps/patches/openblas-gemv-multithreading.patch
deleted file mode 100644
index 827c72fa48b1a..0000000000000
--- a/deps/patches/openblas-gemv-multithreading.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-From d2fc4f3b4d7f41527bc7dc8f62e9aa6229cfac89 Mon Sep 17 00:00:00 2001
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Wed, 17 Jan 2024 20:59:24 +0100
-Subject: [PATCH] Increase multithreading threshold by a factor of 50
-
----
- interface/gemv.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/interface/gemv.c b/interface/gemv.c
-index 1f07635799..2c121f1308 100644
---- a/interface/gemv.c
-+++ b/interface/gemv.c
-@@ -226,7 +226,7 @@ void CNAME(enum CBLAS_ORDER order,
- 
- #ifdef SMP
- 
--  if ( 1L * m * n < 2304L * GEMM_MULTITHREAD_THRESHOLD )
-+  if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD )
-     nthreads = 1;
-   else
-     nthreads = num_cpu_avail(2);
diff --git a/deps/patches/openblas-memory-buffer-multi-threading.patch b/deps/patches/openblas-memory-buffer-multi-threading.patch
new file mode 100644
index 0000000000000..9693b5cf61597
--- /dev/null
+++ b/deps/patches/openblas-memory-buffer-multi-threading.patch
@@ -0,0 +1,49 @@
+From 23b5d66a86417a071bba9a96a0573192237981b6 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 14 Aug 2024 10:35:44 +0200
+Subject: [PATCH 1/2] Ensure a memory buffer has been allocated for each thread
+ before invoking it
+
+---
+ driver/others/blas_server.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+From d24b3cf39392a99e81ed47a5f093fbd074d4b39b Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Thu, 15 Aug 2024 15:32:58 +0200
+Subject: [PATCH 2/2] properly fix buffer allocation and assignment
+
+---
+ driver/others/blas_server.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
+index 765511d8c7..b9a7674c17 100644
+--- a/driver/others/blas_server.c
++++ b/driver/others/blas_server.c
+@@ -1076,6 +1076,8 @@ fprintf(STDERR, "Server[%2ld] Calculation started.  Mode = 0x%03x M = %3ld N=%3l
+       main_status[cpu] = MAIN_RUNNING1;
+ #endif
+ 
++if (buffer == NULL) blas_thread_buffer[cpu] = blas_memory_alloc(2);
++	
+ //For target LOONGSON3R5, applying an offset to the buffer is essential
+ //for minimizing cache conflicts and optimizing performance.
+ #if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
+
+diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
+index b9a7674c17..29f8a5e646 100644
+--- a/driver/others/blas_server.c
++++ b/driver/others/blas_server.c
+@@ -1076,7 +1076,11 @@ fprintf(STDERR, "Server[%2ld] Calculation started.  Mode = 0x%03x M = %3ld N=%3l
+       main_status[cpu] = MAIN_RUNNING1;
+ #endif
+ 
+-if (buffer == NULL) blas_thread_buffer[cpu] = blas_memory_alloc(2);
++if (buffer == NULL) {
++	blas_thread_buffer[cpu] = blas_memory_alloc(2);
++	buffer = blas_thread_buffer[cpu];
++}      
++
+ 	
+ //For target LOONGSON3R5, applying an offset to the buffer is essential
+ //for minimizing cache conflicts and optimizing performance.
diff --git a/deps/pcre.mk b/deps/pcre.mk
index cd1180d992885..3ff85d5569ad9 100644
--- a/deps/pcre.mk
+++ b/deps/pcre.mk
@@ -9,6 +9,9 @@ PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
 ifeq ($(OS),emscripten)
 PCRE_CFLAGS += -fPIC
 PCRE_JIT = --disable-jit
+else ifeq ($(OS),OpenBSD)
+# jit will need RWX memory
+PCRE_JIT = --disable-jit
 else
 PCRE_JIT = --enable-jit
 endif
diff --git a/deps/pcre.version b/deps/pcre.version
index ce27921435e1d..e3ea507376105 100644
--- a/deps/pcre.version
+++ b/deps/pcre.version
@@ -2,4 +2,4 @@
 PCRE_JLL_NAME := PCRE2
 
 ## source build
-PCRE_VER := 10.42
+PCRE_VER := 10.43
diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk
index 1a272321c05fa..2d0f0988a39b9 100644
--- a/deps/sanitizers.mk
+++ b/deps/sanitizers.mk
@@ -14,6 +14,8 @@ define copy_sanitizer_lib
 install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir)
 $$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir)
 	-cp $$< $$@
+	$(if $(filter $(OS), Linux), \
+		  -$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$$$ORIGIN' $$@ , 0)
 endef
 
 ifeq ($(USECLANG),1)
diff --git a/deps/terminfo.mk b/deps/terminfo.mk
new file mode 100644
index 0000000000000..63194f786f566
--- /dev/null
+++ b/deps/terminfo.mk
@@ -0,0 +1,43 @@
+## TERMINFO-DB ##
+include $(SRCDIR)/terminfo.version
+
+$(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/JuliaBinaryWrappers/TermInfoDB_jll.jl/releases/download/$(TERMINFO_TAG)/TermInfoDB.v$(TERMINFO_VER).any.tar.gz
+	touch -c $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+	rm -rf $(dir $@)
+	mkdir -p $(dir $@)
+	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
+	echo 1 > $@
+
+checksum-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+	echo 1 > $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+	echo 1 > $@
+
+define TERMINFO_INSTALL
+	mkdir -p $2/$$(build_datarootdir)
+	cp -R $1/terminfo $2/$$(build_datarootdir)
+endef
+$(eval $(call staged-install, \
+	terminfo,TermInfoDB-v$(TERMINFO_VER), \
+	TERMINFO_INSTALL,,,,))
+
+clean-terminfo:
+	-rm -f $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+
+distclean-terminfo:
+	rm -rf $(SRCCACHE)/TermInfoDB*.tar.gz $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER) $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)
+
+get-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+extract-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+configure-terminfo: extract-terminfo
+compile-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+fastcheck-terminfo: check-terminfo
+check-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked
diff --git a/deps/terminfo.version b/deps/terminfo.version
new file mode 100644
index 0000000000000..b7c020b830517
--- /dev/null
+++ b/deps/terminfo.version
@@ -0,0 +1,3 @@
+# -*- makefile -*-
+TERMINFO_VER := 2023.12.9
+TERMINFO_TAG := TermInfoDB-v$(TERMINFO_VER)+0
diff --git a/deps/unwind.mk b/deps/unwind.mk
index f8e8260b431fa..3951bbf36e22f 100644
--- a/deps/unwind.mk
+++ b/deps/unwind.mk
@@ -26,17 +26,29 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UN
 checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
 	$(JLCHECKSUM) $<
 
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p0 -f -u -l < $(SRCDIR)/patches/libunwind-configure-static-lzma.patch
+	echo 1 > $@
+
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied
 	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-revert_prelink_unwind.patch
 	echo 1 > $@
 
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-aarch64-inline-asm.patch
+	echo 1 > $@
+
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-disable-initial-exec-tls.patch
+	echo 1 > $@
+
 # note minidebuginfo requires liblzma, which we do not have a source build for
 # (it will be enabled in BinaryBuilder-based downloads however)
 # since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c
-$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks
+	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks --enable-per-thread-cache
 	echo 1 > $@
 
 $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured
@@ -76,40 +88,41 @@ check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked
 LLVMUNWIND_OPTS := $(CMAKE_COMMON) \
 	-DCMAKE_BUILD_TYPE=MinSizeRel \
 	-DLIBUNWIND_ENABLE_PEDANTIC=OFF \
-	-DLLVM_PATH=$(SRCCACHE)/$(LLVM_SRC_DIR)/llvm
+	-DLIBUNWIND_INCLUDE_DOCS=OFF \
+	-DLIBUNWIND_INCLUDE_TESTS=OFF \
+	-DLIBUNWIND_INSTALL_HEADERS=ON \
+	-DLIBUNWIND_ENABLE_ASSERTIONS=OFF \
+	-DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config \
+	-DLLVM_PATH=$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/llvm
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/llvm-project-$(LLVMUNWIND_VER).src.tar.xz
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) xf $<
-	mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)
-	echo 1 > $@
-
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
+	mv $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-revert-monorepo-requirement.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
 	echo 1 > $@
 
-checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+checksum-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
 
-$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
+$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS)
+	$(CMAKE) $(dir $<)/libunwind $(LLVMUNWIND_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
@@ -119,7 +132,7 @@ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-
 $(eval $(call staged-install, \
 	llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \
 	MAKE_INSTALL,,, \
-	cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir)))
+	cp -fR $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/* $(build_includedir)))
 
 clean-llvmunwind:
 	-rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
@@ -127,14 +140,14 @@ clean-llvmunwind:
 	-$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean
 
 distclean-llvmunwind:
-	rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \
+	rm -rf $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz \
 		$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \
 		$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)
 
-get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
-extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
-compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
+get-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
+extract-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+configure-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-configured
+compile-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-compiled
 fastcheck-llvmunwind: check-llvmunwind
 check-llvmunwind: # no test/check provided by Makefile
 
diff --git a/deps/unwind.version b/deps/unwind.version
index 1349f2d657e87..e3ed63675fd8c 100644
--- a/deps/unwind.version
+++ b/deps/unwind.version
@@ -2,5 +2,5 @@
 UNWIND_JLL_NAME := LibUnwind
 
 ## source build
-UNWIND_VER_TAG := 1.7.2
-UNWIND_VER := 1.7.2
+UNWIND_VER_TAG := 1.8.1
+UNWIND_VER := 1.8.1
diff --git a/deps/zlib.version b/deps/zlib.version
index 89a304c49b6dc..27d862a4cc35b 100644
--- a/deps/zlib.version
+++ b/deps/zlib.version
@@ -3,6 +3,6 @@
 ZLIB_JLL_NAME := Zlib
 
 ## source build
-ZLIB_VER := 1.2.13
-ZLIB_BRANCH=v1.2.13
-ZLIB_SHA1=04f42ceca40f73e2978b50e93806c2a18c1281fc
+ZLIB_VER := 1.3.1
+ZLIB_BRANCH=v1.3.1
+ZLIB_SHA1=51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
index b34942bbddd3a..c0f8b693bd1ac 100644
--- a/doc/Manifest.toml
+++ b/doc/Manifest.toml
@@ -1,8 +1,8 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.11.0-DEV"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f"
+project_hash = "1e9ffa7d4739f7d125a5e2c66af8747a8effd889"
 
 [[deps.ANSIColoredPrinters]]
 git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
@@ -10,13 +10,13 @@ uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
 version = "0.0.1"
 
 [[deps.AbstractTrees]]
-git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c"
+git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177"
 uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
-version = "0.4.4"
+version = "0.4.5"
 
 [[deps.ArgTools]]
 uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
-version = "1.1.1"
+version = "1.1.2"
 
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -26,6 +26,12 @@ version = "1.11.0"
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 version = "1.11.0"
 
+[[deps.CodecZlib]]
+deps = ["TranscodingStreams", "Zlib_jll"]
+git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73"
+uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
+version = "0.7.4"
+
 [[deps.Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -38,10 +44,10 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 version = "0.9.3"
 
 [[deps.Documenter]]
-deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"]
-git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac"
+deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"]
+git-tree-sha1 = "f15a91e6e3919055efa4f206f942a73fedf5dfe6"
 uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-version = "1.2.1"
+version = "1.4.0"
 
 [[deps.Downloads]]
 deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
@@ -60,21 +66,21 @@ version = "1.11.0"
 
 [[deps.Git]]
 deps = ["Git_jll"]
-git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d"
+git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e"
 uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
-version = "1.3.0"
+version = "1.3.1"
 
 [[deps.Git_jll]]
 deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
-git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1"
+git-tree-sha1 = "d18fb8a1f3609361ebda9bf029b60fd0f120c809"
 uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb"
-version = "2.42.0+0"
+version = "2.44.0+2"
 
 [[deps.IOCapture]]
 deps = ["Logging", "Random"]
-git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6"
+git-tree-sha1 = "8b72179abc660bfab5e28472e019392b97d0985c"
 uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
-version = "0.2.3"
+version = "0.2.4"
 
 [[deps.InteractiveUtils]]
 deps = ["Markdown"]
@@ -106,17 +112,21 @@ version = "0.6.4"
 [[deps.LibCURL_jll]]
 deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-version = "8.4.0+0"
+version = "8.6.0+0"
+
+[[deps.JuliaSyntaxHighlighting]]
+deps = ["StyledStrings"]
+uuid = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
 
 [[deps.LibGit2]]
-deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 version = "1.11.0"
 
 [[deps.LibGit2_jll]]
 deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
 uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
-version = "1.7.1+0"
+version = "1.8.0+0"
 
 [[deps.LibSSH2_jll]]
 deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
@@ -134,12 +144,11 @@ uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
 version = "1.17.0+0"
 
 [[deps.Logging]]
-deps = ["StyledStrings"]
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 version = "1.11.0"
 
 [[deps.Markdown]]
-deps = ["Base64"]
+deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 version = "1.11.0"
 
@@ -152,7 +161,7 @@ version = "0.1.2"
 [[deps.MbedTLS_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-version = "2.28.2+1"
+version = "2.28.6+0"
 
 [[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
@@ -160,7 +169,7 @@ version = "1.11.0"
 
 [[deps.MozillaCACerts_jll]]
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-version = "2023.1.10"
+version = "2024.3.11"
 
 [[deps.NetworkOptions]]
 uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
@@ -168,37 +177,41 @@ version = "1.2.0"
 
 [[deps.OpenSSL_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f"
+git-tree-sha1 = "3da7367955dcc5c54c1ba4d402ccdc09a1a3e046"
 uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
-version = "3.0.12+0"
+version = "3.0.13+1"
 
 [[deps.PCRE2_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
-version = "10.42.0+1"
+version = "10.43.0+0"
 
 [[deps.Parsers]]
 deps = ["Dates", "PrecompileTools", "UUIDs"]
-git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf"
+git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "2.8.0"
+version = "2.8.1"
 
 [[deps.Pkg]]
-deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"]
 uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-version = "1.11.0"
+version = "1.12.0"
+weakdeps = ["REPL"]
+
+    [deps.Pkg.extensions]
+    REPLExt = "REPL"
 
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
-git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
+git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
 uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
-version = "1.2.0"
+version = "1.2.1"
 
 [[deps.Preferences]]
 deps = ["TOML"]
-git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e"
+git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6"
 uuid = "21216c6a-2e73-6563-6e65-726566657250"
-version = "1.4.1"
+version = "1.4.3"
 
 [[deps.Printf]]
 deps = ["Unicode"]
@@ -206,7 +219,7 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 version = "1.11.0"
 
 [[deps.REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 version = "1.11.0"
 
@@ -233,6 +246,10 @@ version = "1.11.0"
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
 version = "1.11.0"
 
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
 [[deps.TOML]]
 deps = ["Dates"]
 uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
@@ -243,14 +260,20 @@ deps = ["ArgTools", "SHA"]
 uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
 version = "1.10.0"
 
-[[deps.StyledStrings]]
-uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
-
 [[deps.Test]]
 deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 version = "1.11.0"
 
+[[deps.TranscodingStreams]]
+git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349"
+uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
+version = "0.10.7"
+weakdeps = ["Random", "Test"]
+
+    [deps.TranscodingStreams.extensions]
+    TestExt = ["Test", "Random"]
+
 [[deps.UUIDs]]
 deps = ["Random", "SHA"]
 uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
@@ -263,14 +286,14 @@ version = "1.11.0"
 [[deps.Zlib_jll]]
 deps = ["Libdl"]
 uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-version = "1.2.13+1"
+version = "1.3.1+0"
 
 [[deps.nghttp2_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-version = "1.58.0+0"
+version = "1.60.0+0"
 
 [[deps.p7zip_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
-version = "17.4.0+2"
+version = "17.5.0+0"
diff --git a/doc/make.jl b/doc/make.jl
index 1f654649d9237..5ef5fcf55b215 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -3,7 +3,8 @@ Base.ACTIVE_PROJECT[] = nothing
 empty!(LOAD_PATH)
 push!(LOAD_PATH, @__DIR__, "@stdlib")
 empty!(DEPOT_PATH)
-pushfirst!(DEPOT_PATH, joinpath(@__DIR__, "deps"))
+push!(DEPOT_PATH, joinpath(@__DIR__, "deps"))
+push!(DEPOT_PATH, abspath(Sys.BINDIR, "..", "share", "julia"))
 using Pkg
 Pkg.instantiate()
 
@@ -102,7 +103,14 @@ documenter_stdlib_remotes = let stdlib_dir = realpath(joinpath(@__DIR__, "..", "
         isdir(package_root_dir) || mkpath(package_root_dir)
         package_root_dir => (remote, package_sha)
     end
-    Dict(remotes_list)
+    Dict(
+        # We also add the root of the repository to `remotes`, because we do not always build the docs in a
+        # checked out JuliaLang/julia repository. In particular, when building Julia from tarballs, there is no
+        # Git information available. And also the way the BuildKite CI is configured to check out the code means
+        # that in some circumstances the Git repository information is incorrect / no available via Git.
+        dirname(@__DIR__) => (Documenter.Remotes.GitHub("JuliaLang", "julia"), Base.GIT_VERSION_INFO.commit),
+        remotes_list...
+    )
 end
 
 # Check if we are building a PDF
@@ -156,6 +164,7 @@ Manual = [
     "manual/environment-variables.md",
     "manual/embedding.md",
     "manual/code-loading.md",
+    "manual/profile.md",
     "manual/stacktraces.md",
     "manual/performance-tips.md",
     "manual/workflow-tips.md",
@@ -191,12 +200,6 @@ BaseDocs = [
 
 StdlibDocs = [stdlib.targetfile for stdlib in STDLIB_DOCS]
 
-Tutorials = [
-    "tutorials/creating-packages.md",
-    "tutorials/profile.md",
-    "tutorials/external.md",
-]
-
 DevDocs = [
     "Documentation of Julia's Internals" => [
         "devdocs/init.md",
@@ -255,7 +258,6 @@ const PAGES = [
     "Manual" => ["index.md", Manual...],
     "Base" => BaseDocs,
     "Standard Library" => StdlibDocs,
-    "Tutorials" => Tutorials,
     # Add "Release Notes" to devdocs
     "Developer Documentation" => [DevDocs..., hide("NEWS.md")],
 ]
@@ -266,7 +268,6 @@ const PAGES = [
     "Manual" => Manual,
     "Base" => BaseDocs,
     "Standard Library" => StdlibDocs,
-    "Tutorials" => Tutorials,
     "Developer Documentation" => DevDocs,
 ]
 end
@@ -365,6 +366,7 @@ else
         ansicolor = true,
         size_threshold = 800 * 2^10, # 800 KiB
         size_threshold_warn = 200 * 2^10, # the manual has quite a few large pages, so we warn at 200+ KiB only
+        inventory_version = VERSION,
     )
 end
 
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index a7d16abe3f85c..ebac4362b39a6 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -28,7 +28,7 @@
 julia - a high-level, high-performance dynamic programming language for technical computing
 
 .SH SYNOPSIS
-\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMMFILE] [ARGS...]
+\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMFILE] [ARGS...]
 
 If a Julia source file is given as a \fIPROGRAMFILE\fP (optionally followed by
 arguments in \fIARGS\fP) Julia will execute the program and exit.
@@ -59,7 +59,7 @@ Display version information
 
 .TP
 -h, --help
-Print help message
+Print command-line options (this message)
 
 .TP
 --help-hidden
@@ -77,7 +77,7 @@ Start up with the given system image file
 
 .TP
 -H, --home <dir>
-Set location of julia executable
+Set location of `julia` executable
 
 .TP
 --startup-file={yes*|no}
@@ -95,44 +95,47 @@ Use native code from system image if available
 .TP
 --compiled-modules={yes*|no|existing|strict}
 Enable or disable incremental precompilation of modules.
-The "existing" option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files.
-The "strict" option is similar, but will error if no precompile file is found.
+The `existing` option allows use of existing compiled modules that were
+previously precompiled, but disallows creation of new precompile files.
+The `strict` option is similar, but will error if no precompile file is found.
 
 .TP
 --pkgimages={yes*|no|existing}
 Enable or disable usage of native code caching in the form of pkgimages
+The `existing` option allows use of existing pkgimages but disallows creation of new ones
 
 .TP
 -e, --eval <expr>
 Evaluate <expr>
 
-.TP
--m, --module <Package> [args]
-Run entry point of `Package` (`@main` function) with `args'.
-
-
 .TP
 -E, --print <expr>
 Evaluate <expr> and display the result
 
+.TP
+-m, --module <Package> [args]
+Run entry point of `Package` (`@main` function) with `args'
+
 .TP
 -L, --load <file>
 Load <file> immediately on all processors
 
 .TP
--t, --threads <n>
-Enable n threads; "auto" tries to infer a useful default number
-of threads to use but the exact behavior might change in the future.
-Currently, "auto" uses the number of CPUs assigned to this julia
-process based on the OS-specific affinity assignment interface, if
-supported (Linux and Windows). If this is not supported (macOS) or
-process affinity is not configured, it uses the number of CPU
-threads.
+-t, --threads {auto|N[,auto|M]}
+Enable N[+M] threads; N threads are assigned to the `default`
+threadpool, and if M is specified, M threads are assigned to the
+`interactive` threadpool; `auto` tries to infer a useful
+default number of threads to use but the exact behavior might change
+in the future. Currently sets N to the number of CPUs assigned to
+this Julia process based on the OS-specific affinity assignment
+interface if supported (Linux and Windows) or to the number of CPU
+threads if not supported (MacOS) or if process affinity is not
+configured, and sets M to 1.
 
 .TP
 --gcthreads=N[,M]
 Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC.
-N is set to half of the number of compute threads and M is set to 0 if unspecified.
+N is set to the number of compute threads and M is set to 0 if unspecified.
 
 .TP
 -p, --procs {N|auto}
@@ -144,7 +147,7 @@ as the number of local CPU threads (logical cores)
 Run processes on hosts listed in <file>
 
 .TP
--i
+-i, --interactive
 Interactive mode; REPL runs and `isinteractive()` is true
 
 .TP
@@ -152,7 +155,7 @@ Interactive mode; REPL runs and `isinteractive()` is true
 Quiet startup: no banner, suppress REPL warnings
 
 .TP
---banner={yes|no|auto*}
+--banner={yes|no|short|auto*}
 Enable or disable startup banner
 
 .TP
@@ -180,15 +183,15 @@ Enable or disable warning for ambiguous top-level scope
 Limit usage of CPU features up to <target>; set to `help` to see the available options
 
 .TP
--O, --optimize={0,1,2*,3}
+-O, --optimize={0|1|2*|3}
 Set the optimization level (level 3 if `-O` is used without a level)
 
 .TP
---min-optlevel={0*,1,2,3}
+--min-optlevel={0*|1|2|3}
 Set a lower bound on the optimization level
 
 .TP
--g {0,1*,2}
+-g, --debug-info={0|1*|2}
 Set the level of debug info generation (level 2 if `-g` is used without a level)
 
 .TP
@@ -200,8 +203,12 @@ Control whether inlining is permitted, including overriding @inline declarations
 Emit bounds checks always, never, or respect @inbounds declarations
 
 .TP
---math-mode={ieee|user}
-Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)
+--math-mode={ieee|user*}
+Always follow `ieee` floating point semantics or respect `@fastmath` declarations
+
+.TP
+--polly={yes*|no}
+Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)
 
 .TP
 --code-coverage[={none*|user|all}]
@@ -213,8 +220,8 @@ Count executions of source lines in a file or files under a given directory. A `
 be placed before the path to indicate this option. A `@` with no path will track the current directory.
 
 .TP
- --code-coverage=tracefile.info
- Append coverage information to the LCOV tracefile (filename supports format tokens)
+--code-coverage=tracefile.info
+Append coverage information to the LCOV tracefile (filename supports format tokens)
 
 .TP
 --track-allocation[={none*|user|all}]
@@ -222,8 +229,8 @@ Count bytes allocated by each source line (omitting setting is equivalent to `us
 
 .TP
 --track-allocation=@<path>
-Count bytes allocated by each source line in a file or files under a given directory. A `@`
-must be placed before the path to indicate this option. A `@` with no path will track the current directory.
+Count bytes but only in files that fall under the given file path/directory.
+The `@` prefix is required to select this option. A `@` with no path will track the current directory.
 
 .TP
 --bug-report=KIND
@@ -233,7 +240,7 @@ fallbacks to the latest compatible BugReporting.jl if not. For more information,
 --bug-report=help.
 
 .TP
---heap-size-hint=<value>
+--heap-size-hint=<size>
 Forces garbage collection if memory usage is higher than the given value.
 The value may be specified as a number of bytes, optionally in units of
 KB, MB, GB, or TB, or as a percentage of physical memory with %.
@@ -275,13 +282,21 @@ Generate an assembly file (.s)
 Generate an incremental output file (rather than complete)
 
 .TP
---trace-compile={stderr,name}
+--trace-compile={stderr|name}
 Print precompile statements for methods compiled during execution or save to a path
 
+.TP
+--trace-compile-timing=
+If --trace-compile is enabled show how long each took to compile in ms
+
 .TP
 -image-codegen
 Force generate code in imaging mode
 
+.TP
+--permalloc-pkgimg={yes|no*}
+Copy the data section of package images into memory
+
 .SH FILES AND ENVIRONMENT
 See https://docs.julialang.org/en/v1/manual/environment-variables/
 
diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md
index f7ea23ad3b556..66fe5c78f1ee6 100644
--- a/doc/src/base/arrays.md
+++ b/doc/src/base/arrays.md
@@ -30,8 +30,9 @@ Base.StridedArray
 Base.StridedVector
 Base.StridedMatrix
 Base.StridedVecOrMat
+Base.GenericMemory
 Base.Memory
-Base.MemoryRef
+Base.memoryref
 Base.Slices
 Base.RowSlices
 Base.ColumnSlices
@@ -78,6 +79,7 @@ to operate on arrays, you should use `sin.(a)` to vectorize via `broadcast`.
 Base.broadcast
 Base.Broadcast.broadcast!
 Base.@__dot__
+Base.Broadcast.BroadcastFunction
 ```
 
 For specializing broadcast on custom types, see
@@ -124,7 +126,7 @@ accessing the first 10 elements of `x`. Writing to a view, e.g. `v[3] = 2`, writ
 
 Slicing operations like `x[1:10]` create a copy by default in Julia. `@view x[1:10]` changes it to make a view. The
 `@views` macro can be used on a whole block of code (e.g. `@views function foo() .... end` or `@views begin ... end`)
-to change all the slicing operations in that block to use views.  Sometimes making a copy of the data is faster and
+to change all the slicing operations in that block to use views. Sometimes making a copy of the data is faster and
 sometimes using a view is faster, as described in the [performance tips](@ref man-performance-views).
 
 ```@docs
@@ -136,10 +138,10 @@ Base.parentindices
 Base.selectdim
 Base.reinterpret
 Base.reshape
+Base.insertdims
 Base.dropdims
 Base.vec
 Base.SubArray
-Base.wrap
 ```
 
 ## Concatenation and permutation
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index 556d51d3af0c1..b5d50a846ce89 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -4,7 +4,7 @@
 
 Julia Base contains a range of functions and macros appropriate for performing
 scientific and numerical computing, but is also as broad as those of many general purpose programming
-languages.  Additional functionality is available from a growing collection of
+languages. Additional functionality is available from a growing collection of
 [available packages](https://julialang.org/packages/).
 Functions are grouped by topic below.
 
@@ -102,6 +102,11 @@ where
 ;
 =
 ?:
+.=
+.
+->
+::
+[]
 ```
 
 ## Standard Modules
@@ -277,6 +282,7 @@ Base.:(|>)
 Base.:(∘)
 Base.ComposedFunction
 Base.splat
+Base.Fix
 Base.Fix1
 Base.Fix2
 ```
@@ -307,7 +313,12 @@ Base.@simd
 Base.@polly
 Base.@generated
 Base.@assume_effects
+```
+
+## Managing deprecations
+```@docs
 Base.@deprecate
+Base.depwarn
 ```
 
 ## Missing Values
@@ -338,6 +349,12 @@ Base.Cmd
 Base.setenv
 Base.addenv
 Base.withenv
+Base.shell_escape
+Base.shell_split
+Base.shell_escape_posixly
+Base.shell_escape_csh
+Base.shell_escape_wincmd
+Base.escape_microsoft_c_args
 Base.setcpuaffinity
 Base.pipeline(::Any, ::Any, ::Any, ::Any...)
 Base.pipeline(::Base.AbstractCmd)
@@ -375,7 +392,7 @@ Base.Sys.isjsvm
 Base.Sys.loadavg
 Base.Sys.isexecutable
 Base.Sys.isreadable
-Base.Sys.iswriteable
+Base.Sys.iswritable
 Base.Sys.username
 Base.@static
 ```
@@ -408,6 +425,7 @@ Core.DivideError
 Core.DomainError
 Base.EOFError
 Core.ErrorException
+Core.FieldError
 Core.InexactError
 Core.InterruptException
 Base.KeyError
diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md
index 9d68d55be3459..e724930222a13 100644
--- a/doc/src/base/collections.md
+++ b/doc/src/base/collections.md
@@ -195,7 +195,7 @@ Dictionaries may also be created with generators. For example, `Dict(i => f(i) f
 
 Given a dictionary `D`, the syntax `D[x]` returns the value of key `x` (if it exists) or throws
 an error, and `D[x] = y` stores the key-value pair `x => y` in `D` (replacing any existing value
-for the key `x`).  Multiple arguments to `D[...]` are converted to tuples; for example, the syntax
+for the key `x`). Multiple arguments to `D[...]` are converted to tuples; for example, the syntax
 `D[x,y]`  is equivalent to `D[(x,y)]`, i.e. it refers to the value keyed by the tuple `(x,y)`.
 
 ```@docs
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index 22799f882bb26..300738a39322d 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -29,6 +29,7 @@ Base.Filesystem.operm
 Base.Filesystem.cp
 Base.download
 Base.Filesystem.mv
+Base.Filesystem.rename
 Base.Filesystem.rm
 Base.Filesystem.touch
 Base.Filesystem.tempname
diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md
index f99f592e0fb5c..cd3bb9fbfa7aa 100644
--- a/doc/src/base/io-network.md
+++ b/doc/src/base/io-network.md
@@ -39,6 +39,7 @@ Base.eof
 Base.isreadonly
 Base.iswritable
 Base.isreadable
+Base.isexecutable
 Base.isopen
 Base.fd
 Base.redirect_stdio
@@ -114,17 +115,17 @@ PNG images in a window can register this capability with Julia, so that calling
 types with PNG representations will automatically display the image using the module's window.
 
 In order to define a new display backend, one should first create a subtype `D` of the abstract
-class [`AbstractDisplay`](@ref).  Then, for each MIME type (`mime` string) that can be displayed on `D`, one should
+class [`AbstractDisplay`](@ref). Then, for each MIME type (`mime` string) that can be displayed on `D`, one should
 define a function `display(d::D, ::MIME"mime", x) = ...` that displays `x` as that MIME type,
 usually by calling [`show(io, mime, x)`](@ref) or [`repr(io, mime, x)`](@ref).
 A [`MethodError`](@ref) should be thrown if `x` cannot be displayed
 as that MIME type; this is automatic if one calls `show` or `repr`. Finally, one should define a function
 `display(d::D, x)` that queries [`showable(mime, x)`](@ref) for the `mime` types supported by `D`
 and displays the "best" one; a `MethodError` should be thrown if no supported MIME types are found
-for `x`.  Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should
+for `x`. Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should
 `import Base.display` to add new methods to `display`.) The return values of these functions are
 up to the implementation (since in some cases it may be useful to return a display "handle" of
-some type).  The display functions for `D` can then be called directly, but they can also be invoked
+some type). The display functions for `D` can then be called directly, but they can also be invoked
 automatically from [`display(x)`](@ref) simply by pushing a new display onto the display-backend stack
 with:
 
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index c1b18c0b66d86..4f816ce2a6c1d 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -74,6 +74,7 @@ Base.Math.tand
 Base.Math.sincosd
 Base.Math.sinpi
 Base.Math.cospi
+Base.Math.tanpi
 Base.Math.sincospi
 Base.sinh(::Number)
 Base.cosh(::Number)
@@ -165,6 +166,7 @@ Base.flipsign
 Base.sqrt(::Number)
 Base.isqrt
 Base.Math.cbrt(::AbstractFloat)
+Base.fourthroot(::Number)
 Base.real
 Base.imag
 Base.reim
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index 7605d4e6c9e7e..9f24db176b538 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -13,6 +13,7 @@ Base.istaskfailed
 Base.task_local_storage(::Any)
 Base.task_local_storage(::Any, ::Any)
 Base.task_local_storage(::Function, ::Any, ::Any)
+Core.ConcurrencyViolationError
 ```
 
 ## Scheduling
@@ -30,6 +31,8 @@ Base.schedule
 Base.errormonitor
 Base.@sync
 Base.wait
+Base.waitany
+Base.waitall
 Base.fetch(t::Task)
 Base.fetch(x::Any)
 Base.timedwait
@@ -72,11 +75,11 @@ Base.bind(c::Channel, task::Task)
 ## [Low-level synchronization using `schedule` and `wait`](@id low-level-schedule-wait)
 
 The easiest correct use of [`schedule`](@ref) is on a `Task` that is not started (scheduled)
-yet.  However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very
-low-level building block for constructing synchronization interfaces.  A crucial
+yet. However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very
+low-level building block for constructing synchronization interfaces. A crucial
 pre-condition of calling `schedule(task)` is that the caller must "own" the `task`; i.e., it
 must know that the call to `wait` in the given `task` is happening at the locations known to
-the code calling `schedule(task)`.  One strategy for ensuring such pre-condition is to use
+the code calling `schedule(task)`. One strategy for ensuring such pre-condition is to use
 atomics, as demonstrated in the following example:
 
 ```jldoctest
@@ -121,8 +124,8 @@ function Base.wait(ev::OneWayEvent)
     state, ok = @atomicreplace(ev.state, OWE_EMPTY => OWE_WAITING)
     if ok
         # OWE_EMPTY -> OWE_WAITING transition means that the notifier task is guaranteed to
-        # invoke OWE_WAITING -> OWE_NOTIFYING transition.  The waiter task must call
-        # `wait()` immediately.  In particular, it MUST NOT invoke any function that may
+        # invoke OWE_WAITING -> OWE_NOTIFYING transition. The waiter task must call
+        # `wait()` immediately. In particular, it MUST NOT invoke any function that may
         # yield to the scheduler at this point in code.
         wait()
     else
@@ -135,7 +138,7 @@ end
 
 ev = OneWayEvent()
 @sync begin
-    @async begin
+    Threads.@spawn begin
         wait(ev)
         println("done")
     end
@@ -148,12 +151,12 @@ notifying...
 done
 ```
 
-`OneWayEvent` lets one task to `wait` for another task's `notify`.  It is a limited
+`OneWayEvent` lets one task to `wait` for another task's `notify`. It is a limited
 communication interface since `wait` can only be used once from a single task (note the
 non-atomic assignment of `ev.task`)
 
 In this example, `notify(ev::OneWayEvent)` is allowed to call `schedule(ev.task)` if and
-only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`.  This lets us know that
+only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`. This lets us know that
 the task executing `wait(ev::OneWayEvent)` is now in the `ok` branch and that there cannot be
 other tasks that tries to `schedule(ev.task)` since their
 `@atomicreplace(ev.state, state => OWE_NOTIFYING)` will fail.
diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md
index dbea97e4e3cb5..8956cebd53971 100644
--- a/doc/src/base/punctuation.md
+++ b/doc/src/base/punctuation.md
@@ -22,6 +22,7 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | `'`         | a trailing apostrophe is the [`adjoint`](@ref) (that is, the complex transpose) operator Aᴴ |
 | [`*`](@ref) | the asterisk is used for multiplication, including matrix multiplication and [string concatenation](@ref man-concatenation) |
 | [`/`](@ref) | forward slash divides the argument on its left by the one on its right                      |
+| [`//`](@ref) | double forward slash performs exact, rational division                                     |
 | [`\`](@ref) | backslash operator divides the argument on its right by the one on its left, commonly used to solve matrix equations |
 | `()`        | parentheses with no arguments constructs an empty [`Tuple`](@ref)                           |
 | `(a,...)`   | parentheses with comma-separated arguments constructs a tuple containing its arguments      |
diff --git a/doc/src/base/reflection.md b/doc/src/base/reflection.md
index d44bc474abbd2..0f0af140b605f 100644
--- a/doc/src/base/reflection.md
+++ b/doc/src/base/reflection.md
@@ -100,7 +100,6 @@ as assignments, branches, and calls:
 ```jldoctest; setup = (using Base: +, sin)
 julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] ))
 :($(Expr(:thunk, CodeInfo(
-    @ none within `top-level scope`
 1 ─ %1 = 1 + 2
 │   %2 = sin(0.5)
 │   %3 = Base.vect(%1, %2)
diff --git a/doc/src/base/scopedvalues.md b/doc/src/base/scopedvalues.md
index 0de29308c5df8..6ad553429bb1f 100644
--- a/doc/src/base/scopedvalues.md
+++ b/doc/src/base/scopedvalues.md
@@ -17,18 +17,15 @@ concurrently.
     Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible
     implementation is available from the package ScopedValues.jl.
 
-In its simplest form you can create a [`ScopedValue`](@ref) with a
-default value and then use [`with`](@ref Base.with) or [`@with`](@ref) to
-enter a new dynamic scope.
+In its simplest form you can create a [`ScopedValue`](@ref Base.ScopedValues.ScopedValue)
+with a default value and then use [`with`](@ref Base.ScopedValues.with) or
+[`@with`](@ref Base.ScopedValues.@with) to enter a new dynamic scope. The new scope will
+inherit all values from the parent scope (and recursively from all outer scopes) with the
+provided scoped value taking priority over previous definitions.
 
-The new scope will inherit all values from the parent scope
-(and recursively from all outer scopes) with the provided scoped
-value taking priority over previous definitions.
-
-Let's first look at an example of **lexical** scope:
-
-A `let` statements begins a new lexical scope within which the outer definition
-of `x` is shadowed by it's inner definition.
+Let's first look at an example of **lexical** scope. A `let` statement begins
+a new lexical scope within which the outer definition of `x` is shadowed by
+it's inner definition.
 
 ```julia
 x = 1
@@ -38,9 +35,9 @@ end
 @show x # 1
 ```
 
-Since Julia uses lexical scope the variable `x` is bound within the function `f`
-to the global scope and entering a `let` scope does not change the value `f`
-observes.
+In the following example, since Julia uses lexical scope, the variable `x` in the body
+of `f` refers to the `x` defined in the global scope, and entering a `let` scope does
+not change the value `f` observes.
 
 ```julia
 x = 1
@@ -54,6 +51,8 @@ f() # 1
 Now using a `ScopedValue` we can use **dynamic** scoping.
 
 ```julia
+using Base.ScopedValues
+
 x = ScopedValue(1)
 f() = @show x[]
 with(x=>5) do
@@ -62,7 +61,7 @@ end
 f() # 1
 ```
 
-Not that the observed value of the `ScopedValue` is dependent on the execution
+Note that the observed value of the `ScopedValue` is dependent on the execution
 path of the program.
 
 It often makes sense to use a `const` variable to point to a scoped value,
@@ -70,32 +69,56 @@ and you can set the value of multiple `ScopedValue`s with one call to `with`.
 
 
 ```julia
-const scoped_val = ScopedValue(1)
-const scoped_val2 = ScopedValue(0)
-
-# Enter a new dynamic scope and set value
-@show scoped_val[] # 1
-@show scoped_val2[] # 0
-with(scoped_val => 2) do
-    @show scoped_val[] # 2
-    @show scoped_val2[] # 0
-    with(scoped_val => 3, scoped_val2 => 5) do
-        @show scoped_val[] # 3
-        @show scoped_val2[] # 5
+using Base.ScopedValues
+
+f() = @show a[]
+g() = @show b[]
+
+const a = ScopedValue(1)
+const b = ScopedValue(2)
+
+f() # a[] = 1
+g() # b[] = 2
+
+# Enter a new dynamic scope and set value.
+with(a => 3) do
+    f() # a[] = 3
+    g() # b[] = 2
+    with(a => 4, b => 5) do
+        f() # a[] = 4
+        g() # b[] = 5
     end
-    @show scoped_val[] # 2
-    @show scoped_val2[] # 0
+    f() # a[] = 3
+    g() # b[] = 2
 end
-@show scoped_val[] # 1
-@show scoped_val2[] # 0
+
+f() # a[] = 1
+g() # b[] = 2
 ```
 
-Since `with` requires a closure or a function and creates another call-frame,
-it can sometimes be beneficial to use the macro form.
+`ScopedValues` provides a macro version of `with`. The expression `@with var=>val expr`
+evaluates `expr` in a new dynamic scope with `var` set to `val`. `@with var=>val expr`
+is equivalent to `with(var=>val) do expr end`. However, `with` requires a zero-argument
+closure or function, which results in an extra call-frame. As an example, consider the
+following function `f`:
 
 ```julia
-const STATE = ScopedValue{State}()
-with_state(f, state::State) = @with(STATE => state, f())
+using Base.ScopedValues
+const a = ScopedValue(1)
+f(x) = a[] + x
+```
+
+If you wish to run `f` in a dynamic scope with `a` set to `2`, then you can use `with`:
+
+```julia
+with(() -> f(10), a=>2)
+```
+
+However, this requires wrapping `f` in a zero-argument function. If you wish to avoid
+the extra call-frame, then you can use the `@with` macro:
+
+```julia
+@with a=>2 f(10)
 ```
 
 !!! note
@@ -106,7 +129,9 @@ The parent task and the two child tasks observe independent values of the
 same scoped value at the same time.
 
 ```julia
+using Base.ScopedValues
 import Base.Threads: @spawn
+
 const scoped_val = ScopedValue(1)
 @sync begin
     with(scoped_val => 2)
@@ -128,7 +153,9 @@ values. You might want to explicitly [unshare mutable state](@ref unshare_mutabl
 when entering a new dynamic scope.
 
 ```julia
+using Base.ScopedValues
 import Base.Threads: @spawn
+
 const sval_dict = ScopedValue(Dict())
 
 # Example of using a mutable value wrongly
@@ -161,6 +188,8 @@ are not well suited for this kind of propagation; our only alternative would hav
 been to thread a value through the entire call-chain.
 
 ```julia
+using Base.ScopedValues
+
 const LEVEL = ScopedValue(:GUEST)
 
 function serve(request, response)
@@ -189,7 +218,9 @@ end
 ### [Unshare mutable state](@id unshare_mutable_state)
 
 ```julia
+using Base.ScopedValues
 import Base.Threads: @spawn
+
 const sval_dict = ScopedValue(Dict())
 
 # If you want to add new values to the dict, instead of replacing
@@ -210,6 +241,7 @@ be in (lexical) scope. This means most often you likely want to use scoped value
 as constant globals.
 
 ```julia
+using Base.ScopedValues
 const sval = ScopedValue(1)
 ```
 
@@ -218,7 +250,9 @@ Indeed one can think of scoped values as hidden function arguments.
 This does not preclude their use as non-globals.
 
 ```julia
+using Base.ScopedValues
 import Base.Threads: @spawn
+
 function main()
     role = ScopedValue(:client)
 
@@ -241,16 +275,18 @@ If you find yourself creating many `ScopedValue`'s for one given module,
 it may be better to use a dedicated struct to hold them.
 
 ```julia
+using Base.ScopedValues
+
 Base.@kwdef struct Configuration
     color::Bool = false
     verbose::Bool = false
 end
 
-const CONFIG = ScopedValue(Configuration())
+const CONFIG = ScopedValue(Configuration(color=true))
 
-@with CONFIG => Configuration(CONFIG[], color=true) begin
+@with CONFIG => Configuration(color=CONFIG[].color, verbose=true) begin
     @show CONFIG[].color # true
-    @show CONFIG[].verbose # false
+    @show CONFIG[].verbose # true
 end
 ```
 
@@ -260,7 +296,7 @@ end
 Base.ScopedValues.ScopedValue
 Base.ScopedValues.with
 Base.ScopedValues.@with
-Base.isassigned(::ScopedValue)
+Base.isassigned(::Base.ScopedValues.ScopedValue)
 Base.ScopedValues.get
 ```
 
@@ -276,6 +312,6 @@ version of Julia.
 ## Design inspiration
 
 This design was heavily inspired by [JEPS-429](https://openjdk.org/jeps/429),
-which in turn was inspired by dynamically scoped free variables in many Lisp dialects. In particular Interlisp-D and it's deep binding strategy.
+which in turn was inspired by dynamically scoped free variables in many Lisp dialects. In particular Interlisp-D and its deep binding strategy.
 
 A prior design discussed was context variables ala [PEPS-567](https://peps.python.org/pep-0567/) and implemented in Julia as [ContextVariablesX.jl](https://github.com/tkf/ContextVariablesX.jl).
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index ef470be6b55cc..b7d16ffc7d487 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -48,6 +48,9 @@ Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)
 Base.lpad
 Base.rpad
+Base.ltruncate
+Base.rtruncate
+Base.ctruncate
 Base.findfirst(::AbstractString, ::AbstractString)
 Base.findnext(::AbstractString, ::AbstractString, ::Integer)
 Base.findnext(::AbstractChar, ::AbstractString, ::Integer)
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index c2cd0e92d58d2..50a64ec5813f7 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -416,7 +416,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
   * `new`
 
     Allocates a new struct-like object. First argument is the type. The [`new`](@ref) pseudo-function is lowered
-    to this, and the type is always inserted by the compiler.  This is very much an internal-only
+    to this, and the type is always inserted by the compiler. This is very much an internal-only
     feature, and does no checking. Evaluating arbitrary `new` expressions can easily segfault.
 
   * `splatnew`
@@ -426,17 +426,20 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
   * `isdefined`
 
-    `Expr(:isdefined, :x)` returns a Bool indicating whether `x` has
-    already been defined in the current scope.
+    `Expr(:isdefined, :x [, allow_import])` returns a Bool indicating whether `x` has
+    already been defined in the current scope. The optional second argument is a boolean
+    that specifies whether `x` should be considered defined by an import or if only constants
+    or globals in the current module count as being defined. If `x` is not a global, the argument
+    is ignored.
 
   * `the_exception`
 
-    Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`.
+    Yields the caught exception inside a `catch` block, as returned by `jl_current_exception(ct)`.
 
   * `enter`
 
     Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on
-    error.  Yields a token which is consumed by `pop_exception`.
+    error. Yields a token which is consumed by `pop_exception`.
 
   * `leave`
 
@@ -519,18 +522,22 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The function signature of the opaque closure. Opaque closures don't participate in dispatch, but the input types can be restricted.
 
-      * `args[2]` : isva
-
-        Indicates whether the closure accepts varargs.
-
-      * `args[3]` : lb
+      * `args[2]` : lb
 
         Lower bound on the output type. (Defaults to `Union{}`)
 
-      * `args[4]` : ub
+      * `args[3]` : ub
 
         Upper bound on the output type. (Defaults to `Any`)
 
+      * `args[4]` : constprop
+
+        Indicates whether the opaque closure's identity may be used for constant
+        propagation. The `@opaque` macro enables this by default, but this will
+        cause additional inference which may be undesirable and prevents the
+        code from running during precompile.
+        If `args[4]` is a method, the argument is considered skipped.
+
       * `args[5]` : method
 
         The actual method as an `opaque_closure_method` expression.
@@ -605,11 +612,6 @@ for important details on how to modify these fields safely.
     For the `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`.
     But for a runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and indexable.
 
-  * `uninferred`
-
-    The uncompressed source code for a toplevel thunk. Additionally, for a generated function,
-    this is one of many places that the source code might be found.
-
   * `backedges`
 
     We store the reverse-list of cache dependencies for efficient tracking of incremental reanalysis/recompilation work that may be needed after a new method definitions.
@@ -665,7 +667,7 @@ for important details on how to modify these fields safely.
 
 ### CodeInfo
 
-A (usually temporary) container for holding lowered source code.
+A (usually temporary) container for holding lowered (and possibly inferred) source code.
 
   * `code`
 
@@ -696,29 +698,18 @@ A (usually temporary) container for holding lowered source code.
     Statement-level 32 bits flags for each expression in the function.
     See the definition of `jl_code_info_t` in julia.h for more details.
 
-  * `linetable`
-
-    An array of source location objects
-
-  * `codelocs`
-
-    An array of integer indices into the `linetable`, giving the location associated
-    with each statement.
-
-Optional Fields:
+These are only populated after inference (or by generated functions in some cases):
 
-  * `slottypes`
+  * `debuginfo`
 
-    An array of types for the slots.
+    An object to retrieve source information for each statements, see
+    [How to interpret line numbers in a `CodeInfo` object](@ref).
 
   * `rettype`
 
-    The inferred return type of the lowered form (IR). Default value is `Any`.
-
-  * `method_for_inference_limit_heuristics`
-
-    The `method_for_inference_heuristics` will expand the given method's generator if
-    necessary during inference.
+    The inferred return type of the lowered form (IR). Default value is `Any`. This is
+    mostly present for convenience, as (due to the way OpaqueClosures work) it is not
+    necessarily the rettype used by codegen.
 
   * `parent`
 
@@ -732,16 +723,19 @@ Optional Fields:
 
     The range of world ages for which this code was valid at the time when it had been inferred.
 
+Optional Fields:
 
-Boolean properties:
+  * `slottypes`
 
-  * `inferred`
+    An array of types for the slots.
 
-    Whether this has been produced by type inference.
+  * `method_for_inference_limit_heuristics`
 
-  * `inlineable`
+    The `method_for_inference_heuristics` will expand the given method's generator if
+    necessary during inference.
 
-    Whether this should be eligible for inlining.
+
+Boolean properties:
 
   * `propagate_inbounds`
 
@@ -751,7 +745,7 @@ Boolean properties:
 
 `UInt8` settings:
 
-  * `constprop`
+  * `constprop`, `inlineable`
 
     * 0 = use heuristic
     * 1 = aggressive
@@ -767,3 +761,79 @@ Boolean properties:
     * 0x01 << 4 = the syntactic control flow within this method is guaranteed to terminate (`:terminates_locally`)
 
     See the documentation of `Base.@assume_effects` for more details.
+
+
+#### How to interpret line numbers in a `CodeInfo` object
+
+There are 2 common forms for this data: one used internally that compresses the data somewhat and one used in the compiler.
+They contain the same basic info, but the compiler version is all mutable while the version used internally is not.
+
+Many consumers may be able to call `Base.IRShow.buildLineInfoNode`,
+`Base.IRShow.append_scopes!`, or `Stacktraces.lookup(::InterpreterIP)` to avoid needing to
+(re-)implement these details specifically.
+
+The definitions of each of these are:
+
+```julia
+struct Core.DebugInfo
+    @noinline
+    def::Union{Method,MethodInstance,Symbol}
+    linetable::Union{Nothing,DebugInfo}
+    edges::SimpleVector{DebugInfo}
+    codelocs::String # compressed data
+end
+mutable struct Core.Compiler.DebugInfoStream
+    def::Union{Method,MethodInstance,Symbol}
+    linetable::Union{Nothing,DebugInfo}
+    edges::Vector{DebugInfo}
+    firstline::Int32 # the starting line for this block (specified by an index of 0)
+    codelocs::Vector{Int32} # for each statement:
+        # index into linetable (if defined), else a line number (in the file represented by def)
+        # then index into edges
+        # then index into edges[linetable]
+end
+```
+
+
+  * `def` : where this `DebugInfo` was defined (the `Method`, `MethodInstance`, or `Symbol` of file scope, for example)
+
+  * `linetable`
+
+    Another `DebugInfo` that this was derived from, which contains the actual line numbers,
+    such that this DebugInfo contains only the indexes into it. This avoids making copies,
+    as well as makes it possible to track how each individual statement transformed from
+    source to optimized, not just the separate line numbers. If `def` is not a Symbol, then
+    that object replaces the current function object for the metadata on what function is
+    conceptually being executed (e.g. think Cassette transforms here). The `codelocs` values
+    described below also are interpreted as an index into the `codelocs` in this object,
+    instead of being a line number itself.
+
+  * `edges` : Vector of the unique DebugInfo for every function inlined into this (which
+    recursively have the edges for everything inlined into them).
+
+  * `firstline` (when uncompressed to DebugInfoStream)
+
+    The line number associated with the `begin` statement (or other keyword such as
+    `function` or `quote`) that delineates where this code definition "starts".
+
+  * `codelocs` (when uncompressed to `DebugInfoStream`)
+
+    A vector of indices, with 3 values for each statement in the IR plus one for the
+    starting point of the block, that describe the stacktrace from that point:
+     1. the integer index into the `linetable.codelocs` field, giving the
+        original location associated with each statement (including its syntactic edges),
+        or zero indicating no change to the line number from the previously
+        executed statement (which is not necessarily syntactic or lexical prior),
+        or the line number itself if the `linetable` field is `nothing`.
+     2. the integer index into `edges`, giving the `DebugInfo` inlined there,
+        or zero if there are no edges.
+     3. (if entry 2 is non-zero) the integer index into `edges[].codelocs`,
+        to interpret recursively for each function in the inlining stack,
+        or zero indicating to use `edges[].firstline` as the line number.
+
+    Special codes include:
+     - `(zero, zero, *) `: no change to the line number or edges from the previous statement
+       (you may choose to interpret this either syntactically or lexically). The inlining
+       depth also might have changed, though most callers should ignore that.
+     - `(zero, non-zero, *)` : no line number, just edges (usually because of
+       macro-expansion into top-level code).
diff --git a/doc/src/devdocs/backtraces.md b/doc/src/devdocs/backtraces.md
index 4ed3ea47efbb5..d0533ebe57fcb 100644
--- a/doc/src/devdocs/backtraces.md
+++ b/doc/src/devdocs/backtraces.md
@@ -1,12 +1,12 @@
 # Reporting and analyzing crashes (segfaults)
 
-So you managed to break Julia.  Congratulations!  Collected here are some general procedures you
-can undergo for common symptoms encountered when something goes awry.  Including the information
+So you managed to break Julia. Congratulations!  Collected here are some general procedures you
+can undergo for common symptoms encountered when something goes awry. Including the information
 from these debugging steps can greatly help the maintainers when tracking down a segfault or trying
 to figure out why your script is running slower than expected.
 
 If you've been directed to this page, find the symptom that best matches what you're experiencing
-and follow the instructions to generate the debugging information requested.  Table of symptoms:
+and follow the instructions to generate the debugging information requested. Table of symptoms:
 
   * [Segfaults during bootstrap (`sysimg.jl`)](@ref)
   * [Segfaults when running a script](@ref)
@@ -26,10 +26,10 @@ versioninfo()
 ## Segfaults during bootstrap (`sysimg.jl`)
 
 Segfaults toward the end of the `make` process of building Julia are a common symptom of something
-going wrong while Julia is preparsing the corpus of code in the `base/` folder.  Many factors
+going wrong while Julia is preparsing the corpus of code in the `base/` folder. Many factors
 can contribute toward this process dying unexpectedly, however it is as often as not due to an
 error in the C-code portion of Julia, and as such must typically be debugged with a debug build
-inside of `gdb`.  Explicitly:
+inside of `gdb`. Explicitly:
 
 Create a debug build of Julia:
 
@@ -40,7 +40,7 @@ $ make debug
 
 Note that this process will likely fail with the same error as a normal `make` incantation, however
 this will create a debug executable that will offer `gdb` the debugging symbols needed to get
-accurate backtraces.  Next, manually run the bootstrap process inside of `gdb`:
+accurate backtraces. Next, manually run the bootstrap process inside of `gdb`:
 
 ```
 $ cd base/
@@ -48,14 +48,14 @@ $ gdb -x ../contrib/debug_bootstrap.gdb
 ```
 
 This will start `gdb`, attempt to run the bootstrap process using the debug build of Julia, and
-print out a backtrace if (when) it segfaults.  You may need to hit `<enter>` a few times to get
-the full backtrace.  Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info),
+print out a backtrace if (when) it segfaults. You may need to hit `<enter>` a few times to get
+the full backtrace. Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info),
 and any other pertinent information you can think of and open a new [issue](https://github.com/JuliaLang/julia/issues?q=is%3Aopen)
 on Github with a link to the gist.
 
 ## Segfaults when running a script
 
-The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref).  Create a debug
+The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref). Create a debug
 build of Julia, and run your script inside of a debugged Julia process:
 
 ```
@@ -64,7 +64,7 @@ $ make debug
 $ gdb --args usr/bin/julia-debug <path_to_your_script>
 ```
 
-Note that `gdb` will sit there, waiting for instructions.  Type `r` to run the process, and `bt`
+Note that `gdb` will sit there, waiting for instructions. Type `r` to run the process, and `bt`
 to generate a backtrace once it segfaults:
 
 ```
diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md
index 7acd32f04dc75..fa2cda2698bfe 100644
--- a/doc/src/devdocs/boundscheck.md
+++ b/doc/src/devdocs/boundscheck.md
@@ -105,7 +105,7 @@ checkbounds_indices(Bool, (IA1, IA...), (I1, I...)) = checkindex(Bool, IA1, I1)
                                                       checkbounds_indices(Bool, IA, I)
 ```
 
-so `checkindex` checks a single dimension.  All of these functions, including the unexported
+so `checkindex` checks a single dimension. All of these functions, including the unexported
 `checkbounds_indices` have docstrings accessible with `?` .
 
 If you have to customize bounds checking for a specific array type, you should specialize `checkbounds(Bool, A, I...)`.
@@ -113,10 +113,10 @@ However, in most cases you should be able to rely on `checkbounds_indices` as lo
 useful `axes` for your array type.
 
 If you have novel index types, first consider specializing `checkindex`, which handles a single
-index for a particular dimension of an array.  If you have a custom multidimensional index type
+index for a particular dimension of an array. If you have a custom multidimensional index type
 (similar to `CartesianIndex`), then you may have to consider specializing `checkbounds_indices`.
 
-Note this hierarchy has been designed to reduce the likelihood of method ambiguities.  We try
+Note this hierarchy has been designed to reduce the likelihood of method ambiguities. We try
 to make `checkbounds` the place to specialize on array type, and try to avoid specializations
 on index types; conversely, `checkindex` is intended to be specialized only on index type (especially,
 the last argument).
diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md
index 747ee25d22a04..df9ede07d270f 100644
--- a/doc/src/devdocs/build/arm.md
+++ b/doc/src/devdocs/build/arm.md
@@ -55,18 +55,9 @@ due to unsupported inline assembly. In that case, add `MCPU=armv7-a` to
 
 ## AArch64 (ARMv8)
 
-Julia has been successfully built on the following ARMv8 devices:
+Julia is expected to work and build on ARMv8 cpus. One should follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md). Julia expects to have around 8GB of ram or swap enabled to build itself.
 
-* [nVidia Jetson TX1 & TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html);
-* [X-Gene 1](https://www.apm.com/products/data-center/x-gene-family/x-gene/);
-* [Overdrive 3000](https://softiron.com/products/overdrive-3000/);
-* [Cavium ThunderX](https://www.cavium.com/ThunderX_ARM_Processors.html) on [packet.net](https://www.packet.net).
-
-Compilation on `ARMv8-A` requires that `Make.user` is configured as follows:
-
-```
-MCPU=armv8-a
-```
+### Known issues
 
 Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions.
 Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like
@@ -77,21 +68,3 @@ Should this happen, ask your system administrator to increase the limit of memor
 ```
 sysctl -w vm.max_map_count=262144
 ```
-
-### nVidia Jetson TX2
-
-Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html)
-platform with minimal configuration changes.
-
-After configuring `Make.user` as per the `AArch64` instructions in this document,
-follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md).
-The majority of the build dependencies specified in the instructions are installed by
-the default configuration flashed by [Jetpack 3.0](https://developer.nvidia.com/embedded/jetpack). The remaining tools can be installed by issuing the following command:
-
-```
-sudo apt-get install gfortran wget cmake
-```
-
-A full parallel build, including LLVM,
-will complete in around two hours. All tests pass and CUDA functionality is available
-through, e.g., [CUDAdrv](https://github.com/JuliaGPU/CUDAdrv.jl).
diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md
index e8092df493df7..0ef9ce4e4f071 100644
--- a/doc/src/devdocs/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -271,7 +271,7 @@ DEPS_GIT = llvm
 #  LLVM_BRANCH = julia-16.0.6-0
 #SHA hash of the alternate commit to check out automatically
 #  LLVM_SHA1 = $(LLVM_BRANCH)
-#List of LLVM targets to build.  It is strongly recommended to keep at least all the
+#List of LLVM targets to build. It is strongly recommended to keep at least all the
 #default targets listed in `deps/llvm.mk`, even if you don't necessarily need all of them.
 #  LLVM_TARGETS = ...
 #Use ccache for faster recompilation in case you need to restart a build.
@@ -335,8 +335,8 @@ Please note that assert builds of Julia will be slower than regular (non-assert)
 
 ## Building 32-bit Julia on a 64-bit machine
 
-Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine.  Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/).
-However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system.  At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps:
+Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine. Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/).
+However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system. At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps:
 
 ```sh
 $ docker pull i386/ubuntu
diff --git a/doc/src/devdocs/build/distributing.md b/doc/src/devdocs/build/distributing.md
index 8a6c5743035c3..99c08923b415b 100644
--- a/doc/src/devdocs/build/distributing.md
+++ b/doc/src/devdocs/build/distributing.md
@@ -2,9 +2,9 @@ Binary distributions
 =======================================
 
 These notes are for those wishing to compile a binary distribution of Julia
-for distribution on various platforms.  We love users spreading Julia as
+for distribution on various platforms. We love users spreading Julia as
 far and wide as they can, trying it out on as wide an array of
-operating systems and hardware configurations as possible.  As each
+operating systems and hardware configurations as possible. As each
 platform has specific gotchas and processes that must be followed in
 order to create a portable, working Julia distribution, we have
 separated most of the notes by OS.
@@ -86,8 +86,8 @@ installation-wide initialization file. This file can be used by
 distribution managers to set up custom paths or initialization code.
 For Linux distribution packages, if `$prefix` is
 set to `/usr`, there is no `/usr/etc` to look into. This requires
-the path to Julia's private `etc` directory to be changed.  This can
-be done via the `sysconfdir` make variable when building.  Simply
+the path to Julia's private `etc` directory to be changed. This can
+be done via the `sysconfdir` make variable when building. Simply
 pass `sysconfdir=/etc` to `make` when building and Julia will first
 check `/etc/julia/startup.jl` before trying
 `$prefix/etc/julia/startup.jl`.
@@ -97,12 +97,12 @@ OS X
 
 To create a binary distribution on OSX, build Julia first, then cd to
 `contrib/mac/app`, and run `make` with the same makevars that were used
-with `make` when building Julia proper.  This will then
+with `make` when building Julia proper. This will then
 create a `.dmg` file in the `contrib/mac/app` directory holding a
 completely self-contained Julia.app.
 
 Alternatively, Julia may be built as a framework by invoking `make` with the
-`darwinframework` target and `DARWIN_FRAMEWORK=1` set.  For example,
+`darwinframework` target and `DARWIN_FRAMEWORK=1` set. For example,
 `make DARWIN_FRAMEWORK=1 darwinframework`.
 
 Windows
diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md
index 8f8f0c8bc676a..ba4af459e24d0 100644
--- a/doc/src/devdocs/build/windows.md
+++ b/doc/src/devdocs/build/windows.md
@@ -60,14 +60,14 @@ MinGW-w64 compilers available through Cygwin's package manager.
 
  3. At the *Select Packages* step, select the following:
 
-    1.  From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
-    2.  From the *Net* category: `curl`
-    3.  From *Interpreters* (or *Python*) category: `m4`, `python3`
-    4.  From the *Archive* category: `p7zip`
-    5.  For 32 bit Julia, and also from the *Devel* category:
-        `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran`
-    6.  For 64 bit Julia, and also from the *Devel* category:
-        `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran`
+    1. From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
+    2. From the *Net* category: `curl`
+    3. From *Interpreters* (or *Python*) category: `m4`, `python3`
+    4. From the *Archive* category: `p7zip`
+    5. For 32 bit Julia, and also from the *Devel* category:
+       `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran`
+    6. For 64 bit Julia, and also from the *Devel* category:
+       `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran`
 
  4. Allow Cygwin installation to finish, then start from the installed shortcut
     *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively.
@@ -208,7 +208,7 @@ done
 
 **On Mac**: Install XCode, XCode command line tools, X11 (now
 [XQuartz](https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
-or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`, or `brew
+or [Homebrew](https://brew.sh/). Then run `port install wine wget mingw-w64`, or `brew
 install wine wget mingw-w64`, as appropriate.
 
 **Then run the build:**
@@ -259,14 +259,14 @@ Then run the resulting installer.
 ### GDB not attaching to the right process
 
  - Use the PID from the Windows task manager or `WINPID` from the `ps` command
-   instead of the PID from unix-style command line tools (e.g. `pgrep`).  You
+   instead of the PID from unix-style command line tools (e.g. `pgrep`). You
    may need to add the PID column if it is not shown by default in the Windows
    task manager.
 
 ### GDB not showing the right backtrace
 
  - When attaching to the julia process, GDB may not be attaching to the right
-   thread.  Use `info threads` command to show all the threads and
+   thread. Use `info threads` command to show all the threads and
    `thread <threadno>` to switch threads.
  - Be sure to use a 32 bit version of GDB to debug a 32 bit build of Julia, or
    a 64 bit version of GDB to debug a 64 bit build of Julia.
diff --git a/doc/src/devdocs/builtins.md b/doc/src/devdocs/builtins.md
index 5ce7fae16f488..e53321f3e70a0 100644
--- a/doc/src/devdocs/builtins.md
+++ b/doc/src/devdocs/builtins.md
@@ -7,7 +7,7 @@ definitions for what defines the abilities and behaviors of a Julia program. The
 typically accessed through a higher level generic API.
 
 ```@docs
-Core.memoryref
+Core.memoryrefnew
 Core.memoryrefoffset
 Core.memoryrefget
 Core.memoryrefset!
@@ -22,7 +22,6 @@ Core.Intrinsics.atomic_pointerswap
 Core.Intrinsics.atomic_pointermodify
 Core.Intrinsics.atomic_pointerreplace
 Core.get_binding_type
-Core.set_binding_type!
 Core.IntrinsicFunction
 Core.Intrinsics
 Core.IR
diff --git a/doc/src/devdocs/cartesian.md b/doc/src/devdocs/cartesian.md
index 604f04f2a39e5..8d5d6d1832e23 100644
--- a/doc/src/devdocs/cartesian.md
+++ b/doc/src/devdocs/cartesian.md
@@ -26,7 +26,7 @@ end
 ```
 
 In general, Cartesian allows you to write generic code that contains repetitive elements, like
-the nested loops in this example.  Other applications include repeated expressions (e.g., loop
+the nested loops in this example. Other applications include repeated expressions (e.g., loop
 unwinding) or creating function calls with variable numbers of arguments without using the "splat"
 construct (`i...`).
 
@@ -71,7 +71,7 @@ DocTestSetup = nothing
 
 The first argument to both of these macros is the number of expressions, which must be an integer.
 When you're writing a function that you intend to work in multiple dimensions, this may not be
-something you want to hard-code. The recommended approach is to use a `@generated function`.  Here's
+something you want to hard-code. The recommended approach is to use a `@generated function`. Here's
 an example:
 
 ```julia
@@ -91,7 +91,7 @@ Naturally, you can also prepare expressions or perform calculations before the `
 ### Anonymous-function expressions as macro arguments
 
 Perhaps the single most powerful feature in `Cartesian` is the ability to supply anonymous-function
-expressions that get evaluated at parsing time.  Let's consider a simple example:
+expressions that get evaluated at parsing time. Let's consider a simple example:
 
 ```julia
 @nexprs 2 j->(i_j = 1)
@@ -106,8 +106,8 @@ i_2 = 1
 ```
 
 In each generated statement, an "isolated" `j` (the variable of the anonymous function) gets replaced
-by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax.  This
-allows you to do math on the index `j`.  Here's an example computing the strides of an array:
+by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax. This
+allows you to do math on the index `j`. Here's an example computing the strides of an array:
 
 ```julia
 s_1 = 1
diff --git a/doc/src/devdocs/compiler.md b/doc/src/devdocs/compiler.md
index 0749eafd81bd3..8f5f2bb1aa17c 100644
--- a/doc/src/devdocs/compiler.md
+++ b/doc/src/devdocs/compiler.md
@@ -94,11 +94,16 @@ Use appropriate care when copying.
 
 ## Specialized Calling Convention Signature Representation
 
-A `jl_returninfo_t` object describes the calling convention details of any callable.
+A `jl_returninfo_t` object describes the specialized calling convention details of any
+callable. It can be generated from any (specTypes, rettype) pair, such as a CodeInstance, or
+other place they are declared. This is the expected calling convention for specptr, but
+other data may be stored there. Only if the function pointer stored there has the
+expected specialized calling convention will the corresponding flag be set in specsigflags
+to indicate it is useable.
 
-If any of the arguments or return type of a method can be represented unboxed,
-and the method is not varargs, it'll be given an optimized calling convention
-signature based on its `specTypes` and `rettype` fields.
+If any of the arguments or return type of a method can be represented unboxed, and none are
+unable to be represented unboxed (such as an unbounded vararg), it will be given an
+optimized calling convention signature based on the `specTypes` and `rettype` values.
 
 The general principles are that:
 
@@ -112,4 +117,5 @@ The total logic for this is implemented by `get_specsig_function` and `deserves_
 
 Additionally, if the return type is a union, it may be returned as a pair of values (a pointer and a tag).
 If the union values can be stack-allocated, then sufficient space to store them will also be passed as a hidden first argument.
+If the struct to return needs gc roots, space for those will be passed as a hidden second argument.
 It is up to the callee whether the returned pointer will point to this space, a boxed object, or even other constant memory.
diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md
index a7740b1780b06..0c7ee9d98f046 100644
--- a/doc/src/devdocs/debuggingtips.md
+++ b/doc/src/devdocs/debuggingtips.md
@@ -241,7 +241,7 @@ process)
 
 Julia now works out of the box with [rr](https://rr-project.org/), the lightweight recording and
 deterministic debugging framework from Mozilla. This allows you to replay the trace of an execution
-deterministically.  The replayed execution's address spaces, register contents, syscall data etc
+deterministically. The replayed execution's address spaces, register contents, syscall data etc
 are exactly the same in every run.
 
 A recent version of rr (3.1.0 or higher) is required.
diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md
index f33b968ad3948..96315c67b659f 100644
--- a/doc/src/devdocs/jit.md
+++ b/doc/src/devdocs/jit.md
@@ -59,20 +59,22 @@ In addition, there are a number of different transitional states that occur duri
 
 1. When writing `invoke`, `specsigflags`, and `specptr`:
       1. Perform an atomic compare-exchange operation of specptr assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write.
-      2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written.
+      2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written, then restart from step 1 if desired.
       3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write.
       4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`.
       5. Set the second bit of `specsigflags` to 1. This must be at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set.
 2. When reading all of `invoke`, `specsigflags`, and `specptr`:
-   1. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`.
-   2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
-   3. Read the `specptr` field with at least an acquire memory ordering.
+   1. Read the `specptr` field with any memory ordering.
+   2. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`.
+   3. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
    4. If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
    5. If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering.
-   6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`.
+   6. Re-read the `invoke` field with any memory ordering. This load will be referred to as `final_invoke`.
    7. Read the `specsigflags` field with any memory ordering.
    8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3.
 3. When updating a `specptr` to a different but equivalent function pointer:
    1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten.
 
+Correctly reading these fields is implemented in `jl_read_codeinst_invoke`.
+
 Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future.
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 170a812c09994..ab8f7dde50022 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -43,7 +43,7 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
 Some of the `.cpp` files form a group that compile to a single object.
 
 The difference between an intrinsic and a builtin is that a builtin is a first class function
-that can be used like any other Julia function.  An intrinsic can operate only on unboxed data,
+that can be used like any other Julia function. An intrinsic can operate only on unboxed data,
 and therefore its arguments must be statically typed.
 
 ### [Alias Analysis](@id LLVM-Alias-Analysis)
@@ -188,7 +188,7 @@ study it and the pass of interest in isolation.
 3. Pick out the IR at the point just before the pass of interest runs.
 4. Strip the debug metadata and fix up the TBAA metadata by hand.
 
-The last step is labor intensive.  Suggestions on a better way would be appreciated.
+The last step is labor intensive. Suggestions on a better way would be appreciated.
 
 ## The jlcall calling convention
 
diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md
index 50cdd738e3b34..8d6672842c3c8 100644
--- a/doc/src/devdocs/locks.md
+++ b/doc/src/devdocs/locks.md
@@ -71,19 +71,8 @@ The following is a level 5 lock
 
 The following are a level 6 lock, which can only recurse to acquire locks at lower levels:
 
->   * codegen
 >   * jl_modules_mutex
 
-The following is an almost root lock (level end-1), meaning only the root look may be held when
-trying to acquire it:
-
->   * typeinf
->
->     > this one is perhaps one of the most tricky ones, since type-inference can be invoked from many
->     > points
->     >
->     > currently the lock is merged with the codegen lock, since they call each other recursively
-
 The following lock synchronizes IO operation. Be aware that doing any I/O (for example,
 printing warning messages or debug information) while holding any other lock listed above
 may result in pernicious and hard-to-find deadlocks. BE VERY CAREFUL!
@@ -149,7 +138,7 @@ Module serializer : toplevel lock
 
 JIT & type-inference : codegen lock
 
-MethodInstance/CodeInstance updates : Method->writelock, codegen lock
+MethodInstance/CodeInstance updates : Method->writelock
 
 >   * These are set at construction and immutable:
 >       * specTypes
@@ -157,26 +146,9 @@ MethodInstance/CodeInstance updates : Method->writelock, codegen lock
 >       * def
 >       * owner
 
->   * These are set by `jl_type_infer` (while holding codegen lock):
->       * cache
->       * rettype
->       * inferred
-        * valid ages
-
->   * `inInference` flag:
->       * optimization to quickly avoid recurring into `jl_type_infer` while it is already running
->       * actual state (of setting `inferred`, then `fptr`) is protected by codegen lock
-
 >   * Function pointers:
->       * these transition once, from `NULL` to a value, while the codegen lock is held
->
->   * Code-generator cache (the contents of `functionObjectsDecls`):
->       * these can transition multiple times, but only while the codegen lock is held
->       * it is valid to use old version of this, or block for new versions of this, so races are benign,
->         as long as the code is careful not to reference other data in the method instance (such as `rettype`)
->         and assume it is coordinated, unless also holding the codegen lock
+>       * these transition once, from `NULL` to a value, which is coordinated internal to the JIT
 >
-LLVMContext : codegen lock
 
 Method : Method->writelock
 
diff --git a/doc/src/devdocs/meta.md b/doc/src/devdocs/meta.md
index 726f471c228e0..7b37ceaad068d 100644
--- a/doc/src/devdocs/meta.md
+++ b/doc/src/devdocs/meta.md
@@ -2,7 +2,7 @@
 
 In some circumstances, one might wish to provide hints or instructions that a given block of code
 has special properties: you might always want to inline it, or you might want to turn on special
-compiler optimization passes.  Starting with version 0.4, Julia has a convention that these instructions
+compiler optimization passes. Starting with version 0.4, Julia has a convention that these instructions
 can be placed inside a `:meta` expression, which is typically (but not necessarily) the first
 expression in the body of a function.
 
diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md
index a2f72d623ab21..8134132d6ee75 100644
--- a/doc/src/devdocs/object.md
+++ b/doc/src/devdocs/object.md
@@ -92,7 +92,7 @@ The corresponding global `jl_datatype_t` objects are created by [`jl_init_types`
 
 The garbage collector uses several bits from the metadata portion of the `jl_typetag_t` to track
 each object in the system. Further details about this algorithm can be found in the comments of
-the [garbage collector implementation in `gc.c`](https://github.com/JuliaLang/julia/blob/master/src/gc.c).
+the [garbage collector implementation in `gc-stock.c`](https://github.com/JuliaLang/julia/blob/master/src/gc-stock.c).
 
 ## Object allocation
 
@@ -179,7 +179,7 @@ jl_value_t *newstruct(jl_value_t *type);
 jl_value_t *newobj(jl_value_t *type, size_t nfields);
 ```
 
-And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc.c`),
+And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc-stock.c`),
 then tagged with its type:
 
 ```c
diff --git a/doc/src/devdocs/offset-arrays.md b/doc/src/devdocs/offset-arrays.md
index cc647eb1bd464..9a234288c6097 100644
--- a/doc/src/devdocs/offset-arrays.md
+++ b/doc/src/devdocs/offset-arrays.md
@@ -2,7 +2,7 @@
 
 Conventionally, Julia's
 arrays are indexed starting at 1, whereas some other languages start numbering at 0, and yet others
-(e.g., Fortran) allow you to specify arbitrary starting indices.  While there is much merit in
+(e.g., Fortran) allow you to specify arbitrary starting indices. While there is much merit in
 picking a standard (i.e., 1 for Julia), there are some algorithms which simplify considerably
 if you can index outside the range `1:size(A,d)` (and not just `0:size(A,d)-1`, either).
 To facilitate such computations, Julia supports arrays with arbitrary indices.
@@ -57,8 +57,8 @@ the cause try running julia with the option `--check-bounds=yes`.)
 ### Using `axes` for bounds checks and loop iteration
 
 `axes(A)` (reminiscent of `size(A)`) returns a tuple of `AbstractUnitRange{<:Integer}` objects, specifying
-the range of valid indices along each dimension of `A`.  When `A` has unconventional indexing,
-the ranges may not start at 1.  If you just want the range for a particular dimension `d`, there
+the range of valid indices along each dimension of `A`. When `A` has unconventional indexing,
+the ranges may not start at 1. If you just want the range for a particular dimension `d`, there
 is `axes(A, d)`.
 
 Base implements a custom range type, `OneTo`, where `OneTo(n)` means the same thing as `1:n` but
@@ -102,7 +102,7 @@ a convenient way of producing an all-zeros array that matches the indices of A i
 
 Let's walk through a couple of explicit examples. First, if `A` has conventional indices, then
 `similar(Array{Int}, axes(A))` would end up calling `Array{Int}(undef, size(A))`, and thus return
-an array.  If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))`
+an array. If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))`
 should return something that "behaves like" an `Array{Int}` but with a shape (including indices)
 that matches `A`.  (The most obvious implementation is to allocate an `Array{Int}(undef, size(A))` and
 then "wrap" it in a type that shifts the indices.)
@@ -118,7 +118,7 @@ This page focuses on the steps needed to define unconventional indexing.
 ### Custom `AbstractUnitRange` types
 
 If you're writing a non-1 indexed array type, you will want to specialize `axes` so it returns
-a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`.  The advantage of a custom type
+a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`. The advantage of a custom type
 is that it "signals" the allocation type for functions like `similar`. If we're writing an array
 type for which indexing will start at 0, we likely want to begin by creating a new `AbstractUnitRange`,
 `ZeroRange`, where `ZeroRange(n)` is equivalent to `0:n-1`.
@@ -150,7 +150,7 @@ axes(A::AbstractArray{T,N}, d) where {T,N} = d <= N ? axes(A)[d] : OneTo(1)
 ```
 
 may not be what you want: you may need to specialize it to return something other than `OneTo(1)`
-when `d > ndims(A)`.  Likewise, in `Base` there is a dedicated function `axes1` which is equivalent
+when `d > ndims(A)`. Likewise, in `Base` there is a dedicated function `axes1` which is equivalent
 to `axes(A, 1)` but which avoids checking (at runtime) whether `ndims(A) > 0`. (This is purely
 a performance optimization.)  It is defined as:
 
diff --git a/doc/src/devdocs/precompile_hang.md b/doc/src/devdocs/precompile_hang.md
index 0de9c99792b64..2204651848509 100644
--- a/doc/src/devdocs/precompile_hang.md
+++ b/doc/src/devdocs/precompile_hang.md
@@ -7,7 +7,7 @@ On Julia 1.10 or higher, you might see the following message:
 This may repeat. If it continues to repeat with no hints that it will
 resolve itself, you may have a "precompilation hang" that requires
 fixing. Even if it's transient, you might prefer to resolve it so that
-users will not be bothered by this warning.  This page walks you
+users will not be bothered by this warning. This page walks you
 through how to analyze and fix such issues.
 
 If you follow the advice and hit `Ctrl-C`, you might see
@@ -51,7 +51,7 @@ Use a binary search to identify the problematic dependency: start by commenting
 
 Once you've identified a suspect (here we'll call it `ThePackageYouThinkIsCausingTheProblem`), first try precompiling that package. If it also hangs during precompilation, continue chasing the problem backwards.
 
-However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`.  To test this theory, set up a minimal working example (MWE), something like
+However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`. To test this theory, set up a minimal working example (MWE), something like
 
 ```julia
 (@v1.10) pkg> generate MWE
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
index e75e7f6bb0156..a0e072c0b1ae3 100644
--- a/doc/src/devdocs/probes.md
+++ b/doc/src/devdocs/probes.md
@@ -188,7 +188,7 @@ Julia session and get the PID and REPL's task address:
    _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
   | | | | | | |/ _` |  |
   | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
- _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org release
 |__/                   |
 
 1> getpid()
@@ -206,7 +206,7 @@ Now we can start `bpftrace` and have it monitor `rt__new__task` for *only* this
 
 And if we spawn a single task:
 
-`@async 1+1`
+`Threads.@spawn 1+1`
 
 we see this task being created:
 
@@ -215,8 +215,8 @@ we see this task being created:
 However, if we spawn a bunch of tasks from that newly-spawned task:
 
 ```julia
-@async for i in 1:10
-   @async 1+1
+Threads.@spawn for i in 1:10
+   Threads.@spawn 1+1
 end
 ```
 
@@ -264,7 +264,7 @@ We can see this problem illustrated with `bpftrace` quite easily. First, in one
    _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
   | | | | | | |/ _` |  |
   | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
- _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org release
 |__/                   |
 
 1> getpid()
diff --git a/doc/src/devdocs/stdio.md b/doc/src/devdocs/stdio.md
index 5ee4f0206ee0b..352420e25de77 100644
--- a/doc/src/devdocs/stdio.md
+++ b/doc/src/devdocs/stdio.md
@@ -36,7 +36,7 @@ Julia's `__init__()` function (in `base/sysimg.jl`) calls `reinit_stdio()` (in `
 to create Julia objects for [`Base.stdin`](@ref), [`Base.stdout`](@ref) and [`Base.stderr`](@ref).
 
 `reinit_stdio()` uses [`ccall`](@ref) to retrieve pointers to `JL_STD*` and calls `jl_uv_handle_type()`
-to inspect the type of each stream.  It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint`
+to inspect the type of each stream. It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint`
 object to represent each stream, e.g.:
 
 ```
@@ -63,7 +63,7 @@ stream.jl: function write(s::IO, p::Ptr, nb::Integer)
 ## printf() during initialization
 
 The libuv streams relied upon by `jl_printf()` etc., are not available until midway through
-initialization of the runtime (see `init.c`, `init_stdio()`).  Error messages or warnings that
+initialization of the runtime (see `init.c`, `init_stdio()`). Error messages or warnings that
 need to be printed before this are routed to the standard C library `fwrite()` function by the
 following mechanism:
 
diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md
index cec7a64a65245..75b76bcb563a1 100644
--- a/doc/src/devdocs/subarrays.md
+++ b/doc/src/devdocs/subarrays.md
@@ -1,6 +1,6 @@
 # SubArrays
 
-Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref).  This page
+Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref). This page
 documents some of the design principles and implementation of `SubArray`s.
 
 One of the major design goals is to ensure high performance for views of both [`IndexLinear`](@ref) and
@@ -56,8 +56,8 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N}
 end
 ```
 
-`SubArray` has 5 type parameters.  The first two are the standard element type and dimensionality.
- The next is the type of the parent `AbstractArray`.  The most heavily-used is the fourth parameter,
+`SubArray` has 5 type parameters. The first two are the standard element type and dimensionality.
+ The next is the type of the parent `AbstractArray`. The most heavily-used is the fourth parameter,
 a `Tuple` of the types of the indices for each dimension. The final one, `L`, is only provided
 as a convenience for dispatch; it's a boolean that represents whether the index types support
 fast linear indexing. More on that later.
@@ -78,8 +78,8 @@ one to dispatch to efficient algorithms.
 ### Index translation
 
 Performing index translation requires that you do different things for different concrete `SubArray`
-types.  For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions
-of the parent array, whereas for `S2` one needs to apply them to the second and third.  The simplest
+types. For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions
+of the parent array, whereas for `S2` one needs to apply them to the second and third. The simplest
 approach to indexing would be to do the type-analysis at runtime:
 
 ```julia
@@ -161,7 +161,7 @@ julia> diff(A[2:2:4,:][:])
 ```
 
 A view constructed as `view(A, 2:2:4, :)` happens to have uniform stride, and therefore linear
-indexing indeed could be performed efficiently.  However, success in this case depends on the
+indexing indeed could be performed efficiently. However, success in this case depends on the
 size of the array: if the first dimension instead were odd,
 
 ```jldoctest
@@ -192,7 +192,7 @@ then `A[2:2:4,:]` does not have uniform stride, so we cannot guarantee efficient
     levels of indirection; they can simply re-compute the indices into the original parent array!
   * Hopefully by now it's fairly clear that supporting slices means that the dimensionality, given
     by the parameter `N`, is not necessarily equal to the dimensionality of the parent array or the
-    length of the `indices` tuple.  Neither do user-supplied indices necessarily line up with entries
+    length of the `indices` tuple. Neither do user-supplied indices necessarily line up with entries
     in the `indices` tuple (e.g., the second user-supplied index might correspond to the third dimension
     of the parent array, and the third element in the `indices` tuple).
 
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index dafeddd259745..7d4f7afdbb86a 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -3,15 +3,15 @@
 ## [Building the Julia system image](@id Building-the-Julia-system-image)
 
 Julia ships with a preparsed system image containing the contents of the `Base` module, named
-`sys.ji`.  This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
-as many platforms as possible, so as to give vastly improved startup times.  On systems that do
+`sys.ji`. This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
+as many platforms as possible, so as to give vastly improved startup times. On systems that do
 not ship with a precompiled system image file, one can be generated from the source files shipped
 in Julia's `DATAROOTDIR/julia/base` folder.
 
 Julia will by default generate its system image on half of the available system threads. This
 may be controlled by the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable.
 
-This operation is useful for multiple reasons.  A user may:
+This operation is useful for multiple reasons. A user may:
 
   * Build a precompiled shared library system image on a platform that did not ship with one, thereby
     improving startup times.
@@ -84,11 +84,11 @@ generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)
 This creates a system image with three separate targets; one for a generic `x86_64`
 processor, one with a `sandybridge` ISA (explicitly excluding `xsaveopt`) that explicitly
 clones all functions, and one targeting the `haswell` ISA, based off of the `sandybridge`
-sysimg version, and also excluding `rdrnd`.  When a Julia implementation loads the
+sysimg version, and also excluding `rdrnd`. When a Julia implementation loads the
 generated sysimg, it will check the host processor for matching CPU capability flags,
-enabling the highest ISA level possible.  Note that the base level (`generic`) requires
+enabling the highest ISA level possible. Note that the base level (`generic`) requires
 the `cx16` instruction, which is disabled in some virtualization software and must be
-enabled for the `generic` target to be loaded.  Alternatively, a sysimg could be generated
+enabled for the `generic` target to be loaded. Alternatively, a sysimg could be generated
 with the target `generic,-cx16` for greater compatibility, however note that this may cause
 performance and stability problems in some code.
 
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md
index 7a5ede63b1989..a09df61e4881d 100644
--- a/doc/src/devdocs/types.md
+++ b/doc/src/devdocs/types.md
@@ -1,6 +1,6 @@
 # More about types
 
-If you've used Julia for a while, you understand the fundamental role that types play.  Here we
+If you've used Julia for a while, you understand the fundamental role that types play. Here we
 try to get under the hood, focusing particularly on [Parametric Types](@ref).
 
 ## Types and sets (and `Any` and `Union{}`/`Bottom`)
@@ -52,7 +52,7 @@ julia> typejoin(Tuple{Integer, Float64}, Tuple{Int, Real})
 Tuple{Integer, Real}
 ```
 
-While these operations may seem abstract, they lie at the heart of Julia.  For example, method
+While these operations may seem abstract, they lie at the heart of Julia. For example, method
 dispatch is implemented by stepping through the items in a method list until reaching one for which
 the type of the argument tuple is a subtype of the method signature.
 For this algorithm to work, it's important that methods be sorted by their specificity, and that the
@@ -225,7 +225,7 @@ Ptr{Cvoid} @0x00007fcc7de64850
 The `wrapper` field of [`Array`](@ref) points to itself, but for `Array{TV,NV}` it points back
 to the original definition of the type.
 
-What about the other fields? `hash` assigns an integer to each type.  To examine the `cache`
+What about the other fields? `hash` assigns an integer to each type. To examine the `cache`
 field, it's helpful to pick a type that is less heavily used than Array. Let's first create our
 own type:
 
@@ -245,8 +245,8 @@ variables are not cached.
 
 ## Tuple types
 
-Tuple types constitute an interesting special case.  For dispatch to work on declarations like
-`x::Tuple`, the type has to be able to accommodate any tuple.  Let's check the parameters:
+Tuple types constitute an interesting special case. For dispatch to work on declarations like
+`x::Tuple`, the type has to be able to accommodate any tuple. Let's check the parameters:
 
 ```jldoctest
 julia> Tuple
@@ -491,7 +491,7 @@ julia> function mysubtype(a,b)
        end
 ```
 
-and then set a breakpoint in `jl_breakpoint`.  Once this breakpoint gets triggered, you can set
+and then set a breakpoint in `jl_breakpoint`. Once this breakpoint gets triggered, you can set
 breakpoints in other functions.
 
 As a warm-up, try the following:
diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md
index b8dd96ed8be60..015c4a6d983ee 100644
--- a/doc/src/devdocs/valgrind.md
+++ b/doc/src/devdocs/valgrind.md
@@ -6,14 +6,14 @@ Julia.
 
 ## General considerations
 
-By default, Valgrind assumes that there is no self modifying code in the programs it runs.  This
+By default, Valgrind assumes that there is no self modifying code in the programs it runs. This
 assumption works fine in most instances but fails miserably for a just-in-time compiler like
-`julia`.  For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
+`julia`. For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
 code may crash or behave unexpectedly (often in subtle ways).
 
 In some cases, to better detect memory errors using Valgrind, it can help to compile `julia` with
-memory pools disabled.  The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
-`MEMDEBUG2` disables memory pools in FemtoLisp.  To build `julia` with both flags, add the following
+memory pools disabled. The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
+`MEMDEBUG2` disables memory pools in FemtoLisp. To build `julia` with both flags, add the following
 line to `Make.user`:
 
 ```make
@@ -21,7 +21,7 @@ CFLAGS = -DMEMDEBUG -DMEMDEBUG2
 ```
 
 Another thing to note: if your program uses multiple worker processes, it is likely that you
-want all such worker processes to run under Valgrind, not just the parent process.  To do this,
+want all such worker processes to run under Valgrind, not just the parent process. To do this,
 pass `--trace-children=yes` to `valgrind`.
 
 Yet another thing to note: if using `valgrind` errors with `Unable to find compatible target in system image`,
@@ -29,9 +29,9 @@ try rebuilding the sysimage with target `generic` or julia with `JULIA_CPU_TARGE
 
 ## Suppressions
 
-Valgrind will typically display spurious warnings as it runs.  To reduce the number of such warnings,
+Valgrind will typically display spurious warnings as it runs. To reduce the number of such warnings,
 it helps to provide a [suppressions file](https://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
-to Valgrind.  A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
+to Valgrind. A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
 
 The suppressions file can be used from the `julia/` source directory as follows:
 
@@ -40,13 +40,13 @@ $ valgrind --smc-check=all-non-file --suppressions=contrib/valgrind-julia.supp .
 ```
 
 Any memory errors that are displayed should either be reported as bugs or contributed as additional
-suppressions.  Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210),
+suppressions. Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210),
 so that may be one thing to consider before submitting any bugs.
 
 ## Running the Julia test suite under Valgrind
 
 It is possible to run the entire Julia test suite under Valgrind, but it does take quite some
-time (typically several hours).  To do so, run the following command from the `julia/test/` directory:
+time (typically several hours). To do so, run the following command from the `julia/test/` directory:
 
 ```
 valgrind --smc-check=all-non-file --trace-children=yes --suppressions=$PWD/../contrib/valgrind-julia.supp ../julia runtests.jl all
@@ -65,7 +65,7 @@ suppressions file yet are nonetheless safe to ignore.
 Valgrind will emit a warning if it encounters any of the [system calls
 that are specific to
 rr](https://github.com/rr-debugger/rr/blob/master/src/preload/rrcalls.h),
-the [Record and Replay Framework](https://rr-project.org/).  In
+the [Record and Replay Framework](https://rr-project.org/). In
 particular, a warning about an unhandled `1008` syscall will be shown
 when julia tries to detect whether it is running under rr:
 
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index 34e5a231d22de..8a33d31a23cf8 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -430,7 +430,7 @@ julia> map(tuple, (1/(i+j) for i=1:2, j=1:2), [1 3; 2 4])
 
 Generators are implemented via inner functions. Just like
 inner functions used elsewhere in the language, variables from the enclosing scope can be
-"captured" in the inner function.  For example, `sum(p[i] - q[i] for i=1:n)`
+"captured" in the inner function. For example, `sum(p[i] - q[i] for i=1:n)`
 captures the three variables `p`, `q` and `n` from the enclosing scope.
 Captured variables can present performance challenges; see
 [performance tips](@ref man-performance-captured).
@@ -714,7 +714,7 @@ julia> A[:, 3:3]
 ### Cartesian indices
 
 The special `CartesianIndex{N}` object represents a scalar index that behaves
-like an `N`-tuple of integers spanning multiple dimensions.  For example:
+like an `N`-tuple of integers spanning multiple dimensions. For example:
 
 ```jldoctest cartesianindex
 julia> A = reshape(1:32, 4, 4, 2);
@@ -793,7 +793,7 @@ Indexing by a boolean vector `B` is effectively the same as indexing by the
 vector of integers that is returned by [`findall(B)`](@ref). Similarly, indexing
 by a `N`-dimensional boolean array is effectively the same as indexing by the
 vector of `CartesianIndex{N}`s where its values are `true`. A logical index
-must be a array of the same shape as the dimension(s) it indexes into, or it
+must be an array of the same shape as the dimension(s) it indexes into, or it
 must be the only index provided and match the shape of the one-dimensional
 reshaped view of the array it indexes into. It is generally more efficient
 to use boolean arrays as indices directly instead of first calling [`findall`](@ref).
@@ -1074,7 +1074,7 @@ is equivalent to `broadcast(f, args...)`, providing a convenient syntax to broad
 [automatically fuse](@ref man-dot-operators) into a single `broadcast` call.
 
 Additionally, [`broadcast`](@ref) is not limited to arrays (see the function documentation);
-it also handles scalars, tuples and other collections.  By default, only some argument types are
+it also handles scalars, tuples and other collections. By default, only some argument types are
 considered scalars, including (but not limited to) `Number`s, `String`s, `Symbol`s, `Type`s, `Function`s
 and some common singletons like `missing` and `nothing`. All other arguments are
 iterated over or indexed into elementwise.
@@ -1144,7 +1144,7 @@ is created with the [`view`](@ref) function, which is called the same way as
 of [`view`](@ref) looks the same as the result of [`getindex`](@ref), except the
 data is left in place. [`view`](@ref) stores the input index vectors in a
 `SubArray` object, which can later be used to index the original array
-indirectly.  By putting the [`@views`](@ref) macro in front of an expression or
+indirectly. By putting the [`@views`](@ref) macro in front of an expression or
 block of code, any `array[...]` slice in that expression will be converted to
 create a `SubArray` view instead.
 
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index 15db6eda5f807..d1d095c48b2ff 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -64,8 +64,8 @@ the next input prompt appears. That is because the REPL is waiting for `t`
 to finish before proceeding.
 
 It is common to want to create a task and schedule it right away, so the
-macro [`@async`](@ref) is provided for that purpose --- `@async x` is
-equivalent to `schedule(@task x)`.
+macro [`Threads.@spawn`](@ref) is provided for that purpose --- `Threads.@spawn x` is
+equivalent to `task = @task x; task.sticky = false; schedule(task)`.
 
 ## Communicating with Channels
 
@@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
 
     # we can schedule `n` instances of `foo` to be active concurrently.
     for _ in 1:n
-        errormonitor(@async foo())
+        errormonitor(Threads.@spawn foo())
     end
     ```
   * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects
@@ -264,10 +264,10 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for i in 1:4 # start 4 tasks to process requests in parallel
-           errormonitor(@async do_work())
+           errormonitor(Threads.@spawn do_work())
        end
 
 julia> @elapsed while n > 0 # print out results
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 6f4d69b16bc81..b8d064f698208 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -996,7 +996,7 @@ A table of translations between the macro and function interfaces is given below
 |------------------------------------------------------------------------------|-----------------------------------------------------------------------------|
 | `@ccall clock()::Int32`                                                      | `ccall(:clock, Int32, ())`                                                  |
 | `@ccall f(a::Cint)::Cint`                                                    | `ccall(:a, Cint, (Cint,), a)`                                               |
-| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), (a, b))`                      |
+| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), a, b)`                        |
 | `@ccall $fptr.f()::Cvoid`                                                    | `ccall(fptr, f, Cvoid, ())`                                                 |
 | `@ccall printf("%s = %d\n"::Cstring ; "foo"::Cstring, foo::Cint)::Cint`      | `<unavailable>`                                                             |
 | `@ccall printf("%s = %s\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")`    |
diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md
index 25ebf3475f34c..5c8315693c71e 100644
--- a/doc/src/manual/code-loading.md
+++ b/doc/src/manual/code-loading.md
@@ -160,11 +160,12 @@ What happens if `import Zebra` is evaluated in the main `App` code base? Since `
 **The paths map** of a project environment is extracted from the manifest file. The path of a package `uuid` named `X` is determined by these rules (in order):
 
 1. If the project file in the directory matches `uuid` and name `X`, then either:
-   - It has a toplevel `path` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
-   - Otherwise, `uuid` is mapped to  `src/X.jl` relative to the directory containing the project file.
-2. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then:
-   - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
-   - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
+   - It has a toplevel `entryfile` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
+   - Otherwise, `uuid` is mapped to `src/X.jl` relative to the directory containing the project file.
+2. 1. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then:
+      - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
+      - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
+   2. If this is a directory then `uuid` is mapped to `src/X.jl` unless the matching manifest stanza has an `entryfile` entry in which case this is used. In both cases, these are relative to the directory in 2.1.
 
 If any of these result in success, the path to the source code entry point will be either that result, the relative path from that result plus `src/X.jl`; otherwise, there is no path mapping for `uuid`. When loading `X`, if no source code path is found, the lookup will fail, and the user may be prompted to install the appropriate package version or to take other corrective action (e.g. declaring `X` as a dependency).
 
@@ -208,7 +209,6 @@ This example map includes three different kinds of package locations (the first
 2. The public `Priv` and `Zebra` packages are in the system depot, where packages installed and managed by the system administrator live. These are available to all users on the system.
 3. The `Pub` package is in the user depot, where packages installed by the user live. These are only available to the user who installed them.
 
-
 ### Package directories
 
 Package directories provide a simpler kind of environment without the ability to handle name collisions. In a package directory, the set of top-level packages is the set of subdirectories that "look like" packages. A package `X` exists in a package directory if the directory contains one of the following "entry point" files:
@@ -351,7 +351,7 @@ Since the primary environment is typically the environment of a project you're w
 
 ### [Package Extensions](@id man-extensions)
 
-A package "extension" is a module that is automatically loaded when a specified set of other packages (its "extension dependencies") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The extension dependencies of an extension are a subset of those packages listed under the `[weakdeps]` section of the project file. Those packages can have compat entries like other packages.
+A package "extension" is a module that is automatically loaded when a specified set of other packages (its "triggers") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The triggers of an extension are a subset of those packages listed under the `[weakdeps]` (and possibly, but uncommonly the `[deps]`) section of the project file. Those packages can have compat entries like other packages.
 
 ```toml
 name = "MyPackage"
@@ -371,8 +371,8 @@ FooExt = "ExtDep"
 ```
 
 The keys under `extensions` are the names of the extensions.
-They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded.
-If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity.
+They are loaded when all the packages on the right hand side (the triggers) of that extension are loaded.
+If an extension only has one trigger the list of triggers can be written as just a string for brevity.
 The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for
 extension `FooExt`.
 The content of an extension is often structured as:
@@ -380,10 +380,10 @@ The content of an extension is often structured as:
 ```
 module FooExt
 
-# Load main package and extension dependencies
+# Load main package and triggers
 using MyPackage, ExtDep
 
-# Extend functionality in main package with types from the extension dependencies
+# Extend functionality in main package with types from the triggers
 MyPackage.func(x::ExtDep.SomeStruct) = ...
 
 end
@@ -391,9 +391,31 @@ end
 
 When a package with extensions is added to an environment, the `weakdeps` and `extensions` sections
 are stored in the manifest file in the section for that package. The dependency lookup rules for
-a package are the same as for its "parent" except that the listed extension dependencies are also considered as
+a package are the same as for its "parent" except that the listed triggers are also considered as
 dependencies.
 
+### [Workspaces](@id workspaces)
+
+A project file can define a workspace by giving a set of projects that is part of that workspace:
+
+```toml
+[workspace]
+projects = ["test", "benchmarks", "docs", "SomePackage"]
+```
+
+Each subfolder contains its own `Project.toml` file, which may include additional dependencies and compatibility constraints. In such cases, the package manager gathers all dependency information from all the projects in the workspace generating a single manifest file that combines the versions of all dependencies.
+
+Furthermore, workspaces can be "nested", meaning a project defining a workspace can also be part of another workspace. In this scenario, a single manifest file is still utilized, stored alongside the "root project" (the project that doesn't have another workspace including it). An example file structure could look like this:
+
+```
+Project.toml # projects = ["MyPackage"]
+Manifest.toml
+MyPackage/
+    Project.toml # projects = ["test"]
+    test/
+        Project.toml
+```
+
 ### [Package/Environment Preferences](@id preferences)
 
 Preferences are dictionaries of metadata that influence package behavior within an environment.
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md
index 448964bf1ef8a..d1ed576c42a4f 100644
--- a/doc/src/manual/command-line-interface.md
+++ b/doc/src/manual/command-line-interface.md
@@ -42,8 +42,8 @@ See also [Scripting](@ref man-scripting) for more information on writing Julia s
 ## The `Main.main` entry point
 
 As of Julia, 1.11, `Base` exports the macro `@main`. This macro expands to the symbol `main`,
-but at the conclusion of executing a script or expression, `julia` will attempt to execute the function
-`Main.main(ARGS)` if such a function has been defined and this behavior was opted into
+but at the conclusion of executing a script or expression, `julia` will attempt to execute
+`Main.main(Base.ARGS)` if such a function `Main.main` has been defined and this behavior was opted into
 by using the `@main` macro.
 
 This feature is intended to aid in the unification
@@ -59,7 +59,7 @@ expression.
 To see this feature in action, consider the following definition, which will execute the print function despite there being no explicit call to `main`:
 
 ```
-$ julia -e '(@main)(ARGS) = println("Hello World!")'
+$ julia -e '(@main)(args) = println("Hello World!")'
 Hello World!
 $
 ```
@@ -70,19 +70,19 @@ the macro `@main` was used within the defining module.
 For example, using `hello` instead of `main` will not result in the `hello` function executing:
 
 ```
-$ julia -e 'hello(ARGS) = println("Hello World!")'
+$ julia -e 'hello(args) = println("Hello World!")'
 $
 ```
 
 and neither will a plain definition of `main`:
 ```
-$ julia -e 'main(ARGS) = println("Hello World!")'
+$ julia -e 'main(args) = println("Hello World!")'
 $
 ```
 
 However, the opt-in need not occur at definition time:
 ```
-$ julia -e 'main(ARGS) = println("Hello World!"); @main'
+$ julia -e 'main(args) = println("Hello World!"); @main'
 Hello World!
 $
 ```
@@ -93,7 +93,7 @@ The `main` binding may be imported from a package. A *hello world* package defin
 module Hello
 
 export main
-(@main)(ARGS) = println("Hello from the package!")
+(@main)(args) = println("Hello from the package!")
 
 end
 ```
@@ -164,44 +164,47 @@ The following is a complete list of command-line switches available when launchi
 |Switch                                 |Description|
 |:---                                   |:---|
 |`-v`, `--version`                      |Display version information|
-|`-h`, `--help`                         |Print command-line options (this message).|
-|`--help-hidden`                        |Uncommon options not shown by `-h`|
-|`--project[={<dir>\|@.}]`              |Set `<dir>` as the active project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
+|`-h`, `--help`                         |Print command-line options (this message)|
+|`--help-hidden`                        |Print uncommon options not shown by `-h`|
+|`--project[={<dir>\|@temp\|@.}]`       |Set `<dir>` as the active project/environment. Or, create a temporary environment with `@temp`. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
 |`-J`, `--sysimage <file>`              |Start up with the given system image file|
 |`-H`, `--home <dir>`                   |Set location of `julia` executable|
 |`--startup-file={yes*\|no}`            |Load `JULIA_DEPOT_PATH/config/startup.jl`; if [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is unset, load `~/.julia/config/startup.jl`|
 |`--handle-signals={yes*\|no}`          |Enable or disable Julia's default signal handlers|
 |`--sysimage-native-code={yes*\|no}`    |Use native code from system image if available|
-|`--compiled-modules={yes*\|no\|existing|strict}` |Enable or disable incremental precompilation of modules. The `existing` option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files. The `strict` option is similar, but will error if no precompile file is found. |
-|`--pkgimages={yes*\|no|existing}`               |Enable or disable usage of native code caching in the form of pkgimages. The `existing` option allows use of existing pkgimages but disallows creation of new ones|
+|`--compiled-modules={yes*\|no\|existing\|strict}` |Enable or disable incremental precompilation of modules. The `existing` option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files. The `strict` option is similar, but will error if no precompile file is found. |
+|`--pkgimages={yes*\|no\|existing}`     |Enable or disable usage of native code caching in the form of pkgimages. The `existing` option allows use of existing pkgimages but disallows creation of new ones|
 |`-e`, `--eval <expr>`                  |Evaluate `<expr>`|
 |`-E`, `--print <expr>`                 |Evaluate `<expr>` and display the result|
+|`-m`, `--module <Package> [args]`      |Run entry point of `Package` (`@main` function) with `args'|
 |`-L`, `--load <file>`                  |Load `<file>` immediately on all processors|
-|`-t`, `--threads {N\|auto}`            |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.  Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
-| `--gcthreads=N[,M]`                   |Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC. N is set to half of the number of compute threads and M is set to 0 if unspecified.|
+|`-t`, `--threads {auto\|N[,auto\|M]}`  |Enable N[+M] threads; N threads are assigned to the `default` threadpool, and if M is specified, M threads are assigned to the `interactive` threadpool; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently sets N to the number of CPUs assigned to this Julia process based on the OS-specific affinity assignment interface if supported (Linux and Windows) or to the number of CPU threads if not supported (MacOS) or if process affinity is not configured, and sets M to 1.|
+| `--gcthreads=N[,M]`                   |Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC. N is set to the number of compute threads and M is set to 0 if unspecified.|
 |`-p`, `--procs {N\|auto}`              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
 |`--machine-file <file>`                |Run processes on hosts listed in `<file>`|
-|`-i`                                   |Interactive mode; REPL runs and `isinteractive()` is true|
+|`-i`, `--interactive`                  |Interactive mode; REPL runs and `isinteractive()` is true|
 |`-q`, `--quiet`                        |Quiet startup: no banner, suppress REPL warnings|
-|`--banner={yes\|no\|auto*}`            |Enable or disable startup banner|
+|`--banner={yes\|no\|short\|auto*}`     |Enable or disable startup banner|
 |`--color={yes\|no\|auto*}`             |Enable or disable color text|
 |`--history-file={yes*\|no}`            |Load or save history|
 |`--depwarn={yes\|no*\|error}`          |Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)|
 |`--warn-overwrite={yes\|no*}`          |Enable or disable method overwrite warnings|
 |`--warn-scope={yes*\|no}`              |Enable or disable warning for ambiguous top-level scope|
 |`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
-|`-O`, `--optimize={0,1,2*,3}`          |Set the optimization level (level is 3 if `-O` is used without a level) ($)|
-|`--min-optlevel={0*,1,2,3}`            |Set the lower bound on per-module optimization|
-|`-g`, `--debug-info={0,1*,2}`          |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)|
+|`-O`, `--optimize={0\|1\|2*\|3}`       |Set the optimization level (level is 3 if `-O` is used without a level) ($)|
+|`--min-optlevel={0*\|1\|2\|3}`         |Set the lower bound on per-module optimization|
+|`-g`, `--debug-info={0\|1*\|2}`        |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)|
 |`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
 |`--check-bounds={yes\|no\|auto*}`      |Emit bounds checks always, never, or respect `@inbounds` declarations ($)|
-|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)|
+|`--math-mode={ieee\|user*}`            |Always follow `ieee` floating point semantics or respect `@fastmath` declarations|
+|`--polly={yes*\|no}`                   |Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)|
 |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)|
 |`--code-coverage=@<path>`              |Count executions but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
 |`--code-coverage=tracefile.info`       |Append coverage information to the LCOV tracefile (filename supports format tokens).|
 |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
 |`--track-allocation=@<path>`           |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
 |`--bug-report=KIND`                    |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
+|`--heap-size-hint=<size>`              |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.|
 |`--compile={yes*\|no\|all\|min}`       |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
 |`--output-o <name>`                    |Generate an object file (including system image data)|
 |`--output-ji <name>`                   |Generate a system image data file (.ji)|
@@ -211,9 +214,10 @@ The following is a complete list of command-line switches available when launchi
 |`--output-bc <name>`                   |Generate LLVM bitcode (.bc)|
 |`--output-asm <name>`                  |Generate an assembly file (.s)|
 |`--output-incremental={yes\|no*}`      |Generate an incremental output file (rather than complete)|
-|`--trace-compile={stderr,name}`        |Print precompile statements for methods compiled during execution or save to a path|
+|`--trace-compile={stderr\|name}`       |Print precompile statements for methods compiled during execution or save to a path|
+|`--trace-compile-timing`               |If --trace-compile is enabled show how long each took to compile in ms|
 |`--image-codegen`                      |Force generate code in imaging mode|
-|`--heap-size-hint=<size>`              |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.|
+|`--permalloc-pkgimg={yes\|no*}`        |Copy the data section of package images into memory|
 
 
 !!! compat "Julia 1.1"
diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md
index 4ab611f0cafae..ed6f26725f87c 100644
--- a/doc/src/manual/control-flow.md
+++ b/doc/src/manual/control-flow.md
@@ -248,7 +248,7 @@ no
 ## Short-Circuit Evaluation
 
 The `&&` and `||` operators in Julia correspond to logical “and” and “or” operations, respectively,
-and are typically used for this purpose.  However, they have an additional property of *short-circuit*
+and are typically used for this purpose. However, they have an additional property of *short-circuit*
 evaluation: they don't necessarily evaluate their second argument, as explained below.  (There
 are also bitwise `&` and `|` operators that can be used as logical “and” and “or” *without*
 short-circuit behavior, but beware that `&` and `|` have higher precedence than `&&` and `||` for evaluation order.)
@@ -601,6 +601,7 @@ below all interrupt the normal flow of control.
 | [`DomainError`](@ref)         |
 | [`EOFError`](@ref)            |
 | [`ErrorException`](@ref)      |
+| [`FieldError`](@ref)          |
 | [`InexactError`](@ref)        |
 | [`InitError`](@ref)           |
 | [`InterruptException`](@ref)  |
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index 020181dd0a08c..f60dfb7004ada 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -123,7 +123,7 @@ An important thing to remember is that, once fetched, a [`Future`](@ref Distribu
 locally. Further [`fetch`](@ref) calls do not entail a network hop. Once all referencing [`Future`](@ref Distributed.Future)s
 have fetched, the remote stored value is deleted.
 
-[`@async`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
+[`Threads.@spawn`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
 use it to create a "feeder" task for each process. Each task picks the next index that needs to
 be computed, then waits for its process to finish, then repeats until we run out of indices. Note
 that the feeder tasks do not begin to execute until the main task reaches the end of the [`@sync`](@ref)
@@ -186,7 +186,7 @@ end
 ```
 
 In order to refer to `MyType` across all processes, `DummyModule.jl` needs to be loaded on
-every process.  Calling `include("DummyModule.jl")` loads it only on a single process.  To
+every process. Calling `include("DummyModule.jl")` loads it only on a single process. To
 load it on every process, use the [`@everywhere`](@ref) macro (starting Julia with `julia -p
 2`):
 
@@ -198,7 +198,7 @@ loaded
 ```
 
 As usual, this does not bring `DummyModule` into scope on any of the process, which requires
-[`using`](@ref) or [`import`](@ref).  Moreover, when `DummyModule` is brought into scope on one process, it
+[`using`](@ref) or [`import`](@ref). Moreover, when `DummyModule` is brought into scope on one process, it
 is not on any other:
 
 ```julia-repl
@@ -657,7 +657,7 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for p in workers() # start tasks on the workers to process requests in parallel
            remote_do(do_work, p, jobs, results)
@@ -896,7 +896,7 @@ conflicts. For example:
 ```julia
 @sync begin
     for p in procs(S)
-        @async begin
+        Threads.@spawn begin
             remotecall_wait(fill!, p, S, p)
         end
     end
@@ -978,7 +978,7 @@ and one that delegates in chunks:
 julia> function advection_shared!(q, u)
            @sync begin
                for p in procs(q)
-                   @async remotecall_wait(advection_shared_chunk!, p, q, u)
+                   Threads.@spawn remotecall_wait(advection_shared_chunk!, p, q, u)
                end
            end
            q
diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md
index 47b8b84dda1b6..d41249abe6af8 100644
--- a/doc/src/manual/documentation.md
+++ b/doc/src/manual/documentation.md
@@ -511,6 +511,20 @@ end
 Adds docstring `"..."` to the `Module` `M`. Adding the docstring above the `Module` is the preferred
 syntax, however both are equivalent.
 
+The module docstring is evaluated *inside* the scope of the module, allowing
+access to all the symbols defined in and imported into the module:
+
+```julia
+"The magic number is $(MAGIC)."
+module DocStringEval
+const MAGIC = 42
+end
+```
+
+Documenting a `baremodule` by placing a docstring above the expression automatically imports
+`@doc` into the module. These imports must be done manually when the module expression is not
+documented:
+
 ```julia
 "..."
 baremodule M
@@ -527,10 +541,6 @@ f(x) = x
 end
 ```
 
-Documenting a `baremodule` by placing a docstring above the expression automatically imports
-`@doc` into the module. These imports must be done manually when the module expression is not
-documented.
-
 ### Global Variables
 
 ```julia
diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index 9df9a6c198003..f578e10764101 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -412,7 +412,7 @@ per pointer using
 ```c
 jl_module_t *mod = jl_main_module;
 jl_sym_t *var = jl_symbol("var");
-jl_binding_t *bp = jl_get_binding_wr(mod, var);
+jl_binding_t *bp = jl_get_binding_wr(mod, var, 1);
 jl_checked_assignment(bp, mod, var, val);
 ```
 
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index e1e450998f016..1fb11018a22e7 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -234,7 +234,7 @@ Specifies the preferred registry flavor. Currently supported values are `conserv
 (the default), which will only publish resources that have been processed by the storage
 server (and thereby have a higher probability of being available from the PkgServers),
 whereas `eager` will publish registries whose resources have not necessarily been
-processed by the storage servers.  Users behind restrictive firewalls that do not allow
+processed by the storage servers. Users behind restrictive firewalls that do not allow
 downloading from arbitrary servers should not use the `eager` flavor.
 
 !!! compat "Julia 1.7"
@@ -267,6 +267,14 @@ versions of packages already installed as possible.
 !!! compat "Julia 1.9"
     This only affects Julia 1.9 and above.
 
+### [`JULIA_PKG_GC_AUTO`](@id JULIA_PKG_GC_AUTO)
+
+If set to `false`, automatic garbage collection of packages and artifacts will be disabled;
+see [`Pkg.gc`](https://pkgdocs.julialang.org/v1/api/#Pkg.gc) for more details.
+
+!!! compat "Julia 1.12"
+    This environment variable is only supported on Julia 1.12 and above.
+
 ## Network transport
 
 ### [`JULIA_NO_VERIFY_HOSTS`](@id JULIA_NO_VERIFY_HOSTS)
@@ -320,16 +328,25 @@ a master process to establish a connection before dying.
 
 ### [`JULIA_NUM_THREADS`](@id JULIA_NUM_THREADS)
 
-An unsigned 64-bit integer (`uint64_t`) that sets the maximum number of threads
-available to Julia.  If `$JULIA_NUM_THREADS` is not positive or is not set, or
-if the number of CPU threads cannot be determined through system calls, then the
-number of threads is set to `1`.
+An unsigned 64-bit integer (`uint64_t`) or string that sets the maximum number
+of threads available to Julia. If `$JULIA_NUM_THREADS` is not set or is a
+non-positive integer, or if the number of CPU threads cannot be determined
+through system calls, then the number of threads is set to `1`.
 
 If `$JULIA_NUM_THREADS` is set to `auto`, then the number of threads will be set
-to the number of CPU threads.
+to the number of CPU threads. It can also be set to a comma-separated string to
+specify the size of the `:default` and `:interactive` [threadpools](@ref
+man-threadpools), respectively:
+```bash
+# 5 threads in the :default pool and 2 in the :interactive pool
+export JULIA_NUM_THREADS=5,2
+
+# `auto` threads in the :default pool and 1 in the :interactive pool
+export JULIA_NUM_THREADS=auto,1
+```
 
 !!! note
-    `JULIA_NUM_THREADS` must be defined before starting julia; defining it in
+    `JULIA_NUM_THREADS` must be defined before starting Julia; defining it in
     `startup.jl` is too late in the startup process.
 
 !!! compat "Julia 1.5"
@@ -339,6 +356,9 @@ to the number of CPU threads.
 !!! compat "Julia 1.7"
     The `auto` value for `$JULIA_NUM_THREADS` requires Julia 1.7 or above.
 
+!!! compat "Julia 1.9"
+    The `x,y` format for threadpools requires Julia 1.9 or above.
+
 ### [`JULIA_THREAD_SLEEP_THRESHOLD`](@id JULIA_THREAD_SLEEP_THRESHOLD)
 
 If set to a string that starts with the case-insensitive substring `"infinite"`,
@@ -348,8 +368,7 @@ nanoseconds, the amount of time after which spinning threads should sleep.
 
 ### [`JULIA_NUM_GC_THREADS`](@id JULIA_NUM_GC_THREADS)
 
-Sets the number of threads used by Garbage Collection. If unspecified is set to
-half of the number of worker threads.
+Sets the number of threads used by Garbage Collection. If unspecified is set to the number of worker threads.
 
 !!! compat "Julia 1.10"
     The environment variable was added in 1.10
@@ -376,7 +395,7 @@ affinitized. Otherwise, Julia lets the operating system handle thread policy.
 ## REPL formatting
 
 Environment variables that determine how REPL output should be formatted at the
-terminal. Generally, these variables should be set to [ANSI terminal escape
+terminal. The `JULIA_*_COLOR` variables should be set to [ANSI terminal escape
 sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides
 a high-level interface with much of the same functionality; see the section on
 [The Julia REPL](@ref).
@@ -406,6 +425,19 @@ should have at the terminal.
 The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output
 should have at the terminal.
 
+### [`NO_COLOR`](@id NO_COLOR)
+
+When this variable is present and not an empty string (regardless of its value) then colored
+text will be disabled on the REPL. Can be overridden with the flag `--color=yes` or with the
+environment variable [`FORCE_COLOR`](@ref FORCE_COLOR). This environment variable is
+[commonly recognized by command-line applications](https://no-color.org/).
+
+### [`FORCE_COLOR`](@id FORCE_COLOR)
+
+When this variable is present and not an empty string (regardless of its value) then
+colored text will be enabled on the REPL. Can be overridden with the flag `--color=no`. This
+environment variable is [commonly recognized by command-line applications](https://force-color.org/).
+
 ## System and Package Image Building
 
 ### [`JULIA_CPU_TARGET`](@id JULIA_CPU_TARGET)
@@ -470,35 +502,6 @@ Allows you to enable or disable zones for a specific Julia run.
 For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable
 the `INFERENCE` zones. See [Dynamically Enabling and Disabling Zones](@ref).
 
-### [`JULIA_GC_ALLOC_POOL`](@id JULIA_GC_ALLOC_POOL)
-### [`JULIA_GC_ALLOC_OTHER`](@id JULIA_GC_ALLOC_OTHER)
-### [`JULIA_GC_ALLOC_PRINT`](@id JULIA_GC_ALLOC_PRINT)
-
-If set, these environment variables take strings that optionally start with the
-character `'r'`, followed by a string interpolation of a colon-separated list of
-three signed 64-bit integers (`int64_t`). This triple of integers `a:b:c`
-represents the arithmetic sequence `a`, `a + b`, `a + 2*b`, ... `c`.
-
-*   If it's the `n`th time that `jl_gc_pool_alloc()` has been called, and `n`
-    belongs to the arithmetic sequence represented by `$JULIA_GC_ALLOC_POOL`,
-    then garbage collection is forced.
-*   If it's the `n`th time that `maybe_collect()` has been called, and `n` belongs
-    to the arithmetic sequence represented by `$JULIA_GC_ALLOC_OTHER`, then garbage
-    collection is forced.
-*   If it's the `n`th time that `jl_gc_collect()` has been called, and `n` belongs
-    to the arithmetic sequence represented by `$JULIA_GC_ALLOC_PRINT`, then counts
-    for the number of calls to `jl_gc_pool_alloc()` and `maybe_collect()` are
-    printed.
-
-If the value of the environment variable begins with the character `'r'`, then
-the interval between garbage collection events is randomized.
-
-!!! note
-
-    These environment variables only have an effect if Julia was compiled with
-    garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1`
-    in the build configuration).
-
 ### [`JULIA_GC_NO_GENERATIONAL`](@id JULIA_GC_NO_GENERATIONAL)
 
 If set to anything besides `0`, then the Julia garbage collector never performs
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index c7f2325410a37..2673ca7532acf 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -8,15 +8,15 @@ No.
 
 ### Why don't you compile Matlab/Python/R/… code to Julia?
 
-Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language.  Simple, right?
+Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language. Simple, right?
 
-The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about.  Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT).   Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts.  If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse).  The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics.  The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem.
+The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about. Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT). Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts. If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse). The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics. The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem.
 
-Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient.  Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck.
+Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient. Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck.
 
 For similar reasons, automated translation to Julia would also typically generate unreadable, slow, non-idiomatic code that would not be a good starting point for a native Julia port from another language.
 
-On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities.  We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
+On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities. We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
 
 ## [Public API](@id man-api)
 
@@ -47,12 +47,12 @@ will not be removed or receive meaningful breaking changes before Julia 2.0.
 
 ### There is a useful undocumented function/type/constant. Can I use it?
 
-Updating Julia may break your code if you use non-public API.  If the code is
-self-contained, it may be a good idea to copy it into your project.  If you want to rely on
+Updating Julia may break your code if you use non-public API. If the code is
+self-contained, it may be a good idea to copy it into your project. If you want to rely on
 a complex non-public API, especially when using it from a stable package, it is a good idea
 to open an [issue](https://github.com/JuliaLang/julia/issues) or
 [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it
-into a public API.  However, we do not discourage the attempt to create packages that expose
+into a public API. However, we do not discourage the attempt to create packages that expose
 stable public interfaces while relying on non-public implementation details of Julia and
 buffering the differences across different Julia versions.
 
@@ -71,12 +71,12 @@ session (technically, in module `Main`), it is always present.
 
 If memory usage is your concern, you can always replace objects with ones that consume less memory.
  For example, if `A` is a gigabyte-sized array that you no longer need, you can free the memory
-with `A = nothing`.  The memory will be released the next time the garbage collector runs; you can force
+with `A = nothing`. The memory will be released the next time the garbage collector runs; you can force
 this to happen with [`GC.gc()`](@ref Base.GC.gc). Moreover, an attempt to use `A` will likely result in an error, because most methods are not defined on type `Nothing`.
 
 ### How can I modify the declaration of a type in my session?
 
-Perhaps you've defined a type and then realize you need to add a new field.  If you try this at
+Perhaps you've defined a type and then realize you need to add a new field. If you try this at
 the REPL, you get the error:
 
 ```
@@ -87,8 +87,8 @@ Types in module `Main` cannot be redefined.
 
 While this can be inconvenient when you are developing new code, there's an excellent workaround.
  Modules can be replaced by redefining them, and so if you wrap all your new code inside a module
-you can redefine types and constants.  You can't import the type names into `Main` and then expect
-to be able to redefine them there, but you can use the module name to resolve the scope.  In other
+you can redefine types and constants. You can't import the type names into `Main` and then expect
+to be able to redefine them there, but you can use the module name to resolve the scope. In other
 words, while developing you might use a workflow something like this:
 
 ```julia
@@ -118,7 +118,7 @@ If one needs functionality both available as a library and a script, it is bette
 
 Running a Julia script using `julia file.jl` does not throw
 [`InterruptException`](@ref) when you try to terminate it with CTRL-C
-(SIGINT).  To run a certain code before terminating a Julia script,
+(SIGINT). To run a certain code before terminating a Julia script,
 which may or may not be caused by CTRL-C, use [`atexit`](@ref).
 Alternatively, you can use `julia -e 'include(popfirst!(ARGS))'
 file.jl` to execute a script while being able to catch
@@ -151,7 +151,7 @@ invoking an [operating-system shell](https://en.wikipedia.org/wiki/Shell_(comput
 That means that `run` does not perform wildcard expansion of `*` (["globbing"](https://en.wikipedia.org/wiki/Glob_(programming))),
 nor does it interpret [shell pipelines](https://en.wikipedia.org/wiki/Pipeline_(Unix)) like `|` or `>`.
 
-You can still do globbing and pipelines using Julia features, however.  For example, the built-in
+You can still do globbing and pipelines using Julia features, however. For example, the built-in
 [`pipeline`](@ref) function allows you to chain external programs and files, similar to shell pipes, and
 the [Glob.jl package](https://github.com/vtjnash/Glob.jl) implements POSIX-compatible globbing.
 
@@ -175,7 +175,7 @@ end
 ```
 and notice that it works fine in an interactive environment (like the Julia REPL),
 but gives ```UndefVarError: `x` not defined``` when you try to run it in script or other
-file.   What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**.
+file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**.
 
 Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is
 an assignment to a global in that local scope.
@@ -184,7 +184,7 @@ As mentioned above, Julia (version 1.5 or later) allows you to omit the `global`
 keyword for code in the REPL (and many other interactive environments), to simplify
 exploration (e.g. copy-pasting code from a function to run interactively).
 However, once you move to code in files, Julia requires a more disciplined approach
-to global variables.  You have least three options:
+to global variables. You have least three options:
 
 1. Put the code into a function (so that `x` is a *local* variable in a function). In general, it is good software engineering to use functions rather than global scripts (search online for "why global variables bad" to see many explanations). In Julia, global variables are also [slow](@ref man-performance-tips).
 2. Wrap the code in a [`let`](@ref) block.  (This makes `x` a local variable within the `let ... end` statement, again eliminating the need for `global`).
@@ -252,7 +252,7 @@ the variables `A` and `x` were distinct bindings referring to the same mutable `
 
 ### Can I use `using` or `import` inside a function?
 
-No, you are not allowed to have a `using` or `import` statement inside a function.  If you want
+No, you are not allowed to have a `using` or `import` statement inside a function. If you want
 to import a module but only use its symbols inside a specific function or set of functions, you
 have two options:
 
@@ -266,7 +266,7 @@ have two options:
    ```
 
    This loads the module `Foo` and defines a variable `Foo` that refers to the module, but does not
-   import any of the other symbols from the module into the current namespace.  You refer to the
+   import any of the other symbols from the module into the current namespace. You refer to the
    `Foo` symbols by their qualified names `Foo.bar` etc.
 2. Wrap your function in a module:
 
@@ -395,7 +395,7 @@ julia> twothreearr()
 
 ### [What does "type-stable" mean?](@id man-type-stability)
 
-It means that the type of the output is predictable from the types of the inputs.  In particular,
+It means that the type of the output is predictable from the types of the inputs. In particular,
 it means that the type of the output cannot vary depending on the *values* of the inputs. The
 following code is *not* type-stable:
 
@@ -427,9 +427,9 @@ Stacktrace:
 [...]
 ```
 
-This behavior is an inconvenient consequence of the requirement for type-stability.  In the case
+This behavior is an inconvenient consequence of the requirement for type-stability. In the case
 of [`sqrt`](@ref), most users want `sqrt(2.0)` to give a real number, and would be unhappy if
-it produced the complex number `1.4142135623730951 + 0.0im`.  One could write the [`sqrt`](@ref)
+it produced the complex number `1.4142135623730951 + 0.0im`. One could write the [`sqrt`](@ref)
 function to switch to a complex-valued output only when passed a negative number (which is what
 [`sqrt`](@ref) does in some other languages), but then the result would not be [type-stable](@ref man-type-stability)
 and the [`sqrt`](@ref) function would have poor performance.
@@ -447,14 +447,14 @@ julia> sqrt(-2.0+0im)
 The parameters of a [parametric type](@ref Parametric-Types) can hold either
 types or bits values, and the type itself chooses how it makes use of these parameters.
 For example, `Array{Float64, 2}` is parameterized by the type `Float64` to express its
-element type and the integer value `2` to express its number of dimensions.  When
+element type and the integer value `2` to express its number of dimensions. When
 defining your own parametric type, you can use subtype constraints to declare that a
 certain parameter must be a subtype ([`<:`](@ref)) of some abstract type or a previous
-type parameter.  There is not, however, a dedicated syntax to declare that a parameter
+type parameter. There is not, however, a dedicated syntax to declare that a parameter
 must be a _value_ of a given type — that is, you cannot directly declare that a
 dimensionality-like parameter [`isa`](@ref) `Int` within the `struct` definition, for
-example.  Similarly, you cannot do computations (including simple things like addition
-or subtraction) on type parameters.  Instead, these sorts of constraints and
+example. Similarly, you cannot do computations (including simple things like addition
+or subtraction) on type parameters. Instead, these sorts of constraints and
 relationships may be expressed through additional type parameters that are computed
 and enforced within the type's [constructors](@ref man-constructors).
 
@@ -900,7 +900,7 @@ array to store the result. If you prefer to mutate `x`, use `x .+= y` to update
 individually.
 
 While this behavior might surprise some, the choice is deliberate. The main reason is the presence
-of immutable objects within Julia, which cannot change their value once created.  Indeed, a
+of immutable objects within Julia, which cannot change their value once created. Indeed, a
 number is an immutable object; the statements `x = 5; x += 1` do not modify the meaning of `5`,
 they modify the value bound to `x`. For an immutable, the only way to change the value is to reassign
 it.
@@ -943,7 +943,7 @@ Consider the printed output from the following:
 
 ```jldoctest
 julia> @sync for i in 1:3
-           @async write(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn write(stdout, string(i), " Foo ", " Bar ")
        end
 123 Foo  Foo  Foo  Bar  Bar  Bar
 ```
@@ -956,7 +956,7 @@ in the above example results in:
 
 ```jldoctest
 julia> @sync for i in 1:3
-           @async println(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn println(stdout, string(i), " Foo ", " Bar ")
        end
 1 Foo  Bar
 2 Foo  Bar
@@ -969,7 +969,7 @@ You can lock your writes with a `ReentrantLock` like this:
 julia> l = ReentrantLock();
 
 julia> @sync for i in 1:3
-           @async begin
+           Threads.@spawn begin
                lock(l)
                try
                    write(stdout, string(i), " Foo ", " Bar ")
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index 58d093d200bc9..be81fe529ef7d 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -73,9 +73,9 @@ function f(x, y)
 end
 ```
 The statement `x[1] = 42` *mutates* the object `x`, and hence this change *will* be visible in the array passed
-by the caller for this argument.   On the other hand, the assignment `y = 7 + y` changes the *binding* ("name")
+by the caller for this argument. On the other hand, the assignment `y = 7 + y` changes the *binding* ("name")
 `y` to refer to a new value `7 + y`, rather than mutating the *original* object referred to by `y`,
-and hence does *not* change the corresponding argument passed by the caller.   This can be seen if we call `f(x, y)`:
+and hence does *not* change the corresponding argument passed by the caller. This can be seen if we call `f(x, y)`:
 ```julia-repl
 julia> a = [4, 5, 6]
 3-element Vector{Int64}:
@@ -115,13 +115,13 @@ fib(n::Integer) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)
 ```
 and the `::Integer` specification means that it will only be callable when `n` is a subtype of the [abstract](@ref man-abstract-types) `Integer` type.
 
-Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller.   For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
+Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller. For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
 
-* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments.  For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
-* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types.  For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
+* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments. For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
+* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types. For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
 * **Clarity:** Type declarations can serve as a form of documentation about the expected arguments.
 
-However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate.    For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)).  If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason.   In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**.  You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
+However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate. For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)). If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason. In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**. You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
 
 ## The `return` Keyword
 
@@ -174,7 +174,7 @@ julia> function hypot(x, y)
                return x*sqrt(1 + r*r)
            end
            if y == 0
-               return zero(x)
+               return x
            end
            r = x/y
            return y*sqrt(1 + r*r)
@@ -208,7 +208,7 @@ See [Type Declarations](@ref) for more on return types.
 
 Return type declarations are **rarely used** in Julia: in general, you should
 instead write "type-stable" functions in which Julia's compiler can automatically
-infer the return type.  For more information, see the [Performance Tips](@ref man-performance-tips) chapter.
+infer the return type. For more information, see the [Performance Tips](@ref man-performance-tips) chapter.
 
 ### Returning nothing
 
@@ -292,12 +292,12 @@ syntaxes:
 
 ```jldoctest
 julia> x -> x^2 + 2x - 1
-#1 (generic function with 1 method)
+#2 (generic function with 1 method)
 
 julia> function (x)
            x^2 + 2x - 1
        end
-#3 (generic function with 1 method)
+#5 (generic function with 1 method)
 ```
 
 Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 +
@@ -1098,7 +1098,7 @@ julia> f.(A, B)
 ```
 
 Keyword arguments are not broadcasted over, but are simply passed through to each call of
-the function.  For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`.
+the function. For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`.
 
 Moreover, *nested* `f.(args...)` calls are *fused* into a single `broadcast` loop. For example,
 `sin.(cos.(X))` is equivalent to `broadcast(x -> sin(cos(x)), X)`, similar to `[sin(cos(x)) for x in X]`:
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index 36d54650388cd..2c69aabbda192 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -56,4 +56,4 @@ search: begin disable_sigint reenable_sigint
   begin...end denotes a block of code.
 ```
 
-If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips).
+If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips), or check out the comprehensive [ModernJuliaWorkflows](https://modernjuliaworkflows.org/) blog.
diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md
index 4c31871374aa2..0ee7850c92087 100644
--- a/doc/src/manual/integers-and-floating-point-numbers.md
+++ b/doc/src/manual/integers-and-floating-point-numbers.md
@@ -631,7 +631,7 @@ BigInt
 
 The default precision (in number of bits of the significand) and rounding mode of [`BigFloat`](@ref)
 operations can be changed globally by calling [`setprecision`](@ref) and [`setrounding`](@ref),
-and all further calculations will take these changes in account.  Alternatively, the precision
+and all further calculations will take these changes in account. Alternatively, the precision
 or the rounding can be changed only within the execution of a particular block of code by using
 the same functions with a `do` block:
 
@@ -699,7 +699,7 @@ julia> 2(x-1)^2 - 3(x-1) + 1
 !!! note
     The precedence of numeric literal coefficients used for implicit
     multiplication is higher than other binary operators such as multiplication
-    (`*`), and division (`/`, `\`, and `//`).  This means, for example, that
+    (`*`), and division (`/`, `\`, and `//`). This means, for example, that
     `1 / 2im` equals `-0.5im` and `6 // 2(2 + 1)` equals `1 // 1`.
 
 Additionally, parenthesized expressions can be used as coefficients to variables, implying multiplication
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index d158fb86575a2..e752448f14a25 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -91,7 +91,7 @@ julia> sum(Squares(100))
 ```
 
 There are a few more methods we can extend to give Julia more information about this iterable
-collection.  We know that the elements in a `Squares` sequence will always be `Int`. By extending
+collection. We know that the elements in a `Squares` sequence will always be `Int`. By extending
 the [`eltype`](@ref) method, we can give that information to Julia and help it make more specialized
 code in the more complicated methods. We also know the number of elements in our sequence, so
 we can extend [`length`](@ref), too:
@@ -131,7 +131,7 @@ to additionally specialize those extra behaviors when they know a more efficient
 be used in their specific case.
 
 It is also often useful to allow iteration over a collection in *reverse order*
-by iterating over [`Iterators.reverse(iterator)`](@ref).  To actually support
+by iterating over [`Iterators.reverse(iterator)`](@ref). To actually support
 reverse-order iteration, however, an iterator
 type `T` needs to implement `iterate` for `Iterators.Reverse{T}`.
 (Given `r::Iterators.Reverse{T}`, the underling iterator of type `T` is `r.itr`.)
@@ -158,7 +158,7 @@ julia> collect(Iterators.reverse(Squares(4)))
 | `lastindex(X)`           | The last index, used in `X[end]`   |
 
 For the `Squares` iterable above, we can easily compute the `i`th element of the sequence by squaring
-it.  We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares`
+it. We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares`
 simply needs to define [`getindex`](@ref):
 
 ```jldoctest squaretype
@@ -233,12 +233,12 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref
 | `similar(T::Union{Type,Function}, inds)`   | `T(Base.to_shape(inds))`               | Return an array similar to `T` with the specified indices `inds` (see below)          |
 
 If a type is defined as a subtype of `AbstractArray`, it inherits a very large set of rich behaviors
-including iteration and multidimensional indexing built on top of single-element access.  See
+including iteration and multidimensional indexing built on top of single-element access. See
 the [arrays manual page](@ref man-multi-dim-arrays) and the [Julia Base section](@ref lib-arrays) for more supported methods.
 
 A key part in defining an `AbstractArray` subtype is [`IndexStyle`](@ref). Since indexing is
 such an important part of an array and often occurs in hot loops, it's important to make both
-indexing and indexed assignment as efficient as possible.  Array data structures are typically
+indexing and indexed assignment as efficient as possible. Array data structures are typically
 defined in one of two ways: either it most efficiently accesses its elements using just one index
 (linear indexing) or it intrinsically accesses the elements with indices specified for every dimension.
  These two modalities are identified by Julia as `IndexLinear()` and `IndexCartesian()`.
@@ -246,7 +246,7 @@ defined in one of two ways: either it most efficiently accesses its elements usi
 provides a traits-based mechanism to enable efficient generic code for all array types.
 
 This distinction determines which scalar indexing methods the type must define. `IndexLinear()`
-arrays are simple: just define `getindex(A::ArrayType, i::Int)`.  When the array is subsequently
+arrays are simple: just define `getindex(A::ArrayType, i::Int)`. When the array is subsequently
 indexed with a multidimensional set of indices, the fallback `getindex(A::AbstractArray, I...)`
 efficiently converts the indices into one linear index and then calls the above method. `IndexCartesian()`
 arrays, on the other hand, require methods to be defined for each supported dimensionality with
@@ -407,7 +407,7 @@ perhaps range-types `Ind` of your own design. For more information, see
 
 A strided array is a subtype of `AbstractArray` whose entries are stored in memory with fixed strides.
 Provided the element type of the array is compatible with BLAS, a strided array can utilize BLAS and LAPACK routines
-for more efficient linear algebra routines.  A typical example of a user-defined strided array is one
+for more efficient linear algebra routines. A typical example of a user-defined strided array is one
 that wraps a standard `Array` with additional structure.
 
 Warning: do not implement these methods if the underlying storage is not actually strided, as it
@@ -464,7 +464,7 @@ container for broadcasting, then the following method should be defined:
 ```julia
 Base.broadcastable(o::MyType) = Ref(o)
 ```
-that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container.   For example, such a wrapper
+that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container. For example, such a wrapper
 method is defined for types themselves, functions, special singletons like [`missing`](@ref) and [`nothing`](@ref), and dates.
 
 Custom array-like types can specialize
@@ -526,8 +526,8 @@ similar(bc::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}) where {N,ElType}
 
 However, if needed you can specialize on any or all of these arguments. The final argument
 `bc` is a lazy representation of a (potentially fused) broadcast operation, a `Broadcasted`
-object.  For these purposes, the most important fields of the wrapper are
-`f` and `args`, describing the function and argument list, respectively.  Note that the argument
+object. For these purposes, the most important fields of the wrapper are
+`f` and `args`, describing the function and argument list, respectively. Note that the argument
 list can — and often does — include other nested `Broadcasted` wrappers.
 
 For a complete example, let's say you have created a type, `ArrayAndChar`, that stores an
diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md
index c333aeb56c5d9..d2cef68bd6fff 100644
--- a/doc/src/manual/mathematical-operations.md
+++ b/doc/src/manual/mathematical-operations.md
@@ -22,7 +22,7 @@ are supported on all primitive numeric types:
 | `x ^ y`    | power          | raises `x` to the `y`th power           |
 | `x % y`    | remainder      | equivalent to `rem(x, y)`               |
 
-A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x + y)`, is treated as a multiplication, except with higher precedence than other binary operations.  See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
+A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x + y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
 
 Julia's promotion system makes arithmetic operations on mixtures of argument types "just work"
 naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion
@@ -174,7 +174,7 @@ to perform `^` element-by-element on arrays. For example,
 `[1, 2, 3] ^ 3` is not defined, since there is no standard
 mathematical meaning to "cubing" a (non-square) array, but
 `[1, 2, 3] .^ 3` is defined as computing the elementwise
-(or "vectorized") result `[1^3, 2^3, 3^3]`.  Similarly for unary
+(or "vectorized") result `[1^3, 2^3, 3^3]`. Similarly for unary
 operators like `!` or `√`, there is a corresponding `.√` that
 applies the operator elementwise.
 
@@ -332,7 +332,7 @@ Mixed-type comparisons between signed integers, unsigned integers, and floats ca
 great deal of care has been taken to ensure that Julia does them correctly.
 
 For other types, `isequal` defaults to calling [`==`](@ref), so if you want to define
-equality for your own types then you only need to add a [`==`](@ref) method.  If you define
+equality for your own types then you only need to add a [`==`](@ref) method. If you define
 your own equality function, you should probably define a corresponding [`hash`](@ref) method
 to ensure that `isequal(x,y)` implies `hash(x) == hash(y)`.
 
@@ -551,21 +551,22 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define
 
 ### Powers, logs and roots
 
-| Function                 | Description                                                                |
-|:------------------------ |:-------------------------------------------------------------------------- |
-| [`sqrt(x)`](@ref), `√x`  | square root of `x`                                                         |
-| [`cbrt(x)`](@ref), `∛x`  | cube root of `x`                                                           |
-| [`hypot(x, y)`](@ref)    | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
-| [`exp(x)`](@ref)         | natural exponential function at `x`                                        |
-| [`expm1(x)`](@ref)       | accurate `exp(x) - 1` for `x` near zero                                    |
-| [`ldexp(x, n)`](@ref)    | `x * 2^n` computed efficiently for integer values of `n`                   |
-| [`log(x)`](@ref)         | natural logarithm of `x`                                                   |
-| [`log(b, x)`](@ref)      | base `b` logarithm of `x`                                                  |
-| [`log2(x)`](@ref)        | base 2 logarithm of `x`                                                    |
-| [`log10(x)`](@ref)       | base 10 logarithm of `x`                                                   |
-| [`log1p(x)`](@ref)       | accurate `log(1 + x)` for `x` near zero                                    |
-| [`exponent(x)`](@ref)    | binary exponent of `x`                                                     |
-| [`significand(x)`](@ref) | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
+| Function                      | Description                                                                |
+|:----------------------------- |:-------------------------------------------------------------------------- |
+| [`sqrt(x)`](@ref), `√x`       | square root of `x`                                                         |
+| [`cbrt(x)`](@ref), `∛x`       | cube root of `x`                                                           |
+| [`fourthroot(x)`](@ref), `∜x` | fourth root of `x`                                                         |
+| [`hypot(x, y)`](@ref)         | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
+| [`exp(x)`](@ref)              | natural exponential function at `x`                                        |
+| [`expm1(x)`](@ref)            | accurate `exp(x) - 1` for `x` near zero                                    |
+| [`ldexp(x, n)`](@ref)         | `x * 2^n` computed efficiently for integer values of `n`                   |
+| [`log(x)`](@ref)              | natural logarithm of `x`                                                   |
+| [`log(b, x)`](@ref)           | base `b` logarithm of `x`                                                  |
+| [`log2(x)`](@ref)             | base 2 logarithm of `x`                                                    |
+| [`log10(x)`](@ref)            | base 10 logarithm of `x`                                                   |
+| [`log1p(x)`](@ref)            | accurate `log(1 + x)` for `x` near zero                                    |
+| [`exponent(x)`](@ref)         | binary exponent of `x`                                                     |
+| [`significand(x)`](@ref)      | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
 
 For an overview of why functions like [`hypot`](@ref), [`expm1`](@ref), and [`log1p`](@ref)
 are necessary and useful, see John D. Cook's excellent pair of blog posts on the subject: [expm1, log1p, erfc](https://www.johndcook.com/blog/2010/06/07/math-library-functions-that-seem-unnecessary/),
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index 810f81f3e9c8f..6be44dcf4fa13 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -614,7 +614,7 @@ Start some other operations that use `f(x)`:
 julia> g(x) = f(x)
 g (generic function with 1 method)
 
-julia> t = @async f(wait()); yield();
+julia> t = Threads.@spawn f(wait()); yield();
 ```
 
 Now we add some new methods to `f(x)`:
@@ -639,7 +639,7 @@ julia> g(1)
 julia> fetch(schedule(t, 1))
 "original definition"
 
-julia> t = @async f(wait()); yield();
+julia> t = Threads.@spawn f(wait()); yield();
 
 julia> fetch(schedule(t, 1))
 "definition for Int"
@@ -664,7 +664,7 @@ abstract type AbstractArray{T, N} end
 eltype(::Type{<:AbstractArray{T}}) where {T} = T
 ```
 
-using so-called triangular dispatch.  Note that `UnionAll` types, for
+using so-called triangular dispatch. Note that `UnionAll` types, for
 example `eltype(AbstractArray{T} where T <: Integer)`, do not match the
 above method. The implementation of `eltype` in `Base` adds a fallback
 method to `Any` for such cases.
@@ -776,7 +776,7 @@ often referred to as a
 
 This pattern is implemented by defining a generic function which
 computes a different singleton value (or type) for each trait-set to which the
-function arguments may belong to.  If this function is pure there is
+function arguments may belong to. If this function is pure there is
 no impact on performance compared to normal dispatch.
 
 The example in the previous section glossed over the implementation details of
@@ -891,8 +891,8 @@ matmul(a, b) = matmul(promote(a, b)...)
 ## Parametrically-constrained Varargs methods
 
 Function parameters can also be used to constrain the number of arguments that may be supplied
-to a "varargs" function ([Varargs Functions](@ref)).  The notation `Vararg{T,N}` is used to indicate
-such a constraint.  For example:
+to a "varargs" function ([Varargs Functions](@ref)). The notation `Vararg{T,N}` is used to indicate
+such a constraint. For example:
 
 ```jldoctest
 julia> bar(a,b,x::Vararg{Any,2}) = (a,b,x)
@@ -1025,7 +1025,7 @@ function emptyfunc end
 ## [Method design and the avoidance of ambiguities](@id man-method-design-ambiguities)
 
 Julia's method polymorphism is one of its most powerful features, yet
-exploiting this power can pose design challenges.  In particular, in
+exploiting this power can pose design challenges. In particular, in
 more complex method hierarchies it is not uncommon for
 [ambiguities](@ref man-ambiguities) to arise.
 
@@ -1168,7 +1168,7 @@ sure this method is implemented with generic calls (like `similar` and
 When this approach is not possible, it may be worth starting a
 discussion with other developers about resolving the ambiguity; just
 because one method was defined first does not necessarily mean that it
-can't be modified or eliminated.  As a last resort, one developer can
+can't be modified or eliminated. As a last resort, one developer can
 define the "band-aid" method
 
 ```julia
diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md
index 64befb03c1ad5..6b335305ac569 100644
--- a/doc/src/manual/modules.md
+++ b/doc/src/manual/modules.md
@@ -108,7 +108,11 @@ To mark a name as public without exporting it into the namespace of folks who ca
 one can use `public` instead of `export`. This marks the public name(s) as part of the public API,
 but does not have any namespace implications. The `public` keyword is only available in Julia 1.11
 and above. To maintain compatibility with Julia 1.10 and below, use the `@compat` macro from the
-[Compat](https://github.com/JuliaLang/Compat.jl) package.
+[Compat](https://github.com/JuliaLang/Compat.jl) package, or the version-aware construct
+
+```julia
+VERSION >= v"1.11.0-DEV.469" && eval(Meta.parse("public a, b, c"))
+```
 
 ### Standalone `using` and `import`
 
@@ -434,7 +438,7 @@ Large modules can take several seconds to load because executing all of the stat
 often involves compiling a large amount of code.
 Julia creates precompiled caches of the module to reduce this time.
 
-Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module.  If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting
+Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting
 cache files will be stored in the `compiled` subfolder of `DEPOT_PATH[1]`. If nothing about your system changes,
 such cache files will be used when you load the module with `import` or `using`.
 
@@ -486,12 +490,12 @@ or other imported modules have their `__init__` functions called *before* the `_
 enclosing module.
 
 Two typical uses of `__init__` are calling runtime initialization functions of external C libraries
-and initializing global constants that involve pointers returned by external libraries.  For example,
+and initializing global constants that involve pointers returned by external libraries. For example,
 suppose that we are calling a C library `libfoo` that requires us to call a `foo_init()` initialization
 function at runtime. Suppose that we also want to define a global constant `foo_data_ptr` that
 holds the return value of a `void *foo_data()` function defined by `libfoo` -- this constant must
 be initialized at runtime (not at compile time) because the pointer address will change from run
-to run.  You could accomplish this by defining the following `__init__` function in your module:
+to run. You could accomplish this by defining the following `__init__` function in your module:
 
 ```julia
 const foo_data_ptr = Ref{Ptr{Cvoid}}(0)
@@ -516,9 +520,9 @@ null pointers unless they are hidden inside an [`isbits`](@ref) object). This in
 of the Julia functions [`@cfunction`](@ref) and [`pointer`](@ref).
 
 Dictionary and set types, or in general anything that depends on the output of a `hash(key)` method,
-are a trickier case.  In the common case where the keys are numbers, strings, symbols, ranges,
+are a trickier case. In the common case where the keys are numbers, strings, symbols, ranges,
 `Expr`, or compositions of these types (via arrays, tuples, sets, pairs, etc.) they are safe to
-precompile.  However, for a few other key types, such as `Function` or `DataType` and generic
+precompile. However, for a few other key types, such as `Function` or `DataType` and generic
 user-defined types where you haven't defined a `hash` method, the fallback `hash` method depends
 on the memory address of the object (via its `objectid`) and hence may change from run to run.
 If you have one of these key types, or if you aren't sure, to be safe you can initialize this
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index d16407efc3dcf..209e2ffe1da56 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -192,6 +192,7 @@ julia> a
 Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref).
 
 ### Using `@threads` without data-races
+
 The concept of a data-race is elaborated on in ["Communication and data races between threads"](@ref man-communication-and-data-races). For now, just known that a data race can result in incorrect results and dangerous errors.
 
 Lets say we want to make the function `sum_single` below multithreaded.
@@ -257,6 +258,9 @@ depending on the characteristics of the operations.
 Although Julia's threads can communicate through shared memory, it is notoriously difficult to write correct and data-race free multi-threaded code. Julia's
 [`Channel`](@ref)s are thread-safe and may be used to communicate safely. There are also sections below that explain how to use [locks](@ref man-using-locks) and [atomics](@ref man-atomic-operations) to avoid data-races.
 
+In certain cases, Julia is able to detect a detect safety violations, in particular in regards to deadlocks or other known-unsafe operations such as yielding
+to the currently running task. In these cases, a [`ConcurrencyViolationError`](@ref) is thrown.
+
 ### Data-race freedom
 
 You are entirely responsible for ensuring that your program is data-race free,
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 45bf60a7944d2..35ba7fdf16601 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -233,7 +233,7 @@ Let's first create a simple server:
 ```julia-repl
 julia> using Sockets
 
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2000)
            while true
                sock = accept(server)
@@ -305,11 +305,11 @@ printed the message and waited for the next client. Reading and writing works in
 To see this, consider the following simple echo server:
 
 ```julia-repl
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2001)
            while true
                sock = accept(server)
-               @async while isopen(sock)
+               Threads.@spawn while isopen(sock)
                    write(sock, readline(sock, keep=true))
                end
            end
@@ -319,7 +319,7 @@ Task (runnable) @0x00007fd31dc12e60
 julia> clientside = connect(2001)
 TCPSocket(RawFD(28) open, 0 bytes waiting)
 
-julia> errormonitor(@async while isopen(clientside)
+julia> errormonitor(Threads.@spawn while isopen(clientside)
            write(stdout, readline(clientside, keep=true))
        end)
 Task (runnable) @0x00007fd31dc11870
@@ -357,10 +357,10 @@ ip"74.125.226.225"
 
 All I/O operations exposed by [`Base.read`](@ref) and [`Base.write`](@ref) can be performed
 asynchronously through the use of [coroutines](@ref man-tasks). You can create a new coroutine to
-read from or write to a stream using the [`@async`](@ref) macro:
+read from or write to a stream using the [`Threads.@spawn`](@ref) macro:
 
 ```julia-repl
-julia> task = @async open("foo.txt", "w") do io
+julia> task = Threads.@spawn open("foo.txt", "w") do io
            write(io, "Hello, World!")
        end;
 
@@ -379,7 +379,7 @@ your program to block until all of the coroutines it wraps around have exited:
 julia> using Sockets
 
 julia> @sync for hostname in ("google.com", "github.com", "julialang.org")
-           @async begin
+           Threads.@spawn begin
                conn = connect(hostname, 80)
                write(conn, "GET / HTTP/1.1\r\nHost:$(hostname)\r\n\r\n")
                readline(conn, keep=true)
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index 16cf46e0dfc4a..181fe0a30eb38 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -56,6 +56,10 @@ may trip up Julia users accustomed to MATLAB:
   * In Julia, if `A` and `B` are arrays, logical comparison operations like `A == B` do not return
     an array of booleans. Instead, use `A .== B`, and similarly for the other boolean operators like
     [`<`](@ref), [`>`](@ref).
+  * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting
+    syntax: `f.(A)` instead of `f(A)`. In some cases, both operations are defined but mean different things:
+    in MATLAB `exp(A)` applies elementwise and `expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential),
+    but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential.
   * In Julia, the operators [`&`](@ref), [`|`](@ref), and [`⊻`](@ref xor) ([`xor`](@ref)) perform the
     bitwise operations equivalent to `and`, `or`, and `xor` respectively in MATLAB, and have precedence
     similar to Python's bitwise operators (unlike C). They can operate on scalars or element-wise
@@ -78,6 +82,9 @@ may trip up Julia users accustomed to MATLAB:
     provides the higher order functions [`filter`](@ref) and [`filter!`](@ref), allowing users
     to write `filter(z->z>3, x)` and `filter!(z->z>3, x)` as alternatives to the corresponding transliterations
     `x[x.>3]` and `x = x[x.>3]`. Using [`filter!`](@ref) reduces the use of temporary arrays.
+  * Following on from the previous point, to replace values that meet specific criteria, for example a
+    thresholding operation on all elements in a matrix, could be achieved in Matlab as follows `A(A < threshold) = 0`.
+    The Julia equivalent would be `A[A .< threshold] .= 0`.
   * The analogue of extracting (or "dereferencing") all elements of a cell array, e.g. in `vertcat(A{:})`
     in MATLAB, is written using the splat operator in Julia, e.g. as `vcat(A...)`.
   * In Julia, the `adjoint` function performs conjugate transposition; in MATLAB, `adjoint` provides the
@@ -106,7 +113,7 @@ For users coming to Julia from R, these are some noteworthy differences:
       * In Julia, `[1, 2, 3, 4][[true, false]]` throws a [`BoundsError`](@ref).
       * In Julia, `[1, 2, 3, 4][[true, false, true, false]]` produces `[1, 3]`.
   * Like many languages, Julia does not always allow operations on vectors of different lengths, unlike
-    R where the vectors only need to share a common index range.  For example, `c(1, 2, 3, 4) + c(1, 2)`
+    R where the vectors only need to share a common index range. For example, `c(1, 2, 3, 4) + c(1, 2)`
     is valid R but the equivalent `[1, 2, 3, 4] + [1, 2]` will throw an error in Julia.
   * Julia allows an optional trailing comma when that comma does not change the meaning of code.
     This can cause confusion among R users when indexing into arrays. For example, `x[1,]` in R
@@ -137,7 +144,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     or `if 1==1`.
   * Julia does not provide `nrow` and `ncol`. Instead, use `size(M, 1)` for `nrow(M)` and `size(M, 2)`
     for `ncol(M)`.
-  * Julia is careful to distinguish scalars, vectors and matrices.  In R, `1` and `c(1)` are the same.
+  * Julia is careful to distinguish scalars, vectors and matrices. In R, `1` and `c(1)` are the same.
     In Julia, they cannot be used interchangeably.
   * Julia's [`diag`](@ref) and [`diagm`](@ref) are not like R's.
   * Julia cannot assign to the results of function calls on the left hand side of an assignment operation:
@@ -167,12 +174,12 @@ For users coming to Julia from R, these are some noteworthy differences:
     have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in
     Julia is equivalent to `1:(n-1)` in R.
   * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively
-    in R, but both arguments need to have the same dimensions.  While [`maximum`](@ref) and [`minimum`](@ref)
+    in R, but both arguments need to have the same dimensions. While [`maximum`](@ref) and [`minimum`](@ref)
     replace `max` and `min` in R, there are important differences.
   * Julia's [`sum`](@ref), [`prod`](@ref), [`maximum`](@ref), and [`minimum`](@ref) are different
     from their counterparts in R. They all accept an optional keyword argument `dims`, which indicates the
-    dimensions, over which the operation is carried out.  For instance, let `A = [1 2; 3 4]` in Julia
-    and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R.  Then `sum(A)` gives the same result as
+    dimensions, over which the operation is carried out. For instance, let `A = [1 2; 3 4]` in Julia
+    and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R. Then `sum(A)` gives the same result as
     `sum(B)`, but `sum(A, dims=1)` is a row vector containing the sum over each column and `sum(A, dims=2)`
     is a column vector containing the sum over each row. This contrasts to the behavior of R, where separate
     `colSums(B)` and `rowSums(B)` functions provide these functionalities. If the `dims` keyword argument is a
@@ -217,7 +224,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing.
   * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia.
   * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses
-    in array-indexing.  The `range` function is also supported.
+    in array-indexing. The `range` function is also supported.
   * In Julia, indexing a matrix with arrays like `X[[1,2], [1,3]]` refers to a sub-matrix that contains the intersections of the first and second rows with the first and third columns. In Python, `X[[1,2], [1,3]]` refers to a vector that contains the values of cell `[1,1]` and `[2,3]` in the matrix. `X[[1,2], [1,3]]` in Julia is equivalent with `X[np.ix_([0,1],[0,2])]` in Python. `X[[0,1], [0,2]]` in Python is equivalent with `X[[CartesianIndex(1,1), CartesianIndex(2,3)]]` in Julia.
   * Julia has no line continuation syntax: if, at the end of a line, the input so far is a complete
     expression, it is considered done; otherwise the input continues. One way to force an expression
@@ -245,6 +252,10 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, the exponentiation operator is `^`, not `**` as in Python.
   * Julia uses `nothing` of type `Nothing` to represent a null value, whereas Python uses `None` of type `NoneType`.
   * In Julia, the standard operators over a matrix type are matrix operations, whereas, in Python, the standard operators are element-wise operations. When both `A` and `B` are matrices, `A * B` in Julia performs matrix multiplication, not element-wise multiplication as in Python. `A * B` in Julia is equivalent with `A @ B` in Python, whereas `A * B` in Python is equivalent with `A .* B` in Julia.
+  * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting
+    syntax: `f.(A)` instead of `f(A)`. In some cases, both operations are defined but mean different things:
+    `numpy.exp(A)` applies elementwise and `scipy.linalg.expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential),
+    but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential.
   * The adjoint operator `'` in Julia returns an adjoint of a vector (a lazy representation of row vector), whereas the transpose operator `.T` over a vector in Python returns the original vector (non-op).
   * In Julia, a function may contain multiple concrete implementations (called *methods*), which are selected via multiple dispatch based on the types of all arguments to the call, as compared to functions in Python, which have a single implementation and no polymorphism (as opposed to Python method calls which use a different syntax and allows dispatch on the receiver of the method).
   * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods.
@@ -259,7 +270,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Be careful with non-constant global variables in Julia, especially in tight loops. Since you can write close-to-metal code in Julia (unlike Python), the effect of globals can be drastic (see [Performance Tips](@ref man-performance-tips)).
   * In Julia, rounding and truncation are explicit. Python's `int(3.7)` should be `floor(Int, 3.7)` or `Int(floor(3.7))` and is distinguished from `round(Int, 3.7)`. `floor(x)` and `round(x)` on their own return an integer value of the same type as `x` rather than always returning `Int`.
   * In Julia, parsing is explicit. Python's `float("3.7")` would be `parse(Float64, "3.7")` in Julia.
-  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.  Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
+  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`. Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
   * In Julia, a new local scope is introduced by most code blocks, including loops and `try` — `catch` — `finally`. Note that comprehensions (list, generator, etc.) introduce a new local scope both in Python and Julia, whereas `if` blocks do not introduce a new local scope in both languages.
 
 ## Noteworthy differences from C/C++
@@ -296,7 +307,7 @@ For users coming to Julia from R, these are some noteworthy differences:
      Floating point literals are closer in behavior to C/C++. Octal (prefixed with `0o`) and binary
     (prefixed with `0b`) literals are also treated as unsigned (or `BigInt` for more than 128 bits).
   * In Julia, the division operator [`/`](@ref) returns a floating point number when both operands
-    are of integer type.  To perform integer division, use [`div`](@ref) or [`÷`](@ref div).
+    are of integer type. To perform integer division, use [`div`](@ref) or [`÷`](@ref div).
   * Indexing an `Array` with floating point types is generally an error in Julia. The Julia
     equivalent of the C expression `a[i / 2]` is `a[i ÷ 2 + 1]`, where `i` is of integer type.
   * String literals can be delimited with either `"`  or `"""`, `"""` delimited literals can contain
@@ -305,7 +316,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     evaluates the variable name or the expression in the context of the function.
   * `//` indicates a [`Rational`](@ref) number, and not a single-line comment (which is `#` in Julia)
   * `#=` indicates the start of a multiline comment, and `=#` ends it.
-  * Functions in Julia return values from their last expression(s) or the `return` keyword.  Multiple
+  * Functions in Julia return values from their last expression(s) or the `return` keyword. Multiple
     values can be returned from functions and assigned as tuples, e.g. `(a, b) = myfunction()` or
     `a, b = myfunction()`, instead of having to pass pointers to values as one would have to do in
     C/C++ (i.e. `a = myfunction(&b)`.
@@ -316,7 +327,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     meaning within `[ ]`, something to watch out for. `;` can be used to separate expressions on a
     single line, but are not strictly necessary in many cases, and are more an aid to readability.
   * In Julia, the operator [`⊻`](@ref xor) ([`xor`](@ref)) performs the bitwise XOR operation, i.e.
-    [`^`](@ref) in C/C++.  Also, the bitwise operators do not have the same precedence as C/C++, so
+    [`^`](@ref) in C/C++. Also, the bitwise operators do not have the same precedence as C/C++, so
     parenthesis may be required.
   * Julia's [`^`](@ref) is exponentiation (pow), not bitwise XOR as in C/C++ (use [`⊻`](@ref xor), or
     [`xor`](@ref), in Julia)
@@ -355,7 +366,7 @@ For users coming to Julia from R, these are some noteworthy differences:
 
 ### Julia ⇔ C/C++: Namespaces
   * C/C++ `namespace`s correspond roughly to Julia `module`s.
-  * There are no private globals or fields in Julia.  Everything is publicly accessible
+  * There are no private globals or fields in Julia. Everything is publicly accessible
     through fully qualified paths (or relative paths, if desired).
   * `using MyNamespace::myfun` (C++) corresponds roughly to `import MyModule: myfun` (Julia).
   * `using namespace MyNamespace` (C++) corresponds roughly to `using MyModule` (Julia)
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index a3e9ab1b8739e..38e27476f0af8 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -3,7 +3,9 @@
 In the following sections, we briefly go through a few techniques that can help make your Julia
 code run as fast as possible.
 
-## Performance critical code should be inside a function
+## General advice
+
+### Performance critical code should be inside a function
 
 Any code that is performance critical should be inside a function. Code inside functions tends to run much faster than top level code, due to how Julia's compiler works.
 
@@ -11,7 +13,7 @@ The use of functions is not only important for performance: functions are more r
 
 The functions should take arguments, instead of operating directly on global variables, see the next point.
 
-## Avoid untyped global variables
+### Avoid untyped global variables
 
 The value of an untyped global variable might change at any point, possibly leading to a change of its type. This makes
 it difficult for the compiler to optimize code using global variables. This also applies to type-valued variables,
@@ -24,7 +26,7 @@ performance:
 const DEFAULT_VAL = 0
 ```
 
-If a global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals).
+If a non-constant global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals); `const` globals need not be annotated because their type is inferred from their initialization value.
 
 Uses of untyped globals can be optimized by annotating their types at the point of use:
 
@@ -61,7 +63,7 @@ julia> global x = 1.0
 
 so all the performance issues discussed previously apply.
 
-## Measure performance with [`@time`](@ref) and pay attention to memory allocation
+### Measure performance with [`@time`](@ref) and pay attention to memory allocation
 
 A useful tool for measuring performance is the [`@time`](@ref) macro. We here repeat the example
 with the global variable above, but this time with the type annotation removed:
@@ -94,8 +96,8 @@ a vector of 64-bit floats so there should be no need to allocate (heap) memory.
 
 We should clarify that what `@time` reports is specifically *heap* allocations, which are typically needed for either
 mutable objects or for creating/growing variable-sized containers (such as `Array` or `Dict`, strings, or "type-unstable"
-objects whose type is only known at runtime).  Allocating (or deallocating) such blocks of memory may require an expensive function
-call to libc (e.g. via `malloc` in C), and they must be tracked for garbage collection.  In contrast, immutable values like
+objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive function
+call to libc (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like
 numbers (except bignums), tuples, and immutable `struct`s can be stored much more cheaply, e.g. in stack or CPU-register
 memory, so one doesn’t typically worry about the performance cost of "allocating" them.
 
@@ -149,7 +151,38 @@ its algorithmic aspects (see [Pre-allocating outputs](@ref)).
     For more serious benchmarking, consider the [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl)
     package which among other things evaluates the function multiple times in order to reduce noise.
 
-## [Tools](@id tools)
+### Break functions into multiple definitions
+
+Writing a function as many small definitions allows the compiler to directly call the most applicable
+code, or even inline it.
+
+Here is an example of a "compound function" that should really be written as multiple definitions:
+
+```julia
+using LinearAlgebra
+
+function mynorm(A)
+    if isa(A, Vector)
+        return sqrt(real(dot(A,A)))
+    elseif isa(A, Matrix)
+        return maximum(svdvals(A))
+    else
+        error("mynorm: invalid argument")
+    end
+end
+```
+
+This can be written more concisely and efficiently as:
+
+```julia
+mynorm(x::Vector) = sqrt(real(dot(x, x)))
+mynorm(A::Matrix) = maximum(svdvals(A))
+```
+
+It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code
+written as the `mynorm` example.
+
+### [Tools](@id tools)
 
 Julia and its package ecosystem includes tools that may help you diagnose problems and improve
 the performance of your code:
@@ -166,7 +199,14 @@ the performance of your code:
   * `@code_warntype` generates a representation of your code that can be helpful in finding expressions
     that result in type uncertainty. See [`@code_warntype`](@ref) below.
 
-## [Avoid containers with abstract type parameters](@id man-performance-abstract-container)
+## Type inference
+
+In many languages with optional type declarations, adding declarations is the principal way to
+make code run faster. This is *not* the case in Julia. In Julia, the compiler generally knows
+the types of all function arguments, local variables, and expressions. However, there are a few
+specific instances where declarations are helpful.
+
+### [Avoid containers with abstract type parameters](@id man-performance-abstract-container)
 
 When working with parameterized types, including arrays, it is best to avoid parameterizing with
 abstract types where possible.
@@ -210,13 +250,6 @@ better than `IdDict{Type, Vector}`
 
 See also the discussion under [Parametric Types](@ref).
 
-## Type declarations
-
-In many languages with optional type declarations, adding declarations is the principal way to
-make code run faster. This is *not* the case in Julia. In Julia, the compiler generally knows
-the types of all function arguments, local variables, and expressions. However, there are a few
-specific instances where declarations are helpful.
-
 ### Avoid fields with abstract type
 
 Types can be declared without specifying the types of their fields:
@@ -608,38 +641,7 @@ would not normally specialize that method call. You need to check the
 when argument types are changed, i.e., if `Base.specializations(@which f(...))` contains specializations
 for the argument in question.
 
-## Break functions into multiple definitions
-
-Writing a function as many small definitions allows the compiler to directly call the most applicable
-code, or even inline it.
-
-Here is an example of a "compound function" that should really be written as multiple definitions:
-
-```julia
-using LinearAlgebra
-
-function mynorm(A)
-    if isa(A, Vector)
-        return sqrt(real(dot(A,A)))
-    elseif isa(A, Matrix)
-        return maximum(svdvals(A))
-    else
-        error("mynorm: invalid argument")
-    end
-end
-```
-
-This can be written more concisely and efficiently as:
-
-```julia
-mynorm(x::Vector) = sqrt(real(dot(x, x)))
-mynorm(A::Matrix) = maximum(svdvals(A))
-```
-
-It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code
-written as the `mynorm` example.
-
-## Write "type-stable" functions
+### Write "type-stable" functions
 
 When possible, it helps to ensure that a function always returns a value of the same type. Consider
 the following definition:
@@ -660,7 +662,7 @@ pos(x) = x < 0 ? zero(x) : x
 There is also a [`oneunit`](@ref) function, and a more general [`oftype(x, y)`](@ref) function, which
 returns `y` converted to the type of `x`.
 
-## Avoid changing the type of a variable
+### Avoid changing the type of a variable
 
 An analogous "type-stability" problem exists for variables used repeatedly within a function:
 
@@ -683,7 +685,7 @@ optimize the body of the loop. There are several possible fixes:
   * Use an explicit conversion by `x = oneunit(Float64)`
   * Initialize with the first loop iteration, to `x = 1 / rand()`, then loop `for i = 2:10`
 
-## [Separate kernel functions (aka, function barriers)](@id kernel-functions)
+### [Separate kernel functions (aka, function barriers)](@id kernel-functions)
 
 Many functions follow a pattern of performing some set-up work, and then running many iterations
 to perform a core computation. Where possible, it is a good idea to put these core computations
@@ -742,7 +744,172 @@ or the [`fill!`](@ref) function, which we could have used instead of writing our
 Functions like `strange_twos` occur when dealing with data of uncertain type, for example data
 loaded from an input file that might contain either integers, floats, strings, or something else.
 
-## [Types with values-as-parameters](@id man-performance-value-type)
+### [[`@code_warntype`](@ref)](@id man-code-warntype)
+
+The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes
+be helpful in diagnosing type-related problems. Here's an example:
+
+```julia-repl
+julia> @noinline pos(x) = x < 0 ? 0 : x;
+
+julia> function f(x)
+           y = pos(x)
+           return sin(y*x + 1)
+       end;
+
+julia> @code_warntype f(3.2)
+MethodInstance for f(::Float64)
+  from f(x) @ Main REPL[9]:1
+Arguments
+  #self#::Core.Const(f)
+  x::Float64
+Locals
+  y::Union{Float64, Int64}
+Body::Float64
+1 ─      (y = Main.pos(x))
+│   %2 = (y * x)::Float64
+│   %3 = (%2 + 1)::Float64
+│   %4 = Main.sin(%3)::Float64
+└──      return %4
+```
+
+Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref),
+[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice.
+Your code is being presented in form that has been heavily digested on its way to generating
+compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T`
+(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref)
+is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color,
+in this document, red text is denoted by uppercase.
+
+At the top, the inferred return type of the function is shown as `Body::Float64`.
+The next lines represent the body of `f` in Julia's SSA IR form.
+The numbered boxes are labels and represent targets for jumps (via `goto`) in your code.
+Looking at the body, you can see that the first thing that happens is that `pos` is called and the
+return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since
+it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the
+input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64`
+The net result is that `f(x::Float64)` will not be type-unstable
+in its output, even if some of the intermediate computations are type-unstable.
+
+How you use this information is up to you. Obviously, it would be far and away best to fix `pos`
+to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance
+would be optimal. However, there are circumstances where this kind of *ephemeral* type instability
+might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s
+output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating
+effects of type instability. This is particularly relevant in cases where fixing the type instability
+is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or
+breaking up functions) are your best tools to contain the "damage" from type instability.
+Also, note that even Julia Base has functions that are type unstable.
+For example, the function [`findfirst`](@ref) returns the index into an array where a key is found,
+or `nothing` if it is not found, a clear type instability. In order to make it easier to find the
+type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing`
+are color highlighted in yellow, instead of red.
+
+The following examples may help you interpret expressions marked as containing non-leaf types:
+
+  * Function body starting with `Body::Union{T1,T2})`
+      * Interpretation: function with unstable return type
+      * Suggestion: make the return value type-stable, even if you have to annotate it
+
+  * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}`
+      * Interpretation: call to a type-unstable function `g`.
+      * Suggestion: fix the function, or if necessary annotate the return value
+
+  * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any`
+      * Interpretation: accessing elements of poorly-typed arrays
+      * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual
+        element accesses
+
+  * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N`
+      * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a
+        field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
+      * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
+        of `ArrayContainer`
+
+### [Performance of captured variable](@id man-performance-captured)
+
+Consider the following example that defines an inner function:
+```julia
+function abmult(r::Int)
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return f
+end
+```
+
+Function `abmult` returns a function `f` that multiplies its argument by
+the absolute value of `r`. The inner function assigned to `f` is called a
+"closure". Inner functions are also used by the
+language for `do`-blocks and for generator expressions.
+
+This style of code presents performance challenges for the language.
+The parser, when translating it into lower-level instructions,
+substantially reorganizes the above code by extracting the
+inner function to a separate code block.  "Captured" variables such as `r`
+that are shared by inner functions and their enclosing scope are
+also extracted into a heap-allocated "box" accessible to both inner and
+outer functions because the language specifies that `r` in the
+inner scope must be identical to `r` in the outer scope even after the
+outer scope (or another inner function) modifies `r`.
+
+The discussion in the preceding paragraph referred to the "parser", that is, the phase
+of compilation that takes place when the module containing `abmult` is first loaded,
+as opposed to the later phase when it is first invoked. The parser does not "know" that
+`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`.
+The magic of type inference takes place in the later phase of compilation.
+
+Thus, the parser does not know that `r` has a fixed type (`Int`).
+Nor that `r` does not change value once the inner function is created (so that
+the box is unneeded). Therefore, the parser emits code for
+box that holds an object with an abstract type such as `Any`, which
+requires run-time type dispatch for each occurrence of `r`. This can be
+verified by applying `@code_warntype` to the above function. Both the boxing
+and the run-time type dispatch can cause loss of performance.
+
+If captured variables are used in a performance-critical section of the code,
+then the following tips help ensure that their use is performant. First, if
+it is known that a captured variable does not change its type, then this can
+be declared explicitly with a type annotation (on the variable, not the
+right-hand side):
+```julia
+function abmult2(r0::Int)
+    r::Int = r0
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return f
+end
+```
+The type annotation partially recovers lost performance due to capturing because
+the parser can associate a concrete type to the object in the box.
+Going further, if the captured variable does not need to be boxed at all (because it
+will not be reassigned after the closure is created), this can be indicated
+with `let` blocks as follows.
+```julia
+function abmult3(r::Int)
+    if r < 0
+        r = -r
+    end
+    f = let r = r
+            x -> x * r
+    end
+    return f
+end
+```
+The `let` block creates a new variable `r` whose scope is only the
+inner function. The second technique recovers full language performance
+in the presence of captured variables. Note that this is a rapidly
+evolving aspect of the compiler, and it is likely that future releases
+will not require this degree of programmer annotation to attain performance.
+In the mean time, some user-contributed packages like
+[FastClosures](https://github.com/c42f/FastClosures.jl) automate the
+insertion of `let` statements as in `abmult3`.
+
+
+### [Types with values-as-parameters](@id man-performance-value-type)
 
 Let's say you want to create an `N`-dimensional array that has size 3 along each axis. Such arrays
 can be created like this:
@@ -832,7 +999,7 @@ In this example, `N` is passed as a parameter, so its "value" is known to the co
 `Val(T)` works only when `T` is either hard-coded/literal (`Val(3)`) or already specified in the
 type-domain.
 
-## The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters)
+### The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters)
 
 Once one learns to appreciate multiple dispatch, there's an understandable tendency to go overboard
 and try to use it for everything. For example, you might imagine using it to store information,
@@ -879,104 +1046,15 @@ or thousands of variants compiled for it. Each of these increases the size of th
 code, the length of internal lists of methods, etc. Excess enthusiasm for values-as-parameters
 can easily waste enormous resources.
 
-## [Access arrays in memory order, along columns](@id man-performance-column-major)
+## Memory management and arrays
 
-Multidimensional arrays in Julia are stored in column-major order. This means that arrays are
-stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]`
-as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`):
+### Pre-allocate outputs
 
-```jldoctest
-julia> x = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
+If your function returns an `Array` or some other complex type, it may have to allocate memory.
+Unfortunately, oftentimes allocation and its converse, garbage collection, are substantial bottlenecks.
 
-julia> x[:]
-4-element Vector{Int64}:
- 1
- 3
- 2
- 4
-```
-
-This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to
-name a few). The alternative to column-major ordering is row-major ordering, which is the convention
-adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can
-have significant performance effects when looping over arrays. A rule of thumb to keep in mind
-is that with column-major arrays, the first index changes most rapidly. Essentially this means
-that looping will be faster if the inner-most loop index is the first to appear in a slice expression.
-Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example.
-
-Consider the following contrived example. Imagine we wanted to write a function that accepts a
-[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies
-of the input vector. Assume that it is not important whether rows or columns are filled with these
-copies (perhaps the rest of the code can be easily adapted accordingly). We could conceivably
-do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)):
-
-```julia
-function copy_cols(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for i = inds
-        out[:, i] = x
-    end
-    return out
-end
-
-function copy_rows(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for i = inds
-        out[i, :] = x
-    end
-    return out
-end
-
-function copy_col_row(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for col = inds, row = inds
-        out[row, col] = x[row]
-    end
-    return out
-end
-
-function copy_row_col(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for row = inds, col = inds
-        out[row, col] = x[col]
-    end
-    return out
-end
-```
-
-Now we will time each of these functions using the same random `10000` by `1` input vector:
-
-```julia-repl
-julia> x = randn(10000);
-
-julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x))
-
-julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]);
-copy_cols:    0.331706323
-copy_rows:    1.799009911
-copy_col_row: 0.415630047
-copy_row_col: 1.721531501
-```
-
-Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols`
-respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally,
-`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the
-first element to appear in a slice expression should be coupled with the inner-most loop.
-
-## Pre-allocating outputs
-
-If your function returns an `Array` or some other complex type, it may have to allocate memory.
-Unfortunately, oftentimes allocation and its converse, garbage collection, are substantial bottlenecks.
-
-Sometimes you can circumvent the need to allocate memory on each function call by preallocating
-the output. As a trivial example, compare
+Sometimes you can circumvent the need to allocate memory on each function call by preallocating
+the output. As a trivial example, compare
 
 ```jldoctest prealloc
 julia> function xinc(x)
@@ -1035,21 +1113,55 @@ some judgment may be required. However, for "vectorized" (element-wise) function
 syntax `x .= f.(y)` can be used for in-place operations with fused loops and no temporary arrays
 (see the [dot syntax for vectorizing functions](@ref man-vectorized)).
 
-## [Use `MutableArithmetics` for more control over allocation for mutable arithmetic types](@id man-perftips-mutablearithmetics)
+### [Consider using views for slices](@id man-performance-views)
 
-Some [`Number`](@ref) subtypes, such as [`BigInt`](@ref) or [`BigFloat`](@ref), may
-be implemented as [`mutable struct`](@ref) types, or they may have mutable
-components. The arithmetic interfaces in Julia `Base` usually opt for convenience
-over efficiency in such cases, so using them in a naive manner may result in
-suboptimal performance. The abstractions of the
-[`MutableArithmetics`](https://juliahub.com/ui/Packages/General/MutableArithmetics)
-package, on the other hand, make it possible to exploit the mutability of such types
-for writing fast code that allocates only as much as necessary. `MutableArithmetics`
-also makes it possible to copy values of mutable arithmetic types explicitly when
-necessary. `MutableArithmetics` is a user package and is not affiliated with the
-Julia project.
+In Julia, an array "slice" expression like `array[1:5, :]` creates
+a copy of that data (except on the left-hand side of an assignment,
+where `array[1:5, :] = ...` assigns in-place to that portion of `array`).
+If you are doing many operations on the slice, this can be good for
+performance because it is more efficient to work with a smaller
+contiguous copy than it would be to index into the original array.
+On the other hand, if you are just doing a few simple operations on
+the slice, the cost of the allocation and copy operations can be
+substantial.
+
+An alternative is to create a "view" of the array, which is
+an array object (a `SubArray`) that actually references the data
+of the original array in-place, without making a copy. (If you
+write to a view, it modifies the original array's data as well.)
+This can be done for individual slices by calling [`view`](@ref),
+or more simply for a whole expression or block of code by putting
+[`@views`](@ref) in front of that expression. For example:
+
+```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)"
+julia> fcopy(x) = sum(x[2:end-1]);
+
+julia> @views fview(x) = sum(x[2:end-1]);
+
+julia> x = rand(10^6);
+
+julia> @time fcopy(x);
+  0.003051 seconds (3 allocations: 7.629 MB)
+
+julia> @time fview(x);
+  0.001020 seconds (1 allocation: 16 bytes)
+```
+
+Notice both the 3× speedup and the decreased memory allocation
+of the `fview` version of the function.
+
+### Consider StaticArrays.jl for small fixed-size vector/matrix operations
 
-## More dots: Fuse vectorized operations
+If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. the size is
+known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl).
+This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to
+specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers.
+
+For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors. By
+using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on
+vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`.
+
+### More dots: Fuse vectorized operations
 
 Julia has a special [dot syntax](@ref man-vectorized) that converts
 any scalar function into a "vectorized" function call, and any operator
@@ -1095,7 +1207,7 @@ a new temporary array and executes in a separate loop. In this example
 convenient to sprinkle some dots in your expressions than to
 define a separate function for each vectorized operation.
 
-## [Fewer dots: Unfuse certain intermediate broadcasts](@id man-performance-unfuse)
+### [Fewer dots: Unfuse certain intermediate broadcasts](@id man-performance-unfuse)
 
 The dot loop fusion mentioned above enables concise and idiomatic code to express highly performant operations. However, it is important to remember that the fused operation will be computed at every iteration of the broadcast. This means that in some situations, particularly in the presence of composed or multidimensional broadcasts, an expression with dot calls may be computing a function more times than intended. As an example, say we want to build a random matrix whose rows have Euclidean norm one. We might write something like the following:
 ```
@@ -1122,44 +1234,98 @@ julia> @time x ./= map(sqrt, d);
 
 Any of these options yields approximately a three-fold speedup at the cost of an allocation; for large broadcastables this speedup can be asymptotically very large.
 
-## [Consider using views for slices](@id man-performance-views)
+### [Access arrays in memory order, along columns](@id man-performance-column-major)
 
-In Julia, an array "slice" expression like `array[1:5, :]` creates
-a copy of that data (except on the left-hand side of an assignment,
-where `array[1:5, :] = ...` assigns in-place to that portion of `array`).
-If you are doing many operations on the slice, this can be good for
-performance because it is more efficient to work with a smaller
-contiguous copy than it would be to index into the original array.
-On the other hand, if you are just doing a few simple operations on
-the slice, the cost of the allocation and copy operations can be
-substantial.
+Multidimensional arrays in Julia are stored in column-major order. This means that arrays are
+stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]`
+as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`):
 
-An alternative is to create a "view" of the array, which is
-an array object (a `SubArray`) that actually references the data
-of the original array in-place, without making a copy. (If you
-write to a view, it modifies the original array's data as well.)
-This can be done for individual slices by calling [`view`](@ref),
-or more simply for a whole expression or block of code by putting
-[`@views`](@ref) in front of that expression. For example:
+```jldoctest
+julia> x = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
 
-```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)"
-julia> fcopy(x) = sum(x[2:end-1]);
+julia> x[:]
+4-element Vector{Int64}:
+ 1
+ 3
+ 2
+ 4
+```
 
-julia> @views fview(x) = sum(x[2:end-1]);
+This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to
+name a few). The alternative to column-major ordering is row-major ordering, which is the convention
+adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can
+have significant performance effects when looping over arrays. A rule of thumb to keep in mind
+is that with column-major arrays, the first index changes most rapidly. Essentially this means
+that looping will be faster if the inner-most loop index is the first to appear in a slice expression.
+Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example.
 
-julia> x = rand(10^6);
+Consider the following contrived example. Imagine we wanted to write a function that accepts a
+[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies
+of the input vector. Assume that it is not important whether rows or columns are filled with these
+copies (perhaps the rest of the code can be easily adapted accordingly). We could conceivably
+do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)):
 
-julia> @time fcopy(x);
-  0.003051 seconds (3 allocations: 7.629 MB)
+```julia
+function copy_cols(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for i = inds
+        out[:, i] = x
+    end
+    return out
+end
 
-julia> @time fview(x);
-  0.001020 seconds (1 allocation: 16 bytes)
+function copy_rows(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for i = inds
+        out[i, :] = x
+    end
+    return out
+end
+
+function copy_col_row(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for col = inds, row = inds
+        out[row, col] = x[row]
+    end
+    return out
+end
+
+function copy_row_col(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for row = inds, col = inds
+        out[row, col] = x[col]
+    end
+    return out
+end
 ```
 
-Notice both the 3× speedup and the decreased memory allocation
-of the `fview` version of the function.
+Now we will time each of these functions using the same random `10000` by `1` input vector:
+
+```julia-repl
+julia> x = randn(10000);
+
+julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x))
 
-## Copying data is not always bad
+julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]);
+copy_cols:    0.331706323
+copy_rows:    1.799009911
+copy_col_row: 0.415630047
+copy_row_col: 1.721531501
+```
+
+Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols`
+respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally,
+`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the
+first element to appear in a slice expression should be coupled with the inner-most loop.
+
+### Copying data is not always bad
 
 Arrays are stored contiguously in memory, lending themselves to CPU vectorization
 and fewer memory accesses due to caching. These are the same reasons that it is recommended
@@ -1197,87 +1363,41 @@ julia> @time iterated_neural_network(A[inds, inds], x, 10)
 ```
 
 Provided there is enough memory, the cost of copying the view to an array is outweighed
-by the speed boost from doing the repeated matrix multiplications on a contiguous array.
-
-## Consider StaticArrays.jl for small fixed-size vector/matrix operations
-
-If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. the size is
-known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl).
-This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to
-specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers.
-
-For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors.  By
-using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on
-vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`.
-
-## Avoid string interpolation for I/O
-
-When writing data to a file (or other I/O device), forming extra intermediate strings is a source
-of overhead. Instead of:
-
-```julia
-println(file, "$a $b")
-```
-
-use:
-
-```julia
-println(file, a, " ", b)
-```
-
-The first version of the code forms a string, then writes it to the file, while the second version
-writes values directly to the file. Also notice that in some cases string interpolation can be
-harder to read. Consider:
-
-```julia
-println(file, "$(f(a))$(f(b))")
-```
-
-versus:
-
-```julia
-println(file, f(a), f(b))
-```
+by the speed boost from doing the repeated matrix multiplications on a contiguous array.
 
-## Optimize network I/O during parallel execution
+### [Multithreading and linear algebra](@id man-multithreading-linear-algebra)
 
-When executing a remote function in parallel:
+This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations.
+Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded.
+In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading.
 
-```julia
-using Distributed
+Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`.
+It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`).
+Its current value can be accessed with `BLAS.get_num_threads()`.
 
-responses = Vector{Any}(undef, nworkers())
-@sync begin
-    for (idx, pid) in enumerate(workers())
-        @async responses[idx] = remotecall_fetch(foo, pid, args...)
-    end
-end
-```
+When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.).
+However, it is generally recommended to check and set the value manually.
+The OpenBLAS behavior is as follows:
 
-is faster than:
+* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation.
+* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads.
 
-```julia
-using Distributed
+When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`.
+Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N<<X`.
+However this is just a rule of thumb, and the best way to set each number of threads is to experiment on your specific application.
 
-refs = Vector{Any}(undef, nworkers())
-for (idx, pid) in enumerate(workers())
-    refs[idx] = @spawnat pid foo(args...)
-end
-responses = [fetch(r) for r in refs]
-```
+### [Alternative linear algebra backends](@id man-backends-linear-algebra)
 
-The former results in a single network round-trip to every worker, while the latter results in
-two network calls - first by the [`@spawnat`](@ref) and the second due to the [`fetch`](@ref)
-(or even a [`wait`](@ref)).
-The [`fetch`](@ref)/[`wait`](@ref) is also being executed serially resulting in an overall poorer performance.
+As an alternative to OpenBLAS, there exist several other backends that can help with linear algebra performance.
+Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl) and [AppleAccelerate.jl](https://github.com/JuliaMath/AppleAccelerate.jl).
+
+These are external packages, so we will not discuss them in detail here.
+Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading).
 
-## Fix deprecation warnings
 
-A deprecated function internally performs a lookup in order to print a relevant warning only once.
-This extra lookup can cause a significant slowdown, so all uses of deprecated functions should
-be modified as suggested by the warnings.
+## Miscellaneous
 
-## Tweaks
+### Tweaks
 
 These are some minor points that might help in tight inner loops.
 
@@ -1287,7 +1407,13 @@ These are some minor points that might help in tight inner loops.
   * Use [`div(x,y)`](@ref) for truncating division of integers instead of [`trunc(x/y)`](@ref), [`fld(x,y)`](@ref)
     instead of [`floor(x/y)`](@ref), and [`cld(x,y)`](@ref) instead of [`ceil(x/y)`](@ref).
 
-## [Performance Annotations](@id man-performance-annotations)
+### Fix deprecation warnings
+
+A deprecated function internally performs a lookup in order to print a relevant warning only once.
+This extra lookup can cause a significant slowdown, so all uses of deprecated functions should
+be modified as suggested by the warnings.
+
+### [Performance Annotations](@id man-performance-annotations)
 
 Sometimes you can enable better optimization by promising certain program properties.
 
@@ -1297,7 +1423,7 @@ Sometimes you can enable better optimization by promising certain program proper
     to differences for IEEE numbers. Be careful when doing this, as this may change numerical results.
     This corresponds to the `-ffast-math` option of clang.
   * Write [`@simd`](@ref) in front of `for` loops to promise that the iterations are independent and may be
-    reordered.  Note that in many cases, Julia can automatically vectorize code without the `@simd` macro;
+    reordered. Note that in many cases, Julia can automatically vectorize code without the `@simd` macro;
     it is only beneficial in cases where such a transformation would otherwise be illegal, including cases
     like allowing floating-point re-associativity and ignoring dependent memory accesses (`@simd ivdep`).
     Again, be very careful when asserting `@simd` as erroneously annotating a loop with dependent iterations
@@ -1320,7 +1446,7 @@ the optimizer from trying to be too clever and defeat our benchmark):
 ```julia
 @noinline function inner(x, y)
     s = zero(eltype(x))
-    for i=eachindex(x)
+    for i in eachindex(x, y)
         @inbounds s += x[i]*y[i]
     end
     return s
@@ -1328,7 +1454,7 @@ end
 
 @noinline function innersimd(x, y)
     s = zero(eltype(x))
-    @simd for i = eachindex(x)
+    @simd for i in eachindex(x, y)
         @inbounds s += x[i] * y[i]
     end
     return s
@@ -1457,7 +1583,7 @@ julia> f_fast(NaN)
 false
 ```
 
-## Treat Subnormal Numbers as Zeros
+### Treat Subnormal Numbers as Zeros
 
 Subnormal numbers, formerly called [denormal numbers](https://en.wikipedia.org/wiki/Denormal_number),
 are useful in many contexts, but incur a performance penalty on some hardware. A call [`set_zero_subnormals(true)`](@ref)
@@ -1530,195 +1656,105 @@ In some applications, an alternative to zeroing subnormal numbers is to inject a
 a = rand(Float32,1000) * 1.f-9
 ```
 
-## [[`@code_warntype`](@ref)](@id man-code-warntype)
+### Avoid string interpolation for I/O
 
-The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes
-be helpful in diagnosing type-related problems. Here's an example:
+When writing data to a file (or other I/O device), forming extra intermediate strings is a source
+of overhead. Instead of:
 
-```julia-repl
-julia> @noinline pos(x) = x < 0 ? 0 : x;
+```julia
+println(file, "$a $b")
+```
 
-julia> function f(x)
-           y = pos(x)
-           return sin(y*x + 1)
-       end;
+use:
 
-julia> @code_warntype f(3.2)
-MethodInstance for f(::Float64)
-  from f(x) @ Main REPL[9]:1
-Arguments
-  #self#::Core.Const(f)
-  x::Float64
-Locals
-  y::Union{Float64, Int64}
-Body::Float64
-1 ─      (y = Main.pos(x))
-│   %2 = (y * x)::Float64
-│   %3 = (%2 + 1)::Float64
-│   %4 = Main.sin(%3)::Float64
-└──      return %4
+```julia
+println(file, a, " ", b)
 ```
 
-Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref),
-[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice.
-Your code is being presented in form that has been heavily digested on its way to generating
-compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T`
-(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref)
-is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color,
-in this document, red text is denoted by uppercase.
+The first version of the code forms a string, then writes it to the file, while the second version
+writes values directly to the file. Also notice that in some cases string interpolation can be
+harder to read. Consider:
 
-At the top, the inferred return type of the function is shown as `Body::Float64`.
-The next lines represent the body of `f` in Julia's SSA IR form.
-The numbered boxes are labels and represent targets for jumps (via `goto`) in your code.
-Looking at the body, you can see that the first thing that happens is that `pos` is called and the
-return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since
-it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the
-input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64`
-The net result is that `f(x::Float64)` will not be type-unstable
-in its output, even if some of the intermediate computations are type-unstable.
+```julia
+println(file, "$(f(a))$(f(b))")
+```
 
-How you use this information is up to you. Obviously, it would be far and away best to fix `pos`
-to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance
-would be optimal. However, there are circumstances where this kind of *ephemeral* type instability
-might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s
-output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating
-effects of type instability. This is particularly relevant in cases where fixing the type instability
-is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or
-breaking up functions) are your best tools to contain the "damage" from type instability.
-Also, note that even Julia Base has functions that are type unstable.
-For example, the function [`findfirst`](@ref) returns the index into an array where a key is found,
-or `nothing` if it is not found, a clear type instability. In order to make it easier to find the
-type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing`
-are color highlighted in yellow, instead of red.
+versus:
 
-The following examples may help you interpret expressions marked as containing non-leaf types:
+```julia
+println(file, f(a), f(b))
+```
 
-  * Function body starting with `Body::Union{T1,T2})`
-      * Interpretation: function with unstable return type
-      * Suggestion: make the return value type-stable, even if you have to annotate it
+### Avoid eager string materialization
 
-  * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}`
-      * Interpretation: call to a type-unstable function `g`.
-      * Suggestion: fix the function, or if necessary annotate the return value
+In settings where a string representation of an object is only needed
+conditionally (e.g. in error paths of functions or conditional warnings such as
+deprecations), it is advisable to avoid the overhead of eagerly materializing
+the string. Since Julia 1.8, this can be achieved via
+[`LazyString`](@ref) and the corresponding string macro [`@lazy_str`](@ref).
 
-  * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any`
-      * Interpretation: accessing elements of poorly-typed arrays
-      * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual
-        element accesses
+For example, instead of:
 
-  * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N`
-      * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a
-        field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
-      * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
-        of `ArrayContainer`
+```julia
+Base.depwarn("`foo` is deprecated for type $(typeof(x))", :bar)
+```
 
-## [Performance of captured variable](@id man-performance-captured)
+use:
 
-Consider the following example that defines an inner function:
 ```julia
-function abmult(r::Int)
-    if r < 0
-        r = -r
-    end
-    f = x -> x * r
-    return f
-end
+Base.depwarn(lazy"`foo` is deprecated for type $(typeof(x))", :bar)
 ```
 
-Function `abmult` returns a function `f` that multiplies its argument by
-the absolute value of `r`. The inner function assigned to `f` is called a
-"closure". Inner functions are also used by the
-language for `do`-blocks and for generator expressions.
+or the equivalent macro-free version:
 
-This style of code presents performance challenges for the language.
-The parser, when translating it into lower-level instructions,
-substantially reorganizes the above code by extracting the
-inner function to a separate code block.  "Captured" variables such as `r`
-that are shared by inner functions and their enclosing scope are
-also extracted into a heap-allocated "box" accessible to both inner and
-outer functions because the language specifies that `r` in the
-inner scope must be identical to `r` in the outer scope even after the
-outer scope (or another inner function) modifies `r`.
+```julia
+Base.depwarn(LazyString("`foo` is deprecated for type ", typeof(x)), :bar)
+```
 
-The discussion in the preceding paragraph referred to the "parser", that is, the phase
-of compilation that takes place when the module containing `abmult` is first loaded,
-as opposed to the later phase when it is first invoked. The parser does not "know" that
-`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`.
-The magic of type inference takes place in the later phase of compilation.
+Through this approach, the interpolated string will only be constructed when it is actually displayed.
 
-Thus, the parser does not know that `r` has a fixed type (`Int`).
-nor that `r` does not change value once the inner function is created (so that
-the box is unneeded).  Therefore, the parser emits code for
-box that holds an object with an abstract type such as `Any`, which
-requires run-time type dispatch for each occurrence of `r`.  This can be
-verified by applying `@code_warntype` to the above function.  Both the boxing
-and the run-time type dispatch can cause loss of performance.
+### Optimize network I/O during parallel execution
+
+When executing a remote function in parallel:
 
-If captured variables are used in a performance-critical section of the code,
-then the following tips help ensure that their use is performant. First, if
-it is known that a captured variable does not change its type, then this can
-be declared explicitly with a type annotation (on the variable, not the
-right-hand side):
-```julia
-function abmult2(r0::Int)
-    r::Int = r0
-    if r < 0
-        r = -r
-    end
-    f = x -> x * r
-    return f
-end
-```
-The type annotation partially recovers lost performance due to capturing because
-the parser can associate a concrete type to the object in the box.
-Going further, if the captured variable does not need to be boxed at all (because it
-will not be reassigned after the closure is created), this can be indicated
-with `let` blocks as follows.
 ```julia
-function abmult3(r::Int)
-    if r < 0
-        r = -r
-    end
-    f = let r = r
-            x -> x * r
+using Distributed
+
+responses = Vector{Any}(undef, nworkers())
+@sync begin
+    for (idx, pid) in enumerate(workers())
+        Threads.@spawn responses[idx] = remotecall_fetch(foo, pid, args...)
     end
-    return f
 end
 ```
-The `let` block creates a new variable `r` whose scope is only the
-inner function. The second technique recovers full language performance
-in the presence of captured variables. Note that this is a rapidly
-evolving aspect of the compiler, and it is likely that future releases
-will not require this degree of programmer annotation to attain performance.
-In the mean time, some user-contributed packages like
-[FastClosures](https://github.com/c42f/FastClosures.jl) automate the
-insertion of `let` statements as in `abmult3`.
-
-## [Multithreading and linear algebra](@id man-multithreading-linear-algebra)
-
-This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations.
-Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded.
-In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading.
 
-Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`.
-It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`).
-Its current value can be accessed with `BLAS.get_num_threads()`.
-
-When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.).
-However, it is generally recommended to check and set the value manually.
-The OpenBLAS behavior is as follows:
+is faster than:
 
-* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation.
-* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads.
+```julia
+using Distributed
 
-When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`.
-Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N<<X`.
-However this is just a rule of thumb, and the best way to set each number of threads is to experiment on your specific application.
+refs = Vector{Any}(undef, nworkers())
+for (idx, pid) in enumerate(workers())
+    refs[idx] = @spawnat pid foo(args...)
+end
+responses = [fetch(r) for r in refs]
+```
 
-## [Alternative linear algebra backends](@id man-backends-linear-algebra)
+The former results in a single network round-trip to every worker, while the latter results in
+two network calls - first by the [`@spawnat`](@ref) and the second due to the [`fetch`](@ref)
+(or even a [`wait`](@ref)).
+The [`fetch`](@ref)/[`wait`](@ref) is also being executed serially resulting in an overall poorer performance.
 
-As an alternative to OpenBLAS, there exist several other backends that can help with linear algebra performance.
-Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl) and [AppleAccelerate.jl](https://github.com/JuliaMath/AppleAccelerate.jl).
+### [Use `MutableArithmetics` for more control over allocation for mutable arithmetic types](@id man-perftips-mutablearithmetics)
 
-These are external packages, so we will not discuss them in detail here.
-Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading).
+Some [`Number`](@ref) subtypes, such as [`BigInt`](@ref) or [`BigFloat`](@ref), may
+be implemented as [`mutable struct`](@ref) types, or they may have mutable
+components. The arithmetic interfaces in Julia `Base` usually opt for convenience
+over efficiency in such cases, so using them in a naive manner may result in
+suboptimal performance. The abstractions of the
+[`MutableArithmetics`](https://juliahub.com/ui/Packages/General/MutableArithmetics)
+package, on the other hand, make it possible to exploit the mutability of such types
+for writing fast code that allocates only as much as necessary. `MutableArithmetics`
+also makes it possible to copy values of mutable arithmetic types explicitly when
+necessary. `MutableArithmetics` is a user package and is not affiliated with the
+Julia project.
diff --git a/doc/src/tutorials/profile.md b/doc/src/manual/profile.md
similarity index 100%
rename from doc/src/tutorials/profile.md
rename to doc/src/manual/profile.md
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index ed3fe85194d93..1f9f3129ca16b 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -79,6 +79,18 @@ julia> `echo "foo bar"`[2]
 "foo bar"
 ```
 
+You can also pass a `IOBuffer`, and later read from it:
+
+```jldoctest
+julia> io = PipeBuffer(); # PipeBuffer is a type of IOBuffer
+
+julia> run(`echo world`, devnull, io, stderr);
+
+julia> readlines(io)
+1-element Vector{String}:
+ "world"
+```
+
 ## [Interpolation](@id command-interpolation)
 
 Suppose you want to do something a bit more complicated and use the name of a file in the variable
@@ -320,8 +332,8 @@ will attempt to store the data in the kernel's buffers while waiting for a reade
 Another common solution is to separate the reader and writer of the pipeline into separate [`Task`](@ref)s:
 
 ```julia
-writer = @async write(process, "data")
-reader = @async do_compute(read(process, String))
+writer = Threads.@spawn write(process, "data")
+reader = Threads.@spawn do_compute(read(process, String))
 wait(writer)
 fetch(reader)
 ```
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index f4acffb2ae91b..5ba27b3921cec 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -25,7 +25,7 @@ There are a few noteworthy high-level features about Julia's strings:
     the [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding. (A [`transcode`](@ref) function is
     provided to convert to/from other Unicode encodings.)
   * All string types are subtypes of the abstract type `AbstractString`, and external packages define
-    additional `AbstractString` subtypes (e.g. for other encodings).  If you define a function expecting
+    additional `AbstractString` subtypes (e.g. for other encodings). If you define a function expecting
     a string argument, you should declare the type as `AbstractString` in order to accept any string
     type.
   * Like C and Java, but unlike most dynamic languages, Julia has a first-class type for representing
@@ -244,7 +244,7 @@ happens to contain only a single character. In Julia these are very different th
 Range indexing makes a copy of the selected part of the original string.
 Alternatively, it is possible to create a view into a string using the type [`SubString`](@ref).
 More simply, using the [`@views`](@ref) macro on a block of code converts all string slices
-into substrings.  For example:
+into substrings. For example:
 
 ```jldoctest
 julia> str = "long string"
@@ -402,7 +402,7 @@ julia> collect(eachindex(s))
 ```
 
 To access the raw code units (bytes for UTF-8) of the encoding, you can use the [`codeunit(s,i)`](@ref)
-function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref).  The [`codeunits(s)`](@ref)
+function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref). The [`codeunits(s)`](@ref)
 function returns an `AbstractVector{UInt8}` wrapper that lets you access these raw codeunits (bytes) as an array.
 
 Strings in Julia can contain invalid UTF-8 code unit sequences. This convention allows to
@@ -957,11 +957,11 @@ i   Do case-insensitive pattern matching.
     that would cross the Unicode rules/non-Unicode rules boundary
     (ords 255/256) will not succeed.
 
-m   Treat string as multiple lines.  That is, change "^" and "$"
+m   Treat string as multiple lines. That is, change "^" and "$"
     from matching the start or end of the string to matching the
     start or end of any line anywhere within the string.
 
-s   Treat string as single line.  That is, change "." to match any
+s   Treat string as single line. That is, change "." to match any
     character whatsoever, even a newline, which normally it would
     not match.
 
@@ -1012,7 +1012,7 @@ ERROR: syntax: invalid escape sequence
 Triple-quoted regex strings, of the form `r"""..."""`, are also supported (and may be convenient
 for regular expressions containing quotation marks or newlines).
 
-The `Regex()` constructor may be used to create a valid regex string programmatically.  This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples:
+The `Regex()` constructor may be used to create a valid regex string programmatically. This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples:
 
 ```jldoctest
 julia> using Dates
@@ -1142,7 +1142,7 @@ some confusion regarding the matter.
 
 Version numbers can easily be expressed with non-standard string literals of the form [`v"..."`](@ref @v_str).
 Version number literals create [`VersionNumber`](@ref) objects which follow the
-specifications of [semantic versioning](https://semver.org/),
+specifications of [semantic versioning 2.0.0-rc2](https://semver.org/spec/v2.0.0-rc.2.html),
 and therefore are composed of major, minor and patch numeric values, followed by pre-release and
 build alphanumeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version
 `0`, minor version `2`, patch version `1`, pre-release `rc1` and build `win64`. When entering
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index 92516623724b1..b9740102faea7 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -116,7 +116,7 @@ end
 
 Julia Base uses this convention throughout and contains examples of functions
 with both copying and modifying forms (e.g., [`sort`](@ref) and [`sort!`](@ref)), and others
-which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)).  It
+which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It
 is typical for such functions to also return the modified array for convenience.
 
 Functions related to IO or making use of random number generators (RNG) are notable exceptions:
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index 0d7b2795401f1..0b1d380c2c601 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -1102,7 +1102,7 @@ Array{Vector{T}, 1} where T
 Type `T1` defines a 1-dimensional array of 1-dimensional arrays; each
 of the inner arrays consists of objects of the same type, but this type may vary from one inner array to the next.
 On the other hand, type `T2` defines a 1-dimensional array of 1-dimensional arrays all of whose inner arrays must have the
-same type.  Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot.
+same type. Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot.
 
 There is a convenient syntax for naming such types, similar to the short form of function
 definition syntax:
@@ -1424,8 +1424,8 @@ Closest candidates are:
 
 ## [Custom pretty-printing](@id man-custom-pretty-printing)
 
-Often, one wants to customize how instances of a type are displayed.  This is accomplished by
-overloading the [`show`](@ref) function.  For example, suppose we define a type to represent
+Often, one wants to customize how instances of a type are displayed. This is accomplished by
+overloading the [`show`](@ref) function. For example, suppose we define a type to represent
 complex numbers in polar form:
 
 ```jldoctest polartype
@@ -1480,13 +1480,13 @@ julia> [Polar(3, 4.0), Polar(4.0,5.3)]
  4.0 * exp(5.3im)
 ```
 
-where the single-line `show(io, z)` form is still used for an array of `Polar` values.   Technically,
-the REPL calls `display(z)` to display the result of executing a line, which defaults to `show(stdout, MIME("text/plain"), z)`,
-which in turn defaults to `show(stdout, z)`, but you should *not* define new [`display`](@ref)
+where the single-line `show(io, z)` form is still used for an array of `Polar` values. Technically,
+the REPL calls `display(z)` to display the result `z` of executing a line, which defaults to `show(io, MIME("text/plain"), z)` (where `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref)),
+which in turn defaults to `show(io, z)`, but you should *not* define new [`display`](@ref)
 methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref Multimedia-I/O)).
 
 Moreover, you can also define `show` methods for other MIME types in order to enable richer display
-(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia).   For example,
+(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia). For example,
 we can define formatted HTML display of `Polar` objects, with superscripts and italics, via:
 
 ```jldoctest polartype
@@ -1508,9 +1508,9 @@ julia> show(stdout, "text/html", Polar(3.0,4.0))
 ```
 
 As a rule of thumb, the single-line `show` method should print a valid Julia expression for creating
-the shown object.  When this `show` method contains infix operators, such as the multiplication
+the shown object. When this `show` method contains infix operators, such as the multiplication
 operator (`*`) in our single-line `show` method for `Polar` above, it may not parse correctly when
-printed as part of another object.  To see this, consider the expression object (see [Program
+printed as part of another object. To see this, consider the expression object (see [Program
 representation](@ref)) which takes the square of a specific instance of our `Polar` type:
 
 ```jldoctest polartype
@@ -1524,7 +1524,7 @@ julia> print(:($a^2))
 
 Because the operator `^` has higher precedence than `*` (see [Operator Precedence and Associativity](@ref)), this
 output does not faithfully represent the expression `a ^ 2` which should be equal to `(3.0 *
-exp(4.0im)) ^ 2`.  To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO,
+exp(4.0im)) ^ 2`. To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO,
 z::Polar, indent::Int, precedence::Int)`, which is called internally by the expression object when
 printing:
 
@@ -1544,7 +1544,7 @@ julia> :($a^2)
 ```
 
 The method defined above adds parentheses around the call to `show` when the precedence of the
-calling operator is higher than or equal to the precedence of multiplication.  This check allows
+calling operator is higher than or equal to the precedence of multiplication. This check allows
 expressions which parse correctly without the parentheses (such as `:($a + 2)` and `:($a == 2)`) to
 omit them when printing:
 
@@ -1587,11 +1587,24 @@ julia> [Polar(3, 4.0) Polar(4.0,5.3)]
 See the [`IOContext`](@ref) documentation for a list of common properties which can be used
 to adjust printing.
 
+### Output-function summary
+
+Here is a brief summary of the different output functions in Julia and how they are related.
+Most new types should only need to define `show` methods, if anything.
+
+* [`display(x)`](@ref) tells the current environment to display `x` in whatever way it thinks best. (This might even be a graphical display in something like a Jupyter or Pluto notebook.) By default (e.g. in scripts or in the text REPL), it calls `show(io, "text/plain", x)`, or equivalently `show(io, MIME"text/plain"(), x)`, for an appropriate `io` stream. (In the REPL, `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref).) The REPL uses `display` to output the result of an evaluated expression.
+* The 3-argument [`show(io, ::MIME"text/plain", x)`](@ref) method performs verbose pretty-printing of `x`. By default (if no 3-argument method is defined for `typeof(x)`), it calls the 2-argument `show(io, x)`. It is called by the 2-argument `repr("text/plain", x)`. Other 3-argument `show` methods can be defined for additional MIME types as discussed above, to enable richer display of `x` in some interactive environments.
+* The 2-argument [`show(io, x)`](@ref) is the default simple text representation of `x`. It is called by the 1-argument [`repr(x)`](@ref), and is typically the format you might employ to input `x` into Julia. The 1-argument `show(x)` calls `show(stdout, x)`.
+* [`print(io, x)`](@ref) by default calls `show(io, x)`, but a few types have a distinct `print` format — most notably, when `x` is a string, `print` outputs the raw text whereas `show` outputs an escaped string enclosed in quotation marks. The 1-argument `print(x)` calls `print(stdout, x)`. `print` is also called by [`string(x)`](@ref).
+* [`write(io, x)`](@ref), if it is defined (it generally has *no* default definition for new types), writes a "raw" binary representation of `x` to `io`, e.g. an `x::Int32` will be written as 4 bytes.
+
+It is also helpful to be familiar with the metadata that can be attached to an `io` stream by an [`IOContext`](@ref) wrapper. For example, the REPL sets the `:limit => true` flag from `display` for an evaluated expression, in order to limit the output to fit in the terminal; you can query this flag with `get(io, :limit, false)`. And when displaying an object contained within, for example, a multi-column matrix, the `:compact => true` flag could be set, which you can query with `get(io, :compact, false)`.
+
 ## "Value types"
 
 In Julia, you can't dispatch on a *value* such as `true` or `false`. However, you can dispatch
 on parametric types, and Julia allows you to include "plain bits" values (Types, Symbols, Integers,
-floating-point numbers, tuples, etc.) as type parameters.  A common example is the dimensionality
+floating-point numbers, tuples, etc.) as type parameters. A common example is the dimensionality
 parameter in `Array{T,N}`, where `T` is a type (e.g., [`Float64`](@ref)) but `N` is just an `Int`.
 
 You can create your own custom types that take values as parameters, and use them to control dispatch
@@ -1609,7 +1622,7 @@ julia> Val(x) = Val{x}()
 Val
 ```
 
-There is no more to the implementation of `Val` than this.  Some functions in Julia's standard
+There is no more to the implementation of `Val` than this. Some functions in Julia's standard
 library accept `Val` instances as arguments, and you can also use it to write your own functions.
  For example:
 
@@ -1632,7 +1645,7 @@ a *type*, i.e., use `foo(Val(:bar))` rather than `foo(Val{:bar})`.
 
 It's worth noting that it's extremely easy to mis-use parametric "value" types, including `Val`;
 in unfavorable cases, you can easily end up making the performance of your code much *worse*.
- In particular, you would never want to write actual code as illustrated above.  For more information
+ In particular, you would never want to write actual code as illustrated above. For more information
 about the proper (and improper) uses of `Val`, please read [the more extensive discussion in the performance tips](@ref man-performance-value-type).
 
 [^1]: "Small" is defined by the `max_union_splitting` configuration, which currently defaults to 4.
diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md
index db1bd69c2e480..eba970c051f1e 100644
--- a/doc/src/manual/unicode-input.md
+++ b/doc/src/manual/unicode-input.md
@@ -2,7 +2,7 @@
 
 The following table lists Unicode characters that can be entered via
 tab completion of LaTeX-like abbreviations in the Julia REPL (and
-in various other editing environments).  You can also get information on how to
+in various other editing environments). You can also get information on how to
 type a symbol by entering it in the REPL help, i.e. by typing `?` and then
 entering the symbol in the REPL (e.g., by copy-paste from somewhere you saw
 the symbol).
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index e921ed8931e84..64a12ea88c7dd 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -16,17 +16,58 @@ introduce a "soft scope", which affects whether
 [shadowing](https://en.wikipedia.org/wiki/Variable_shadowing)
 a global variable by the same name is allowed or not.
 
-### [Scope constructs](@id man-scope-table)
+!!! info "Summary"
+    Variables defined in global scope may be undefined in inner local scopes,
+    depending on where the code is run, in order to balance safety and convenience.
+    The hard and soft local scoping rules define the interplay between global and local variables.
+
+    However, variables defined only in local scope behave consistently in all contexts.
+    If the variable is already defined, it will be reused. If the variable is not defined,
+    it will be made available to the current and inner scopes (but not outer scopes).
+
+!!! tip "A Common Confusion"
+    If you run into an unexpectedly undefined variable,
+
+    ```julia
+    # Print the numbers 1 through 5
+    i = 0
+    while i < 5
+        i += 1     # ERROR: UndefVarError: `i` not defined
+        println(i)
+    end
+    ```
+
+    a simple fix is to change all global variable definitions into local definitions
+    by wrapping the code in a `let` block or `function`.
+
+    ```julia
+    # Print the numbers 1 through 5
+    let i = 0
+        while i < 5
+            i += 1     # Now outer `i` is defined in the inner scope of the while loop
+            println(i)
+        end
+    end
+    ```
+
+    This is a common source of confusion when writing procedural scripts,
+    but it becomes a non-issue if code is moved inside functions
+    or executed interactively in the REPL.
+
+    See also the [`global`](@ref) and [`local`](@ref) keywords
+    to explicitly achieve any desired scoping behavior.
+
+### [Scope Constructs](@id man-scope-table)
 
 The constructs introducing scope blocks are:
 
-| Construct | Scope type | Allowed within |
-|:----------|:-----------|:---------------|
+| Construct | Scope Type Introduced | Scope Types Able to Contain Construct |
+|:----------|:----------------------|:--------------------------------------|
 | [`module`](@ref), [`baremodule`](@ref) | global | global |
 | [`struct`](@ref) | local (soft) | global |
-| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local |
 | [`macro`](@ref) | local (hard) | global |
-| functions, [`do`](@ref) blocks, [`let`](@ref) blocks, comprehensions, generators | local (hard) | global, local |
+| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local |
+| [`function`](@ref), [`do`](@ref), [`let`](@ref), [comprehensions](@ref man-comprehensions), [generators](@ref man-generators) | local (hard) | global, local |
 
 Notably missing from this table are
 [begin blocks](@ref man-compound-expressions) and [if blocks](@ref man-conditional-evaluation)
@@ -702,7 +743,7 @@ ERROR: invalid redefinition of constant x
 julia> const y = 1.0
 1.0
 
-julia> y = 2.0
+julia> const y = 2.0
 WARNING: redefinition of constant y. This may fail, cause incorrect answers, or produce other errors.
 2.0
 ```
@@ -714,34 +755,13 @@ julia> const z = 100
 julia> z = 100
 100
 ```
-The last rule applies to immutable objects even if the variable binding would change, e.g.:
-```julia-repl
-julia> const s1 = "1"
-"1"
-
-julia> s2 = "1"
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x00000000132c9638
- Ptr{UInt8} @0x0000000013dd3d18
-
-julia> s1 = s2
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x0000000013dd3d18
- Ptr{UInt8} @0x0000000013dd3d18
-```
-However, for mutable objects the warning is printed as expected:
+* if an assignment would change the mutable object to which the variable points (regardless of whether those two objects are deeply equal), a warning is printed:
 ```jldoctest
 julia> const a = [1]
 1-element Vector{Int64}:
  1
 
-julia> a = [1]
+julia> const a = [1]
 WARNING: redefinition of constant a. This may fail, cause incorrect answers, or produce other errors.
 1-element Vector{Int64}:
  1
@@ -762,7 +782,7 @@ f (generic function with 1 method)
 julia> f()
 1
 
-julia> x = 2
+julia> const x = 2
 WARNING: redefinition of constant x. This may fail, cause incorrect answers, or produce other errors.
 2
 
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index 75c2163896d9c..ad2c60a029032 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -110,7 +110,7 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some
 can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f`
 will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed
 as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron`
-to define `⊗` as an infix Kronecker product).  Operators can also be suffixed with modifying marks,
+to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks,
 primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`.
 A space is required between an operator that ends with a subscript/superscript letter and a subsequent
 variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be written as `+ᵃ x` to distinguish
@@ -156,7 +156,7 @@ The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign
 
 An assignment `variable = value` "binds" the name `variable` to the `value` computed
 on the right-hand side, and the whole assignment is treated by Julia as an expression
-equal to the right-hand-side `value`.  This means that assignments can be *chained*
+equal to the right-hand-side `value`. This means that assignments can be *chained*
 (the same `value` assigned to multiple variables with `variable1 = variable2 = value`)
 or used in other expressions, and is also why their result is shown in the REPL as
 the value of the right-hand side.  (In general, the REPL displays the value of whatever
@@ -175,7 +175,7 @@ julia> b
 ```
 
 A common confusion is the distinction between *assignment* (giving a new "name" to a value)
-and *mutation* (changing a value).  If you run `a = 2` followed by `a = 3`, you have changed
+and *mutation* (changing a value). If you run `a = 2` followed by `a = 3`, you have changed
 the "name" `a` to refer to a new value `3` … you haven't changed the number `2`, so `2+2`
 will still give `4` and not `6`!   This distinction becomes more clear when dealing with
 *mutable* types like [arrays](@ref lib-arrays), whose contents *can* be changed:
@@ -213,11 +213,11 @@ julia> b   # b refers to the original array object, which has been mutated
   3
 ```
 That is, `a[i] = value` (an alias for [`setindex!`](@ref)) *mutates* an existing array object
-in memory, accessible via either `a` or `b`.  Subsequently setting `a = 3.14159`
+in memory, accessible via either `a` or `b`. Subsequently setting `a = 3.14159`
 does not change this array, it simply binds `a` to a different object; the array is still
 accessible via `b`. Another common syntax to mutate an existing object is
 `a.field = value` (an alias for [`setproperty!`](@ref)), which can be used to change
-a [`mutable struct`](@ref).  There is also mutation via dot assignment, for example
+a [`mutable struct`](@ref). There is also mutation via dot assignment, for example
 `b .= 5:7` (which mutates our array `b` in-place to contain `[5,6,7]`), as part of Julia's
 [vectorized "dot" syntax](@ref man-dot-operators).
 
diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md
index c3bbbbae8146f..bfc526edbf8dd 100644
--- a/doc/src/manual/workflow-tips.md
+++ b/doc/src/manual/workflow-tips.md
@@ -114,5 +114,5 @@ the following modifications:
    ```
 
    You can iteratively modify the code in MyPkg in your editor and re-run the
-   tests with `include("runtests.jl")`.  You generally should not need to restart
+   tests with `include("runtests.jl")`. You generally should not need to restart
    your Julia session to see the changes take effect (subject to a few [limitations](https://timholy.github.io/Revise.jl/stable/limitations/)).
diff --git a/doc/src/tutorials/creating-packages.md b/doc/src/tutorials/creating-packages.md
deleted file mode 100644
index bd5c9b32d34b3..0000000000000
--- a/doc/src/tutorials/creating-packages.md
+++ /dev/null
@@ -1,647 +0,0 @@
-# [Creating Packages](@id creating-packages-tutorial)
-
-## Generating files for a package
-
-!!! note
-    The [PkgTemplates](https://github.com/invenia/PkgTemplates.jl) package offers an easy, repeatable, and
-    customizable way to generate the files for a new package. It can also generate files needed for Documentation, CI, etc.
-    We recommend that you use PkgTemplates for creating
-    new packages instead of using the minimal `pkg> generate` functionality described below.
-
-To generate the bare minimum files for a new package, use `pkg> generate`.
-
-```julia-repl
-(@v1.8) pkg> generate HelloWorld
-```
-
-This creates a new project `HelloWorld` in a subdirectory by the same name, with the following files (visualized with the external [`tree` command](https://linux.die.net/man/1/tree)):
-
-```julia-repl
-shell> tree HelloWorld/
-HelloWorld/
-├── Project.toml
-└── src
-    └── HelloWorld.jl
-
-2 directories, 2 files
-```
-
-The `Project.toml` file contains the name of the package, its unique UUID, its version, the authors and potential dependencies:
-
-```toml
-name = "HelloWorld"
-uuid = "b4cd1eb8-1e24-11e8-3319-93036a3eb9f3"
-version = "0.1.0"
-authors = ["Some One <someone@email.com>"]
-
-[deps]
-```
-
-The content of `src/HelloWorld.jl` is:
-
-```julia
-module HelloWorld
-
-greet() = print("Hello World!")
-
-end # module
-```
-
-We can now activate the project by using the path to the directory where it is installed, and load the package:
-
-```julia-repl
-pkg> activate ./HelloWorld
-
-julia> import HelloWorld
-
-julia> HelloWorld.greet()
-Hello World!
-```
-
-For the rest of the tutorial we enter inside the directory of the project, for convenience:
-
-```julia-repl
-julia> cd("HelloWorld")
-```
-
-## Adding dependencies to the project
-
-Let’s say we want to use the standard library package `Random` and the registered package `JSON` in our project.
-We simply `add` these packages (note how the prompt now shows the name of the newly generated project,
-since we `activate`d it):
-
-```julia-repl
-(HelloWorld) pkg> add Random JSON
-   Resolving package versions...
-    Updating `~/HelloWorld/Project.toml`
-  [682c06a0] + JSON v0.21.3
-  [9a3f8284] + Random
-    Updating `~/HelloWorld/Manifest.toml`
-  [682c06a0] + JSON v0.21.3
-  [69de0a69] + Parsers v2.4.0
-  [ade2ca70] + Dates
- ...
-```
-
-Both `Random` and `JSON` got added to the project’s `Project.toml` file, and the resulting dependencies got added to the `Manifest.toml` file.
-The resolver has installed each package with the highest possible version, while still respecting the compatibility that each package enforces on its dependencies.
-
-We can now use both `Random` and `JSON` in our project. Changing `src/HelloWorld.jl` to
-
-```julia
-module HelloWorld
-
-import Random
-import JSON
-
-greet() = print("Hello World!")
-greet_alien() = print("Hello ", Random.randstring(8))
-
-end # module
-```
-
-and reloading the package, the new `greet_alien` function that uses `Random` can be called:
-
-```julia-repl
-julia> HelloWorld.greet_alien()
-Hello aT157rHV
-```
-
-## Defining a public API
-
-If you want your package to be useful to other packages and you want folks to be able to
-easily update to newer version of your package when they come out, it is important to
-document what behavior will stay consistent across updates.
-
-Unless you note otherwise, the public API of your package is defined as all the behavior you
-describe about public symbols. A public symbol is a symbol that is exported from your
-package with the `export` keyword or marked as public with the `public` keyword. When you
-change the behavior of something that was previously public so that the new
-version no longer conforms to the specifications provided in the old version, you should
-adjust your package version number according to [Julia's variant on SemVer](#Version-specifier-format).
-If you would like to include a symbol in your public API without exporting it into the
-global namespace of folks who call `using YourPackage`, you should mark that symbol as
-public with `public that_symbol`. Symbols marked as public with the `public` keyword are
-just as public as those marked as public with the `export` keyword, but when folks call
-`using YourPackage`, they will still have to qualify access to those symbols with
-`YourPackage.that_symbol`.
-
-Let's say we would like our `greet` function to be part of the public API, but not the
-`greet_alien` function. We could the write the following and release it as version `1.0.0`.
-
-```julia
-module HelloWorld
-
-export greet
-
-import Random
-import JSON
-
-"Writes a friendly message."
-greet() = print("Hello World!")
-
-"Greet an alien by a randomly generated name."
-greet_alien() = print("Hello ", Random.randstring(8))
-
-end # module
-```
-
-Then, if we change `greet` to
-
-```julia
-"Writes a friendly message that is exactly three words long."
-greet() = print("Hello Lovely World!")
-```
-
-We would release the new version as `1.1.0`. This is not breaking
-because the new implementation conforms to the old documentation, but
-it does add a new feature, that the message must be three words long.
-
-Later, we may wish to change `greet_alien` to
-
-```julia
-"Greet an alien by the name of \"Zork\"."
-greet_alien() = print("Hello Zork")
-```
-
-And also export it by changing
-
-```julia
-export greet
-```
-
-to
-
-```julia
-export greet, greet_alien
-```
-
-We should release this new version as `1.2.0` because it adds a new feature
-`greet_alien` to the public API. Even though `greet_alien` was documented before
-and the new version does not conform to the old documentation, this is not breaking
-because the old documentation was not attached to a symbol that was exported
-at the time so that documentation does not apply across released versions.
-
-However, if we now wish to change `greet` to
-
-```julia
-"Writes a friendly message that is exactly four words long."
-greet() = print("Hello very lovely world")
-```
-
-we would need to release the new version as `2.0.0`. In version `1.1.0`, we specified that
-the greeting would be three words long, and because `greet` was exported, that description
-also applies to all future versions until the next breaking release. Because this new
-version does not conform to the old specification, it must be tagged as a breaking change.
-
-Please note that version numbers are free and unlimited. It is okay to use lots of them
-(e.g. version `6.62.8`).
-
-## Adding a build step to the package
-
-The build step is executed the first time a package is installed or when explicitly invoked with `build`.
-A package is built by executing the file `deps/build.jl`.
-
-```julia-repl
-julia> mkpath("deps");
-
-julia> write("deps/build.jl",
-             """
-             println("I am being built...")
-             """);
-
-(HelloWorld) pkg> build
-  Building HelloWorld → `deps/build.log`
- Resolving package versions...
-
-julia> print(readchomp("deps/build.log"))
-I am being built...
-```
-
-If the build step fails, the output of the build step is printed to the console
-
-```julia-repl
-julia> write("deps/build.jl",
-             """
-             error("Ooops")
-             """);
-
-(HelloWorld) pkg> build
-    Building HelloWorld → `~/HelloWorld/deps/build.log`
-ERROR: Error building `HelloWorld`:
-ERROR: LoadError: Ooops
-Stacktrace:
- [1] error(s::String)
-   @ Base ./error.jl:35
- [2] top-level scope
-   @ ~/HelloWorld/deps/build.jl:1
- [3] include(fname::String)
-   @ Base.MainInclude ./client.jl:476
- [4] top-level scope
-   @ none:5
-in expression starting at /home/kc/HelloWorld/deps/build.jl:1
-```
-
-!!! warning
-    A build step should generally not create or modify any files in the package directory. If you need to store some files
-    from the build step, use the [Scratch.jl](https://github.com/JuliaPackaging/Scratch.jl) package.
-
-## [Adding tests to the package](@id adding-tests-to-packages)
-
-When a package is tested the file `test/runtests.jl` is executed:
-
-```julia-repl
-julia> mkpath("test");
-
-julia> write("test/runtests.jl",
-             """
-             println("Testing...")
-             """);
-
-(HelloWorld) pkg> test
-   Testing HelloWorld
- Resolving package versions...
-Testing...
-   Testing HelloWorld tests passed
-```
-
-Tests are run in a new Julia process, where the package itself, and any
-test-specific dependencies, are available, see below.
-
-
-!!! warning
-    Tests should generally not create or modify any files in the package directory. If you need to store some files
-    from the build step, use the [Scratch.jl](https://github.com/JuliaPackaging/Scratch.jl) package.
-
-### Test-specific dependencies
-
-There are two ways of adding test-specific dependencies (dependencies that are not dependencies of the package but will still be available to
-load when the package is tested).
-
-#### `target` based test specific dependencies
-
-Using this method of adding test-specific dependencies, the packages are added under an `[extras]` section and to a test target,
-e.g. to add `Markdown` and `Test` as test dependencies, add the following to the `Project.toml` file:
-
-```toml
-[extras]
-Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Markdown", "Test"]
-```
-
-Note that the only supported targets are `test` and `build`, the latter of which (not recommended) can be used
-for any `deps/build.jl` scripts.
-
-#### Alternative approach: `test/Project.toml` file test specific dependencies
-
-!!! note
-    The exact interaction between `Project.toml`, `test/Project.toml` and their corresponding
-    `Manifest.toml`s are not fully worked out and may be subject to change in future versions.
-    The older method of adding test-specific dependencies, described in the previous section,
-    will therefore be supported throughout all Julia 1.X releases.
-
-In Julia 1.2 and later test dependencies can be declared in `test/Project.toml`. When running
-tests, Pkg will automatically merge this and the package Projects to create the test environment.
-
-!!! note
-    If no `test/Project.toml` exists Pkg will use the `target` based test specific dependencies.
-
-To add a test-specific dependency, i.e. a dependency that is available only when testing,
-it is thus enough to add this dependency to the `test/Project.toml` project. This can be
-done from the Pkg REPL by activating this environment, and then use `add` as one normally
-does. Let's add the `Test` standard library as a test dependency:
-
-```julia-repl
-(HelloWorld) pkg> activate ./test
-[ Info: activating environment at `~/HelloWorld/test/Project.toml`.
-
-(test) pkg> add Test
- Resolving package versions...
-  Updating `~/HelloWorld/test/Project.toml`
-  [8dfed614] + Test
-  Updating `~/HelloWorld/test/Manifest.toml`
-  [...]
-```
-
-We can now use `Test` in the test script and we can see that it gets installed when testing:
-
-```julia-repl
-julia> write("test/runtests.jl",
-             """
-             using Test
-             @test 1 == 1
-             """);
-
-(test) pkg> activate .
-
-(HelloWorld) pkg> test
-   Testing HelloWorld
- Resolving package versions...
-  Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Project.toml`
-  [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`]
-  [8dfed614] + Test
-  Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Manifest.toml`
-  [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`]
-   Testing HelloWorld tests passed```
-```
-
-## Compatibility on dependencies
-
-Every dependency should in general have a compatibility constraint on it.
-This is an important topic so there is a chapter in the package docs about it:
-[Compatibility](https://pkgdocs.julialang.org/v1/compatibility).
-
-## Weak dependencies
-
-!!! note
-    This is a somewhat advanced usage of Pkg which can be skipped for people new to Julia and Julia packages.
-
-!!! compat
-    The described feature requires Julia 1.9+.
-
-A weak dependency is a dependency that will not automatically install when the package is installed but
-you can still control what versions of that package are allowed to be installed by setting compatibility on it.
-These are listed in the project file under the `[weakdeps]` section:
-
-```toml
-[weakdeps]
-SomePackage = "b3785f31-9d33-4cdf-bc73-f646780f1739"
-
-[compat]
-SomePackage = "1.2"
-```
-
-The current usage of this is almost solely limited to "extensions" which is described in the next section.
-
-## Conditional loading of code in packages (Extensions)
-
-!!! note
-    This is a somewhat advanced usage of Pkg which can be skipped for people new to Julia and Julia packages.
-
-!!! compat
-    The described feature requires Julia 1.9+.
-
-Sometimes one wants to make two or more packages work well together, but may be reluctant (perhaps due to increased load times) to make one an unconditional dependency of the other.
-A package *extension* is a module in a file (similar to a package) that is automatically loaded when *some other set of packages* are
-loaded into the Julia session. This is very similar to functionality that the external package
-[Requires.jl](https://github.com/JuliaPackaging/Requires.jl) provides, but which is now available directly through Julia,
-and provides added benefits such as being able to precompile the extension.
-
-### Code structure
-
-A useful application of extensions could be for a plotting package that should be able to plot
-objects from a wide variety of different Julia packages.
-Adding all those different Julia packages as dependencies of the plotting package
-could be expensive since they would end up getting loaded even if they were never used.
-Instead, the code required to plot objects for specific packages can be put into separate files
-(extensions) and these are loaded only when the packages that define the type(s) we want to plot
-are loaded.
-
-Below is an example of how the code can be structured for a use case in which a
-`Plotting` package wants to be able to display objects defined in the external package `Contour`.
-The file and folder structure shown below is found in the `Plotting` package.
-
- `Project.toml`:
- ```toml
-name = "Plotting"
-version = "0.1.0"
-uuid = "..."
-
-[weakdeps]
-Contour = "d38c429a-6771-53c6-b99e-75d170b6e991"
-
-[extensions]
-# name of extension to the left
-# extension dependencies required to load the extension to the right
-# use a list for multiple extension dependencies
-PlottingContourExt = "Contour"
-
-[compat]
-Contour = "0.6.2"
-```
-
-`src/Plotting.jl`:
-```julia
-module Plotting
-
-function plot(x::Vector)
-    # Some functionality for plotting a vector here
-end
-
-end # module
-```
-
-`ext/PlottingContourExt.jl` (can also be in `ext/PlottingContourExt/PlottingContourExt.jl`):
-```julia
-module PlottingContourExt # Should be same name as the file (just like a normal package)
-
-using Plotting, Contour
-
-function Plotting.plot(c::Contour.ContourCollection)
-    # Some functionality for plotting a contour here
-end
-
-end # module
-```
-
-Extensions can have any arbitrary name (here `PlottingContourExt`), but using something similar to the format of
-this example that makes the extended functionality and dependency of the extension clear is likely a good idea.
-
-!!! compat
-    Often you will put the extension dependencies into the `test` target so they are loaded when running e.g. `Pkg.test()`. On earlier Julia versions
-    this requires you to also put the package in the `[extras]` section. This is unfortunate but the project verifier on older Julia versions will
-    complain if this is not done.
-
-!!! note
-    If you use a manifest generated by a Julia version that does not know about extensions with a Julia version that does
-    know about them, the extensions will not load. This is because the manifest lacks some information that tells Julia
-    when it should load these packages. So make sure you use a manifest generated at least the Julia version you are using.
-
-### Behavior of extensions
-
-A user that depends only on `Plotting` will not pay the cost of the "extension" inside the `PlottingContourExt` module.
-It is only when the `Contour` package actually gets loaded that the `PlottingContourExt` extension is loaded too
-and provides the new functionality.
-
-In our example, the new functionality is an additional _method_, which we add to an existing _function_ from the parent package `Plotting`.
-Implementing such methods is among the most standard use cases of package extensions.
-Within the parent package, the function to extend can even be defined with zero methods, as follows:
-
-```julia
-function plot end
-```
-
-!!! note
-    If one considers `PlottingContourExt` as a completely separate package, it could be argued that defining `Plotting.plot(c::Contour.ContourCollection)` is
-    [type piracy](@ref avoid-type-piracy) since `PlottingContourExt` _owns_ neither the function `Plotting.plot` nor the type `Contour.ContourCollection`.
-    However, for extensions, it is ok to assume that the extension owns the functions in its parent package.
-
-In other situations, one may need to define new symbols in the extension (types, structs, functions, etc.) instead of reusing those from the parent package.
-Such symbols are created in a separate module corresponding to the extension, namely `PlottingContourExt`, and thus not in `Plotting` itself.
-If extension symbols are needed in the parent package, one must call `Base.get_extension` to retrieve them.
-Here is an example showing how a custom type defined in `PlottingContourExt` can be accessed in `Plotting`:
-
-```julia
-ext = Base.get_extension(@__MODULE__, :PlottingContourExt)
-if !isnothing(ext)
-    ContourPlotType = ext.ContourPlotType
-end
-```
-
-On the other hand, accessing extension symbols from a third-party package (i.e. not the parent) is not a recommended practice at the moment.
-
-### Backwards compatibility
-
-This section discusses various methods for using extensions on Julia versions that support them,
-while simultaneously providing similar functionality on older Julia versions.
-
-#### Requires.jl
-
-This section is relevant if you are currently using Requires.jl but want to transition to using extensions (while still having Requires be used on Julia versions that do not support extensions).
-This is done by making the following changes (using the example above):
-
-- Add the following to the package file. This makes it so that Requires.jl loads and inserts the
-  callback only when extensions are not supported
-  ```julia
-  # This symbol is only defined on Julia versions that support extensions
-  if !isdefined(Base, :get_extension)
-  using Requires
-  end
-
-  @static if !isdefined(Base, :get_extension)
-  function __init__()
-      @require Contour = "d38c429a-6771-53c6-b99e-75d170b6e991" include("../ext/PlottingContourExt.jl")
-  end
-  end
-  ```
-  or if you have other things in your `__init__()` function:
-  ```julia
-  if !isdefined(Base, :get_extension)
-  using Requires
-  end
-
-  function __init__()
-      # Other init functionality here
-
-      @static if !isdefined(Base, :get_extension)
-          @require Contour = "d38c429a-6771-53c6-b99e-75d170b6e991" include("../ext/PlottingContourExt.jl")
-      end
-  end
-  ```
-- Make the following change in the conditionally-loaded code:
-  ```julia
-  isdefined(Base, :get_extension) ? (using Contour) : (using ..Contour)
-  ```
-- Add `Requires` to `[weakdeps]` in your `Project.toml` file, so that it is listed in both `[deps]` and `[weakdeps]`.
-  Julia 1.9+ knows to not install it as a regular dependency, whereas earlier versions will consider it a dependency.
-
-The package should now work with Requires.jl on Julia versions before extensions were introduced
-and with extensions on more recent Julia versions.
-
-####  Transition from normal dependency to extension
-
-This section is relevant if you have a normal dependency that you want to transition be an extension (while still having the dependency be a normal dependency on Julia versions that do not support extensions).
-This is done by making the following changes (using the example above):
-
-- Make sure that the package is **both** in the `[deps]` and `[weakdeps]` section. Newer Julia versions will ignore dependencies in `[deps]` that are also in `[weakdeps]`.
-- Add the following to your main package file (typically at the bottom):
-  ```julia
-  if !isdefined(Base, :get_extension)
-    include("../ext/PlottingContourExt.jl")
-  end
-  ```
-
-#### Using an extension while supporting older Julia versions
-
-In the case where one wants to use an extension (without worrying about the
-feature of the extension being available on older Julia versions) while still
-supporting older Julia versions the packages under `[weakdeps]` should be
-duplicated into `[extras]`. This is an unfortunate duplication, but without
-doing this the project verifier under older Julia versions will throw an error
-if it finds packages under `[compat]` that is not listed in `[extras]`.
-
-## Package naming guidelines
-
-Package names should be sensible to most Julia users, *even to those who are not domain experts*.
-The following guidelines apply to the `General` registry but may be useful for other package
-registries as well.
-
-Since the `General` registry belongs to the entire community, people may have opinions about
-your package name when you publish it, especially if it's ambiguous or can be confused with
-something other than what it is. Usually, you will then get suggestions for a new name that
-may fit your package better.
-
-1. Avoid jargon. In particular, avoid acronyms unless there is minimal possibility of confusion.
-
-     * It's ok to say `USA` if you're talking about the USA.
-     * It's not ok to say `PMA`, even if you're talking about positive mental attitude.
-2. Avoid using `Julia` in your package name or prefixing it with `Ju`.
-
-     * It is usually clear from context and to your users that the package is a Julia package.
-     * Package names already have a `.jl` extension, which communicates to users that `Package.jl` is a Julia package.
-     * Having Julia in the name can imply that the package is connected to, or endorsed by, contributors
-       to the Julia language itself.
-3. Packages that provide most of their functionality in association with a new type should have pluralized
-   names.
-
-     * `DataFrames` provides the `DataFrame` type.
-     * `BloomFilters` provides the `BloomFilter` type.
-     * In contrast, `JuliaParser` provides no new type, but instead new functionality in the `JuliaParser.parse()`
-       function.
-4. Err on the side of clarity, even if clarity seems long-winded to you.
-
-     * `RandomMatrices` is a less ambiguous name than `RndMat` or `RMT`, even though the latter are shorter.
-5. A less systematic name may suit a package that implements one of several possible approaches to
-   its domain.
-
-     * Julia does not have a single comprehensive plotting package. Instead, `Gadfly`, `PyPlot`, `Winston`
-       and other packages each implement a unique approach based on a particular design philosophy.
-     * In contrast, `SortingAlgorithms` provides a consistent interface to use many well-established
-       sorting algorithms.
-6. Packages that wrap external libraries or programs should be named after those libraries or programs.
-
-     * `CPLEX.jl` wraps the `CPLEX` library, which can be identified easily in a web search.
-     * `MATLAB.jl` provides an interface to call the MATLAB engine from within Julia.
-
-7. Avoid naming a package closely to an existing package
-     * `Websocket` is too close to `WebSockets` and can be confusing to users. Rather use a new name such as `SimpleWebsockets`.
-
-8. Avoid using a distinctive name that is already in use in a well known, unrelated project.
-     * Don't use the names `Tkinter.jl`, `TkinterGUI.jl`, etc. for a package that is unrelated
-       to the popular `tkinter` python package, even if it provides bindings to Tcl/Tk.
-       A package name of `Tkinter.jl` would only be appropriate if the package used Python's
-       library to accomplish its work or was spearheaded by the same community of developers.
-     * It's okay to name a package `HTTP.jl` even though it is unrelated to the popular rust
-       crate `http` because in most usages the name "http" refers to the hypertext transfer
-       protocol, not to the `http` rust crate.
-     * It's okay to name a package `OpenSSL.jl` if it provides an interface to the OpenSSL
-       library, even without explicit affiliation with the creators of the OpenSSL (provided
-       there's no copyright or trademark infringement etc.)
-
-## Registering packages
-
-Once a package is ready it can be registered with the [General Registry](https://github.com/JuliaRegistries/General#registering-a-package-in-general) (see also the [FAQ](https://github.com/JuliaRegistries/General#faq)).
-Currently, packages are submitted via [`Registrator`](https://juliaregistrator.github.io/).
-In addition to `Registrator`, [`TagBot`](https://github.com/marketplace/actions/julia-tagbot) helps manage the process of tagging releases.
-
-## Best Practices
-
-Packages should avoid mutating their own state (writing to files within their package directory).
-Packages should, in general, not assume that they are located in a writable location (e.g. if installed as part of a system-wide depot) or even a stable one (e.g. if they are bundled into a system image by [PackageCompiler.jl](https://github.com/JuliaLang/PackageCompiler.jl)).
-To support the various use cases in the Julia package ecosystem, the Pkg developers have created a number of auxiliary packages and techniques to help package authors create self-contained, immutable, and relocatable packages:
-
-* [`Artifacts`](https://pkgdocs.julialang.org/v1/artifacts/) can be used to bundle chunks of data alongside your package, or even allow them to be downloaded on-demand.
-  Prefer artifacts over attempting to open a file via a path such as `joinpath(@__DIR__, "data", "my_dataset.csv")` as this is non-relocatable.
-  Once your package has been precompiled, the result of `@__DIR__` will have been baked into your precompiled package data, and if you attempt to distribute this package, it will attempt to load files at the wrong location.
-  Artifacts can be bundled and accessed easily using the `artifact"name"` string macro.
-
-* [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) provides the notion of "scratch spaces", mutable containers of data for packages.
-  Scratch spaces are designed for data caches that are completely managed by a package and should be removed when the package itself is uninstalled.
-  For important user-generated data, packages should continue to write out to a user-specified path that is not managed by Julia or Pkg.
-
-* [`Preferences.jl`](https://github.com/JuliaPackaging/Preferences.jl) allows packages to read and write preferences to the top-level `Project.toml`.
-  These preferences can be read at runtime or compile-time, to enable or disable different aspects of package behavior.
-  Packages previously would write out files to their own package directories to record options set by the user or environment, but this is highly discouraged now that `Preferences` is available.
diff --git a/doc/src/tutorials/external.md b/doc/src/tutorials/external.md
deleted file mode 100644
index 0211db3d63a5e..0000000000000
--- a/doc/src/tutorials/external.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# External Tutorials
-
-We have created a non-exhaustive list of community provided Julia tutorials.
-[Check them out to learn Julia through the lens of someone from the community](https://julialang.org/learning/tutorials/).
diff --git a/pkgimage.mk b/pkgimage.mk
index c9de49d2f8421..740b9760cab48 100644
--- a/pkgimage.mk
+++ b/pkgimage.mk
@@ -6,147 +6,32 @@ include $(JULIAHOME)/stdlib/stdlib.mk
 
 
 # set some influential environment variables
-export JULIA_DEPOT_PATH := $(build_prefix)/share/julia
-export JULIA_LOAD_PATH := @stdlib
+export JULIA_DEPOT_PATH := $(shell echo $(call cygpath_w,$(build_prefix)/share/julia))
+export JULIA_LOAD_PATH := @stdlib$(PATHSEP)$(shell echo $(call cygpath_w,$(JULIAHOME)/stdlib))
 unexport JULIA_PROJECT :=
 unexport JULIA_BINDIR :=
 
 export JULIA_FALLBACK_REPL := true
 
 default: release
-release: all-release
-debug: all-debug
+release: $(BUILDDIR)/stdlib/release.image
+debug: $(BUILDDIR)/stdlib/debug.image
 all: release debug
 
-$(JULIA_DEPOT_PATH):
+$(JULIA_DEPOT_PATH)/compiled:
 	mkdir -p $@
 
 print-depot-path:
 	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH')
 
-all-release: $(addprefix cache-release-, $(STDLIBS))
-all-debug:   $(addprefix cache-debug-, $(STDLIBS))
+$(BUILDDIR)/stdlib/%.image: $(JULIAHOME)/stdlib/Project.toml $(JULIAHOME)/stdlib/Manifest.toml $(INDEPENDENT_STDLIBS_SRCS) $(JULIA_DEPOT_PATH)/compiled
+	export JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)"
+	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.Precompilation.precompilepkgs(;configs=[``=>Base.CacheFlags(), `--check-bounds=yes`=>Base.CacheFlags(;check_bounds=1)])')
+	touch $@
 
-define stdlib_builder
-ifneq ($(filter $(1),$(INDEPENDENT_STDLIBS) $(PKG_EXTS)),)
-# Define target-specific export for `JULIA_CPU_TARGET`
-$$(BUILDDIR)/stdlib/$1.release.image: export JULIA_CPU_TARGET=$(JULIA_CPU_TARGET)
-$$(BUILDDIR)/stdlib/$1.debug.image: export JULIA_CPU_TARGET=$(JULIA_CPU_TARGET)
-ifneq ($(filter $(1),$(INDEPENDENT_STDLIBS)),)
-$$(BUILDDIR)/stdlib/$1.release.image: $$($1_SRCS) $$(addsuffix .release.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys.$(SHLIB_EXT)
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
-	touch $$@
-$$(BUILDDIR)/stdlib/$1.debug.image: $$($1_SRCS) $$(addsuffix .debug.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
-	touch $$@
-endif
-ifneq ($(filter $(1),$(PKG_EXTS)),)
-# This is weird. It set up for multiple Pkg exts because that suits the structure here better
-# but it hard codes the deps and `import REPL, Pkg`
-$$(BUILDDIR)/stdlib/REPLExt.release.image: $$(REPLExt_SRCS) $$(BUILDDIR)/stdlib/Pkg.release.image $$(BUILDDIR)/stdlib/REPL.release.image
-	@$$(call PRINT_JULIA, $$(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'using REPL; using Pkg')
-	@$$(call PRINT_JULIA, $$(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e 'using REPL; using Pkg')
-	touch $$@
-$$(BUILDDIR)/stdlib/REPLExt.debug.image: $$(REPLExt_SRCS) $(BUILDDIR)/stdlib/Pkg.debug.image $$(BUILDDIR)/stdlib/REPL.debug.image
-	@$$(call PRINT_JULIA, $$(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'using REPL; using Pkg')
-	@$$(call PRINT_JULIA, $$(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e 'using REPL; using Pkg')
-	touch $$@
-endif
-else
-ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),)
-$$(BUILDDIR)/stdlib/$1.release.image:
-	touch $$@
-$$(BUILDDIR)/stdlib/$1.debug.image:
-	touch $$@
-else
-$$(error $(1) neither in STDLIBS_WITHIN_SYSIMG nor INDEPENDENT_STDLIBS)
-endif
-endif
-cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image
-cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image
-.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image
-endef
+$(BUILDDIR)/stdlib/release.image: $(build_private_libdir)/sys.$(SHLIB_EXT)
+$(BUILDDIR)/stdlib/debug.image: $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
 
-# Note: you can check for the correctness of this tree by running `JULIA_DEBUG=nested_precomp make` and looking
-# out for `Debug: Nested precompilation` logs.
-
-# no dependencies
-$(eval $(call stdlib_builder,MozillaCACerts_jll,))
-$(eval $(call stdlib_builder,ArgTools,))
-$(eval $(call stdlib_builder,Artifacts,))
-$(eval $(call stdlib_builder,Base64,))
-$(eval $(call stdlib_builder,CRC32c,))
-$(eval $(call stdlib_builder,FileWatching,))
-$(eval $(call stdlib_builder,Libdl,))
-$(eval $(call stdlib_builder,Mmap,))
-$(eval $(call stdlib_builder,NetworkOptions,))
-$(eval $(call stdlib_builder,SHA,))
-$(eval $(call stdlib_builder,Serialization,))
-$(eval $(call stdlib_builder,Sockets,))
-$(eval $(call stdlib_builder,Unicode,))
-$(eval $(call stdlib_builder,Profile,))
-$(eval $(call stdlib_builder,StyledStrings,))
-$(eval $(call stdlib_builder,SuiteSparse_jll,))
-
-# 1-depth packages
-$(eval $(call stdlib_builder,GMP_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,LLVMLibUnwind_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,LibUV_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,LibUnwind_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,MbedTLS_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,nghttp2_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,OpenLibm_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,PCRE2_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,Zlib_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,dSFMT_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,libLLVM_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,libblastrampoline_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,p7zip_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,OpenBLAS_jll,Artifacts Libdl))
-$(eval $(call stdlib_builder,Markdown,Base64))
-$(eval $(call stdlib_builder,Printf,Unicode))
-$(eval $(call stdlib_builder,Random,SHA))
-$(eval $(call stdlib_builder,Logging,StyledStrings))
-$(eval $(call stdlib_builder,Tar,ArgTools,SHA))
-$(eval $(call stdlib_builder,DelimitedFiles,Mmap))
-$(eval $(call stdlib_builder,JuliaSyntaxHighlighting,StyledStrings))
-
-# 2-depth packages
-$(eval $(call stdlib_builder,LLD_jll,Zlib_jll libLLVM_jll Artifacts Libdl))
-$(eval $(call stdlib_builder,LibSSH2_jll,Artifacts Libdl MbedTLS_jll))
-$(eval $(call stdlib_builder,MPFR_jll,Artifacts Libdl GMP_jll))
-$(eval $(call stdlib_builder,LinearAlgebra,Libdl libblastrampoline_jll OpenBLAS_jll))
-$(eval $(call stdlib_builder,Dates,Printf))
-$(eval $(call stdlib_builder,Distributed,Random Serialization Sockets))
-$(eval $(call stdlib_builder,Future,Random))
-$(eval $(call stdlib_builder,UUIDs,Random SHA))
-$(eval $(call stdlib_builder,InteractiveUtils,Markdown))
-
- # 3-depth packages
-$(eval $(call stdlib_builder,LibGit2_jll,MbedTLS_jll LibSSH2_jll Artifacts Libdl))
-$(eval $(call stdlib_builder,LibCURL_jll,LibSSH2_jll nghttp2_jll MbedTLS_jll Zlib_jll Artifacts Libdl))
-$(eval $(call stdlib_builder,REPL,InteractiveUtils Markdown Sockets StyledStrings Unicode))
-$(eval $(call stdlib_builder,SharedArrays,Distributed Mmap Random Serialization))
-$(eval $(call stdlib_builder,TOML,Dates))
-$(eval $(call stdlib_builder,Test,Logging Random Serialization InteractiveUtils))
-
-# 4-depth packages
-$(eval $(call stdlib_builder,LibGit2,LibGit2_jll NetworkOptions Printf SHA Base64))
-$(eval $(call stdlib_builder,LibCURL,LibCURL_jll MozillaCACerts_jll))
-
-# 5-depth packages
-$(eval $(call stdlib_builder,Downloads,ArgTools FileWatching LibCURL NetworkOptions))
-
-# 6-depth packages
-$(eval $(call stdlib_builder,Pkg, Artifacts Dates Downloads FileWatching LibGit2 Libdl\
-								  Logging Markdown Printf REPL Random SHA Serialization\
-								  TOML Tar UUIDs p7zip_jll))
-
-# 7-depth packages
-$(eval $(call stdlib_builder,LazyArtifacts,Artifacts Pkg))
-$(eval $(call stdlib_builder,REPLExt,Pkg REPL))
-
-$(eval $(call stdlib_builder,SparseArrays,Libdl LinearAlgebra Random Serialization SuiteSparse_jll))
-$(eval $(call stdlib_builder,Statistics,LinearAlgebra SparseArrays))
+clean:
+	rm -rf $(JULIA_DEPOT_PATH)/compiled
+	rm -f $(BUILDDIR)/stdlib/*.image
diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp
index 22b3beef996db..e73399c2ecde4 100644
--- a/src/APInt-C.cpp
+++ b/src/APInt-C.cpp
@@ -475,6 +475,27 @@ void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerP
     memcpy(pr, pa, onumbytes);
 }
 
+#if JL_LLVM_VERSION >= 170000
+extern "C" JL_DLLEXPORT
+unsigned countr_zero_8(uint8_t Val) {
+    return countr_zero(Val);
+}
+
+extern "C" JL_DLLEXPORT
+unsigned countr_zero_16(uint16_t Val) {
+    return countr_zero(Val);
+}
+
+extern "C" JL_DLLEXPORT
+unsigned countr_zero_32(uint32_t Val) {
+    return countr_zero(Val);
+}
+
+extern "C" JL_DLLEXPORT
+unsigned countr_zero_64(uint64_t Val) {
+    return countr_zero(Val);
+}
+#else
 extern "C" JL_DLLEXPORT
 unsigned countTrailingZeros_8(uint8_t Val) {
     return countTrailingZeros(Val);
@@ -494,6 +515,7 @@ extern "C" JL_DLLEXPORT
 unsigned countTrailingZeros_64(uint64_t Val) {
     return countTrailingZeros(Val);
 }
+#endif
 
 extern "C" JL_DLLEXPORT
 void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr) {
@@ -523,6 +545,37 @@ void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integer
         memcpy(pr, pa, numbytes);
 }
 
+#if JL_LLVM_VERSION >= 170000
+extern "C" JL_DLLEXPORT
+unsigned LLVMPopcount(unsigned numbits, integerPart *pa) {
+    CREATE(a)
+    return a.popcount();
+}
+
+extern "C" JL_DLLEXPORT
+unsigned LLVMCountr_one(unsigned numbits, integerPart *pa) {
+    CREATE(a)
+    return a.countr_one();
+}
+
+extern "C" JL_DLLEXPORT
+unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa) {
+    CREATE(a)
+    return a.countr_zero();
+}
+
+extern "C" JL_DLLEXPORT
+unsigned LLVMCountl_one(unsigned numbits, integerPart *pa) {
+    CREATE(a)
+    return a.countl_one();
+}
+
+extern "C" JL_DLLEXPORT
+unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa) {
+    CREATE(a)
+    return a.countl_zero();
+}
+#else
 extern "C" JL_DLLEXPORT
 unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa) {
     CREATE(a)
@@ -552,3 +605,4 @@ unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa) {
     CREATE(a)
     return a.countLeadingZeros();
 }
+#endif
diff --git a/src/APInt-C.h b/src/APInt-C.h
index 816d40ccc6529..a44d922a40d24 100644
--- a/src/APInt-C.h
+++ b/src/APInt-C.h
@@ -5,12 +5,13 @@
 
 #include "julia.h"
 #include "dtypes.h"
+#include "llvm-version.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#ifdef LLVM_VERSION_MAJOR
+#if defined(__cplusplus) && defined(LLVM_VERSION_MAJOR)
 using integerPart = llvm::APInt::WordType;
 #else
 typedef void integerPart;
@@ -53,11 +54,19 @@ JL_DLLEXPORT int LLVMDiv_uov(unsigned numbits, integerPart *pa, integerPart *pb,
 JL_DLLEXPORT int LLVMRem_sov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 JL_DLLEXPORT int LLVMRem_uov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 
+#if JL_LLVM_VERSION >= 170000
+JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountr_one(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountl_one(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa);
+#else
 JL_DLLEXPORT unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa);
 JL_DLLEXPORT unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa);
 JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa);
 JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa);
 JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa);
+#endif
 
 JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
 JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
@@ -73,10 +82,17 @@ JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatyp
 JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 
+#if JL_LLVM_VERSION >= 170000
+JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val);
+JL_DLLEXPORT unsigned countr_zero_16(uint16_t Val);
+JL_DLLEXPORT unsigned countr_zero_32(uint32_t Val);
+JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val);
+#else
 JL_DLLEXPORT unsigned countTrailingZeros_8(uint8_t Val);
 JL_DLLEXPORT unsigned countTrailingZeros_16(uint16_t Val);
 JL_DLLEXPORT unsigned countTrailingZeros_32(uint32_t Val);
 JL_DLLEXPORT unsigned countTrailingZeros_64(uint64_t Val);
+#endif
 
 //uint8_t getSwappedBytes_8(uint8_t Value); // no-op
 //uint16_t getSwappedBytes_16(uint16_t Value);
diff --git a/src/Makefile b/src/Makefile
index ce81ff03cdd70..52e673aa6cc1a 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -26,7 +26,7 @@ endif
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
 
 ifeq ($(USECLANG),1)
-FLAGS += -Wno-return-type-c-linkage
+FLAGS += -Wno-return-type-c-linkage -Wno-atomic-alignment
 endif
 
 FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"'
@@ -44,9 +44,9 @@ SRCS := \
 	jltypes gf typemap smallintset ast builtins module interpreter symbol \
 	dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \
 	simplevector runtime_intrinsics precompile jloptions mtarraylist \
-	threading scheduler stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler method \
+	threading scheduler stackwalk gc-common gc-stock gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler method \
 	jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
-	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall
+	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine
 
 RT_LLVMLINK :=
 CG_LLVMLINK :=
@@ -77,9 +77,10 @@ else
 # JULIACODEGEN != LLVM
 endif
 
-RT_LLVM_LIBS := support # for APMath and some other useful ADT
-ifneq ($(LLVM_VER_MAJ),15)
-RT_LLVM_LIBS += targetparser # for getHostCPUName on LLVM 16+
+RT_LLVM_LIBS := support
+
+ifeq ($(shell test $(LLVM_VER_MAJ) -ge 16 && echo true),true)
+RT_LLVM_LIBS += targetparser
 endif
 
 ifeq ($(OS),WINNT)
@@ -102,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
@@ -126,8 +127,8 @@ ifeq ($(JULIACODEGEN),LLVM)
 ifneq ($(USE_SYSTEM_LLVM),0)
 # USE_SYSTEM_LLVM != 0
 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
-LLVM_SHLIB_SYMBOL_VERSION := $(shell nm -D --with-symbol-versions $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \
-                               grep _ZN4llvm3Any6TypeId | head -n 1 | sed -e 's/.*@//')
+LLVM_SHLIB_SYMBOL_VERSION := $(shell readelf -W --dyn-syms $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \
+                               grep _ZN4llvm3Any6TypeId | head -n 1 | sed -ne 's/.*@//p')
 
 # HACK: llvm-config doesn't correctly point to shared libs on all platforms
 #       https://github.com/JuliaLang/julia/issues/29981
@@ -315,11 +316,12 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
 $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
 $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
 $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
-$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
-$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h
-$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h
+$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h
+$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h
 $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h
@@ -329,10 +331,10 @@ $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDI
 $(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
 $(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h
 $(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h
-$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
-$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
@@ -346,7 +348,7 @@ $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc
 $(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
 
-$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
+$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc-common.o gc-stock.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
@@ -383,13 +385,13 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 
 CXXLD = $(CXX) -shared
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_CONFIG_ABSOLUTE)
 	sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \
 		        -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/"
 
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
@@ -417,7 +419,7 @@ libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS)
 
 $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
@@ -474,6 +476,8 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG
 ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc
 ifeq ($(OS),Darwin)
 ANALYSIS_DEPS += llvmunwind
+else ifeq ($(OS),OpenBSD)
+ANALYSIS_DEPS += llvmunwind
 else ifneq ($(OS),WINNT)
 ANALYSIS_DEPS += unwind
 endif
diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
index 7c31b6606139a..0a193ee132556 100644
--- a/src/abi_aarch64.cpp
+++ b/src/abi_aarch64.cpp
@@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout {
 Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0`
     if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
         return nullptr;
     size_t nfields = dt->layout->nfields;
@@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0`
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
@@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding)
+    if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return nullptr;
     return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
 }
@@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
     // uniquely addressable members.
     // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
     size_t dsz = jl_datatype_size(dt);
-    if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding)
+    if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return NULL;
     nele = 0;
     ElementType eltype;
diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp
index 68f980d7b40da..8839a37da6e13 100644
--- a/src/abi_arm.cpp
+++ b/src/abi_arm.cpp
@@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
     if (jl_is_structtype(dt)) {
         // Fast path checks before descending the type hierarchy
         // (4 x 128b vector == 64B max size)
-        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding)
+        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
             return 0;
 
         base = NULL;
diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp
index 1f10817cfeeee..f02e1022ddc2d 100644
--- a/src/abi_ppc64le.cpp
+++ b/src/abi_ppc64le.cpp
@@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout {
 // count the homogeneous floating aggregate size (saturating at max count of 8)
 unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
 {
-    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding)
+    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding)
         return 9;
 
     size_t i, l = ty->layout->nfields;
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index c07f7bf32780a..b4c8ef6095a55 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -4,7 +4,12 @@
 #include "platform.h"
 
 // target support
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Triple.h>
+#else
 #include <llvm/ADT/Triple.h>
+#endif
+#include "llvm/Support/CodeGen.h"
 #include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/TargetLibraryInfo.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
@@ -14,24 +19,9 @@
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/BasicAliasAnalysis.h>
-#include <llvm/Analysis/TypeBasedAliasAnalysis.h>
-#include <llvm/Analysis/ScopedNoAliasAA.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
@@ -288,30 +278,30 @@ static void makeSafeName(GlobalObject &G)
         G.setName(StringRef(SafeName.data(), SafeName.size()));
 }
 
-static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out)
+jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world)
 {
     ++CICacheLookups;
     jl_value_t *ci = cgparams.lookup(mi, world, world);
     JL_GC_PROMISE_ROOTED(ci);
     jl_code_instance_t *codeinst = NULL;
-    if (ci != jl_nothing) {
+    if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
         codeinst = (jl_code_instance_t*)ci;
-        *src_out = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        jl_method_t *def = codeinst->def->def.method;
-        if ((jl_value_t*)*src_out == jl_nothing)
-            *src_out = NULL;
-        if (*src_out && jl_is_method(def))
-            *src_out = jl_uncompress_ir(def, codeinst, (jl_value_t*)*src_out);
-    }
-    if (*src_out == NULL || !jl_is_code_info(*src_out)) {
+    }
+    else {
         if (cgparams.lookup != jl_rettype_inferred_addr) {
             jl_error("Refusing to automatically run type inference with custom cache lookup.");
         }
         else {
-            *ci_out = jl_type_infer(mi, world, 0, SOURCE_MODE_ABI);
+            codeinst = jl_type_infer(mi, world, SOURCE_MODE_ABI);
+            /* Even if this codeinst is ordinarily not cacheable, we need to force
+             * it into the cache here, since it was explicitly requested and is
+             * otherwise not reachable from anywhere in the system image.
+             */
+            if (!jl_mi_cache_has_ci(mi, codeinst))
+                jl_mi_cache_insert(mi, codeinst);
         }
     }
-    *ci_out = codeinst;
+    return codeinst;
 }
 
 // takes the running content that has collected in the shadow module and dump it to disk
@@ -355,7 +345,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
 
     // compile all methods for the current world and type-inference world
 
-    JL_LOCK(&jl_codegen_lock);
     auto target_info = clone.withModuleDo([&](Module &M) {
         return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
     });
@@ -390,16 +379,14 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
             // then we want to compile and emit this
             if (jl_atomic_load_relaxed(&mi->def.method->primary_world) <= this_world && this_world <= jl_atomic_load_relaxed(&mi->def.method->deleted_world)) {
                 // find and prepare the source code to compile
-                jl_code_instance_t *codeinst = NULL;
-                jl_ci_cache_lookup(*cgparams, mi, this_world, &codeinst, &src);
-                if (src && !params.compiled_functions.count(codeinst)) {
+                jl_code_instance_t *codeinst = jl_ci_cache_lookup(*cgparams, mi, this_world);
+                if (codeinst && !params.compiled_functions.count(codeinst)) {
                     // now add it to our compilation results
                     JL_GC_PROMISE_ROOTED(codeinst->rettype);
                     orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
                             params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
                             Triple(clone.getModuleUnlocked()->getTargetTriple()));
-                    jl_llvm_functions_t decls = jl_emit_code(result_m, mi, src, codeinst->rettype, params, jl_atomic_load_relaxed(&codeinst->min_world),
-                        jl_atomic_load_relaxed(&codeinst->max_world));
+                    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
                     if (result_m)
                         params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
                 }
@@ -409,7 +396,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
         // finally, make sure all referenced methods also get compiled or fixed up
         jl_compile_workqueue(params, policy);
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
     JL_GC_POP();
 
     // process the globals array, before jl_merge_module destroys them
@@ -697,7 +683,11 @@ static FunctionInfo getFunctionWeight(const Function &F)
         auto val = F.getFnAttribute("julia.mv.clones").getValueAsString();
         // base16, so must be at most 4 * length bits long
         // popcount gives number of clones
+        #if JL_LLVM_VERSION >= 170000
+        info.clones = APInt(val.size() * 4, val, 16).popcount() + 1;
+        #else
         info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1;
+        #endif
     }
     info.weight += info.insts;
     // more basic blocks = more complex than just sum of insts,
@@ -1159,7 +1149,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.obj);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::ObjectFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of object files\n");
         emitter.run(M);
         timers.obj.stopTimer();
@@ -1170,7 +1164,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.asm_);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::AssemblyFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of assembly files\n");
         emitter.run(M);
         timers.asm_.stopTimer();
@@ -1235,6 +1233,8 @@ static void materializePreserved(Module &M, Partition &partition) {
         GV.setInitializer(nullptr);
         GV.setLinkage(GlobalValue::ExternalLinkage);
         GV.setVisibility(GlobalValue::HiddenVisibility);
+        if (GV.getDLLStorageClass() != GlobalValue::DLLStorageClassTypes::DefaultStorageClass)
+            continue; // Don't mess with exported or imported globals
         GV.setDSOLocal(true);
     }
 
@@ -1440,7 +1440,9 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
         for (unsigned i = 0; i < threads; i++) {
             std::function<void()> func = [&, i]() {
                 LLVMContext ctx;
+                #if JL_LLVM_VERSION < 170000
                 SetOpaquePointer(ctx);
+                #endif
                 // Lazily deserialize the entire module
                 timers[i].deserialize.startTimer();
                 auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx);
@@ -1606,13 +1608,14 @@ void jl_dump_native_impl(void *native_code,
         TheTriple.setOSName(Str);
     }
     Optional<Reloc::Model> RelocModel;
-    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) {
+    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) {
         RelocModel = Reloc::PIC_;
     }
+
     CodeModel::Model CMModel = CodeModel::Small;
-    if (TheTriple.isPPC()) {
-        // On PPC the small model is limited to 16bit offsets
-        CMModel = CodeModel::Medium;
+    if (TheTriple.isPPC() || (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())) {
+        // On PPC the small model is limited to 16bit offsets. For very large images the small code model
+        CMModel = CodeModel::Medium; //  isn't good enough on x86 so use Medium, it has no cost because only the image goes in .ldata
     }
     std::unique_ptr<TargetMachine> SourceTM(
         jl_ExecutionEngine->getTarget().createTargetMachine(
@@ -1622,7 +1625,11 @@ void jl_dump_native_impl(void *native_code,
             jl_ExecutionEngine->getTargetOptions(),
             RelocModel,
             CMModel,
+#if JL_LLVM_VERSION >= 180000
+            CodeGenOptLevel::Aggressive // -O3 TODO: respect command -O0 flag?
+#else
             CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
+#endif
             ));
     fixupTM(*SourceTM);
     auto DL = jl_create_datalayout(*SourceTM);
@@ -1643,7 +1650,9 @@ void jl_dump_native_impl(void *native_code,
     if (z) {
         JL_TIMING(NATIVE_AOT, NATIVE_Sysimg);
         LLVMContext Context;
+        #if JL_LLVM_VERSION < 170000
         SetOpaquePointer(Context);
+        #endif
         Module sysimgM("sysimg", Context);
         sysimgM.setTargetTriple(TheTriple.str());
         sysimgM.setDataLayout(DL);
@@ -1655,6 +1664,12 @@ void jl_dump_native_impl(void *native_code,
                                      GlobalVariable::ExternalLinkage,
                                      data, "jl_system_image_data");
         sysdata->setAlignment(Align(64));
+#if JL_LLVM_VERSION >= 180000
+        sysdata->setCodeModel(CodeModel::Large);
+#else
+        if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
+            sysdata->setSection(".ldata");
+#endif
         addComdat(sysdata, TheTriple);
         Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size);
         addComdat(new GlobalVariable(sysimgM, len->getType(), true,
@@ -1684,6 +1699,7 @@ void jl_dump_native_impl(void *native_code,
         JL_TIMING(NATIVE_AOT, NATIVE_Setup);
         dataM.setTargetTriple(TheTriple.str());
         dataM.setDataLayout(DL);
+        dataM.setPICLevel(PICLevel::BigPIC);
         auto &Context = dataM.getContext();
 
         Type *T_psize = dataM.getDataLayout().getIntPtrType(Context)->getPointerTo();
@@ -1759,6 +1775,7 @@ void jl_dump_native_impl(void *native_code,
             if (jl_small_typeof_copy) {
                 jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
                 jl_small_typeof_copy->setDSOLocal(true);
+                jl_small_typeof_copy->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DefaultStorageClass);
             }
         }
 
@@ -1780,7 +1797,9 @@ void jl_dump_native_impl(void *native_code,
     {
         JL_TIMING(NATIVE_AOT, NATIVE_Metadata);
         LLVMContext Context;
+        #if JL_LLVM_VERSION < 170000
         SetOpaquePointer(Context);
+        #endif
         Module metadataM("metadata", Context);
         metadataM.setTargetTriple(TheTriple.str());
         metadataM.setDataLayout(DL);
@@ -1790,16 +1809,18 @@ void jl_dump_native_impl(void *native_code,
         // reflect the address of the jl_RTLD_DEFAULT_handle variable
         // back to the caller, so that we can check for consistency issues
         GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&metadataM);
-        addComdat(new GlobalVariable(metadataM,
-                                    jlRTLD_DEFAULT_var->getType(),
-                                    true,
-                                    GlobalVariable::ExternalLinkage,
-                                    jlRTLD_DEFAULT_var,
-                                    "jl_RTLD_DEFAULT_handle_pointer"), TheTriple);
 
         Type *T_size = DL.getIntPtrType(Context);
         Type *T_psize = T_size->getPointerTo();
 
+        auto FT = FunctionType::get(Type::getInt8Ty(Context)->getPointerTo()->getPointerTo(), {}, false);
+        auto F = Function::Create(FT, Function::ExternalLinkage, "get_jl_RTLD_DEFAULT_handle_addr", metadataM);
+        llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F));
+        builder.CreateRet(jlRTLD_DEFAULT_var);
+        F->setLinkage(GlobalValue::ExternalLinkage);
+        if (TheTriple.isOSBinFormatCOFF())
+            F->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
+
         if (TheTriple.isOSWindows()) {
             // Windows expect that the function `_DllMainStartup` is present in an dll.
             // Normal compilers use something like Zig's crtdll.c instead we provide a
@@ -1868,26 +1889,31 @@ void jl_dump_native_impl(void *native_code,
         JL_TIMING(NATIVE_AOT, NATIVE_Write);
 
         object::Archive::Kind Kind = getDefaultForHost(TheTriple);
+#if JL_LLVM_VERSION >= 180000
+#define WritingMode SymtabWritingMode::NormalSymtab
+#else
+#define WritingMode true
+#endif
 #define WRITE_ARCHIVE(fname, field, prefix, suffix) \
-        if (fname) {\
-            SmallVector<NewArchiveMember, 0> archive; \
-            SmallVector<std::string, 16> filenames; \
-            SmallVector<StringRef, 16> buffers; \
-            for (size_t i = 0; i < threads; i++) { \
-                filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \
-                buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \
-            } \
-            filenames.push_back("metadata" prefix suffix); \
-            buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \
-            if (z) { \
-                filenames.push_back("sysimg" prefix suffix); \
-                buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \
-            } \
-            for (size_t i = 0; i < filenames.size(); i++) { \
-                archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
-            } \
-            handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \
-        }
+    if (fname) {\
+        SmallVector<NewArchiveMember, 0> archive; \
+        SmallVector<std::string, 16> filenames; \
+        SmallVector<StringRef, 16> buffers; \
+        for (size_t i = 0; i < threads; i++) { \
+            filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \
+            buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \
+        } \
+        filenames.push_back("metadata" prefix suffix); \
+        buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \
+        if (z) { \
+            filenames.push_back("sysimg" prefix suffix); \
+            buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \
+        } \
+        for (size_t i = 0; i < filenames.size(); i++) { \
+            archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
+        } \
+        handleAllErrors(writeArchive(fname, archive, WritingMode, Kind, true, false), reportWriterError); \
+    }
 
         WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc");
         WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc");
@@ -1903,104 +1929,68 @@ void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIR
     PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
 }
 
-// sometimes in GDB you want to find out what code was created from a mi
+// sometimes in GDB you want to find out what code would be created from a mi
 extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
 {
     jl_llvmf_dump_t llvmf_dump;
     size_t world = jl_current_task->world_age;
     JL_STREAM *stream = (JL_STREAM*)STDERR_FILENO;
 
+    jl_code_info_t *src = jl_gdbcodetyped1(mi, world);
+    JL_GC_PUSH1(&src);
+
     jl_printf(stream, "---- dumping IR for ----\n");
     jl_static_show(stream, (jl_value_t*)mi);
     jl_printf(stream, "\n----\n");
 
-    jl_printf(stream, "\n---- unoptimized IR ----");
-    jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, false, jl_default_cgparams);
+    jl_printf(stream, "\n---- unoptimized IR ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, false, jl_default_cgparams);
     if (llvmf_dump.F) {
         jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source");
-        jl_static_show(stream, ir);
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-    jl_printf(stream, "----\n");
+    jl_printf(stream, "\n----\n");
 
-    jl_printf(stream, "\n---- optimized IR ----");
-    jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, true, jl_default_cgparams);
+    jl_printf(stream, "\n---- optimized IR ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams);
     if (llvmf_dump.F) {
         jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source");
-        jl_static_show(stream, ir);
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-    jl_printf(stream, "----\n");
+    jl_printf(stream, "\n----\n");
 
-    jl_printf(stream, "\n---- assembly ----");
-    jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, true, jl_default_cgparams);
+    jl_printf(stream, "\n---- assembly ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams);
     if (llvmf_dump.F) {
         jl_value_t *ir = jl_dump_function_asm(&llvmf_dump, 0, "", "source", 0, true);
-        jl_static_show(stream, ir);
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-    jl_printf(stream, "----\n");
+    jl_printf(stream, "\n----\n");
+    JL_GC_POP();
 
-    jl_code_info_t *src = NULL;
-    jl_value_t *ci = jl_default_cgparams.lookup(mi, world, world);
-    if (ci == jl_nothing) {
-        ci = (jl_value_t*)jl_type_infer(mi, world, 0, SOURCE_MODE_FORCE_SOURCE_UNCACHED);
-    } else {
-        ci = NULL;
-    }
-    if (ci) {
-        jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-        src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method)) {
-            JL_GC_PUSH2(&codeinst, &src);
-            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
-            JL_GC_POP();
-        }
-    }
     return src;
 }
 
 // --- native code info, and dump function to IR and ASM ---
 // Get pointer to llvm::Function instance, compiling if necessary
 // for use in reflection from Julia.
-// This is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
-// misuse will leak memory or cause read-after-free
+// This is paired with jl_dump_function_ir and jl_dump_function_asm, either of which will free all memory allocated here
 extern "C" JL_DLLEXPORT_CODEGEN
-void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
+void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params)
 {
-    if (jl_is_method(mi->def.method) && mi->def.method->source == NULL &&
-            mi->def.method->generator == NULL && !mi->def.method->is_for_opaque_closure) {
-        // not a generic function
-        dump->F = NULL;
-        return;
-    }
-
-    // get the source code for this function
-    jl_code_info_t *src = NULL;
-    jl_code_instance_t *codeinst = NULL;
-    JL_GC_PUSH2(&src, &codeinst);
-    jl_value_t *ci = params.lookup(mi, world, world);
-    if (ci && ci != jl_nothing) {
-        codeinst = (jl_code_instance_t*)ci;
-        src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-    }
-    if (!src || (jl_value_t*)src == jl_nothing) {
-        codeinst = jl_type_infer(mi, world, 0, SOURCE_MODE_FORCE_SOURCE_UNCACHED);
-        if (codeinst) {
-            src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        }
-    }
-    if (src) {
-        if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
-    }
-
     // emit this function into a new llvm module
-    if (codeinst && src && jl_is_code_info(src)) {
+    dump->F = nullptr;
+    dump->TSM = nullptr;
+    if (src && jl_is_code_info(src)) {
         auto ctx = jl_ExecutionEngine->getContext();
         orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx);
         uint64_t compiler_start_time = 0;
         uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
         if (measure_compile_time_enabled)
             compiler_start_time = jl_hrtime();
-        JL_LOCK(&jl_codegen_lock);
         auto target_info = m.withModuleDo([&](Module &M) {
             return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
         });
@@ -2014,8 +2004,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
         // This would also be nice, but it seems to cause OOMs on the windows32 builder
         // To get correct names in the IR this needs to be at least 2
         output.debug_level = params.debug_info_level;
-        auto decls = jl_emit_code(m, mi, src, codeinst->rettype, output, jl_atomic_load_relaxed(&codeinst->min_world),  jl_atomic_load_relaxed(&codeinst->max_world));
-        JL_UNLOCK(&jl_codegen_lock); // Might GC
+        auto decls = jl_emit_code(m, mi, src, output);
 
         Function *F = NULL;
         if (m) {
@@ -2027,12 +2016,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
                     global.second->setLinkage(GlobalValue::ExternalLinkage);
                 } else {
                     auto p = literal_static_pointer_val(global.first, global.second->getValueType());
+                    #if JL_LLVM_VERSION >= 170000
+                    Type *elty = PointerType::get(output.getContext(), 0);
+                    #else
                     Type *elty;
                     if (p->getType()->isOpaquePointerTy()) {
                         elty = PointerType::get(output.getContext(), 0);
                     } else {
                         elty = p->getType()->getNonOpaquePointerElementType();
                     }
+                    #endif
                     // For pretty printing, when LLVM inlines the global initializer into its loads
                     auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent());
                     global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType()));
@@ -2061,7 +2054,6 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
                 fname = &decls.functionObject;
             F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
         }
-        JL_GC_POP();
         if (measure_compile_time_enabled) {
             auto end = jl_hrtime();
             jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
@@ -2072,7 +2064,4 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
             return;
         }
     }
-
-    const char *mname = name_from_method_instance(mi);
-    jl_errorf("unable to compile source for function %s", mname);
 }
diff --git a/src/array.c b/src/array.c
index 979772e649727..f0051ec17565a 100644
--- a/src/array.c
+++ b/src/array.c
@@ -304,22 +304,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     jl_task_t *ct = jl_current_task;
     jl_value_t *s;
     jl_ptls_t ptls = ct->ptls;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-        int pool_id = jl_gc_szclass_align8(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
-        int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
-        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-        s = jl_gc_big_alloc_noinline(ptls, allocsz);
-    }
+    s = (jl_value_t*)jl_gc_alloc(ptls, sz, jl_string_type);
     jl_set_typetagof(s, jl_string_tag, 0);
-    maybe_record_alloc_to_profile(s, len, jl_string_type);
     *(size_t*)s = len;
     jl_string_data(s)[len] = 0;
     return s;
diff --git a/src/ast.c b/src/ast.c
index bd3460cbe7fd2..ea1de429a946c 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
 #endif
@@ -60,6 +61,7 @@ JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
 JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
 JL_DLLEXPORT jl_sym_t *jl_as_sym;
 JL_DLLEXPORT jl_sym_t *jl_global_sym;
+JL_DLLEXPORT jl_sym_t *jl_globaldecl_sym;
 JL_DLLEXPORT jl_sym_t *jl_local_sym;
 JL_DLLEXPORT jl_sym_t *jl_list_sym;
 JL_DLLEXPORT jl_sym_t *jl_dot_sym;
@@ -116,7 +118,7 @@ JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
 JL_DLLEXPORT jl_sym_t *jl_release_sym;
 JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
 JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
-
+JL_DLLEXPORT jl_sym_t *jl_uninferred_sym;
 
 static const uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
@@ -174,7 +176,8 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     jl_sym_t *var = scmsym_to_julia(fl_ctx, args[0]);
     jl_binding_t *b = jl_get_module_binding(ctx->module, var, 0);
-    return (b != NULL && jl_atomic_load_relaxed(&b->owner) == b) ? fl_ctx->T : fl_ctx->F;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return (bpart != NULL && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F;
 }
 
 static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
@@ -195,8 +198,7 @@ static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint
             mod = *(jl_module_t**)cv_data((cvalue_t*)ptr(argmod));
             JL_GC_PROMISE_ROOTED(mod);
         } else {
-            (void)tosymbol(fl_ctx, argmod, "nothrow-julia-global");
-            if (scmsym_to_julia(fl_ctx, argmod) != jl_thismodule_sym) {
+            if (!iscons(argmod) || !issymbol(car_(argmod)) || scmsym_to_julia(fl_ctx, car_(argmod)) != jl_thismodule_sym) {
                 lerrorf(fl_ctx, fl_ctx->ArgError, "nothrow-julia-global: Unknown globalref module kind");
             }
         }
@@ -204,25 +206,56 @@ static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint
         var = scmsym_to_julia(fl_ctx, args[1]);
     }
     jl_binding_t *b = jl_get_module_binding(mod, var, 0);
-    b = b ? jl_atomic_load_relaxed(&b->owner) : NULL;
-    return b != NULL && jl_atomic_load_relaxed(&b->value) != NULL ? fl_ctx->T : fl_ctx->F;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (!bpart)
+        return fl_ctx->F;
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return fl_ctx->F;
+    return  (jl_bkind_is_some_constant(decode_restriction_kind(pku)) ?
+        decode_restriction_value(pku) : jl_atomic_load_relaxed(&b->value)) != NULL ? fl_ctx->T : fl_ctx->F;
 }
 
-static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
+// Used to generate a unique suffix for a given symbol (e.g. variable or type name)
+// first argument contains a stack of method definitions seen so far by `closure-convert` in flisp.
+// if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes
+// of the form $top_level_method_name##$counter, where `counter` is the smallest integer
+// such that the resulting name is not already defined in the current module's bindings.
+// If the top of the stack is NIL, we simply return the current module's counter.
+// This ensures that precompile statements are a bit more stable across different versions
+// of a codebase. see #53719
+static value_t fl_module_unique_name(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
+    argcount(fl_ctx, "julia-module-unique-name", nargs, 1);
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
-    assert(ctx->module);
-    return fixnum(jl_module_next_counter(ctx->module));
-}
-
-static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
-{
-    return symbol(fl_ctx, jl_filename);
-}
-
-static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
-{
-    return fixnum(jl_lineno);
+    jl_module_t *m = ctx->module;
+    assert(m != NULL);
+    // Get the outermost function name from the `parsed_method_stack` top
+    char *funcname = NULL;
+    value_t parsed_method_stack = args[0];
+    if (parsed_method_stack != fl_ctx->NIL) {
+        value_t bottom_stack_symbol = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "last")), parsed_method_stack);
+        funcname = tosymbol(fl_ctx, bottom_stack_symbol, "julia-module-unique-name")->name;
+    }
+    size_t sz = funcname != NULL ? strlen(funcname) + 32 : 32; // 32 is enough for the suffix
+    char *buf = (char*)alloca(sz);
+    if (funcname != NULL && strchr(funcname, '#') == NULL) {
+        for (int i = 0; ; i++) {
+            snprintf(buf, sz, "%s##%d", funcname, i);
+            jl_sym_t *sym = jl_symbol(buf);
+            JL_LOCK(&m->lock);
+            if (jl_get_module_binding(m, sym, 0) == NULL) { // make sure this name is not already taken
+                jl_get_module_binding(m, sym, 1); // create the binding
+                JL_UNLOCK(&m->lock);
+                return symbol(fl_ctx, buf);
+            }
+            JL_UNLOCK(&m->lock);
+        }
+    }
+    else {
+        snprintf(buf, sz, "%d", jl_module_next_counter(m));
+    }
+    return symbol(fl_ctx, buf);
 }
 
 static int jl_is_number(jl_value_t *v)
@@ -255,10 +288,8 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
 static const builtinspec_t julia_flisp_ast_ext[] = {
     { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint
     { "nothrow-julia-global", fl_nothrow_julia_global },
-    { "current-julia-module-counter", fl_current_module_counter },
+    { "current-julia-module-counter", fl_module_unique_name },
     { "julia-scalar?", fl_julia_scalar },
-    { "julia-current-file", fl_julia_current_file },
-    { "julia-current-line", fl_julia_current_line },
     { NULL, NULL }
 };
 
@@ -368,6 +399,7 @@ void jl_init_common_symbols(void)
     jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method");
     jl_const_sym = jl_symbol("const");
     jl_global_sym = jl_symbol("global");
+    jl_globaldecl_sym = jl_symbol("globaldecl");
     jl_local_sym = jl_symbol("local");
     jl_thunk_sym = jl_symbol("thunk");
     jl_toplevel_sym = jl_symbol("toplevel");
@@ -428,6 +460,7 @@ void jl_init_common_symbols(void)
     jl_release_sym = jl_symbol("release");
     jl_acquire_release_sym = jl_symbol("acquire_release");
     jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
+    jl_uninferred_sym = jl_symbol("uninferred");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
@@ -475,7 +508,7 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo
     }
     JL_CATCH {
         // if expression cannot be converted, replace with error expr
-        //jl_(jl_current_exception());
+        //jl_(jl_current_exception(jl_current_task));
         //jlbacktrace();
         jl_expr_t *ex = jl_exprn(jl_error_sym, 1);
         v = (jl_value_t*)ex;
@@ -576,20 +609,16 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             JL_GC_POP();
             return temp;
         }
-        else if (sym == jl_lineinfo_sym && n == 5) {
-            jl_value_t *modu=NULL, *name=NULL, *file=NULL, *linenum=NULL, *inlinedat=NULL;
-            JL_GC_PUSH5(&modu, &name, &file, &linenum, &inlinedat);
+        else if (sym == jl_lineinfo_sym && n == 3) {
+            jl_value_t *file=NULL, *linenum=NULL, *inlinedat=NULL;
+            JL_GC_PUSH3(&file, &linenum, &inlinedat);
             value_t lst = e;
-            modu = scm_to_julia_(fl_ctx, car_(lst), mod);
-            lst = cdr_(lst);
-            name = scm_to_julia_(fl_ctx, car_(lst), mod);
-            lst = cdr_(lst);
             file = scm_to_julia_(fl_ctx, car_(lst), mod);
             lst = cdr_(lst);
             linenum = scm_to_julia_(fl_ctx, car_(lst), mod);
             lst = cdr_(lst);
             inlinedat = scm_to_julia_(fl_ctx, car_(lst), mod);
-            temp = jl_new_struct(jl_lineinfonode_type, modu, name, file, linenum, inlinedat);
+            temp = jl_new_struct(jl_lineinfonode_type, file, linenum, inlinedat);
             JL_GC_POP();
             return temp;
         }
@@ -733,8 +762,20 @@ static int julia_to_scm_noalloc1(fl_context_t *fl_ctx, jl_value_t *v, value_t *r
 
 static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) JL_NOTSAFEPOINT
 {
-    if (jl_is_long(v) && fits_fixnum(jl_unbox_long(v)))
-        return fixnum(jl_unbox_long(v));
+    if (jl_is_long(v)) {
+        if (fits_fixnum(jl_unbox_long(v))) {
+            return fixnum(jl_unbox_long(v));
+        } else {
+#ifdef _P64
+            value_t prim = cprim(fl_ctx, fl_ctx->int64type, sizeof(int64_t));
+            *((int64_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v);
+#else
+            value_t prim = cprim(fl_ctx, fl_ctx->int32type, sizeof(int32_t));
+            *((int32_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v);
+#endif
+            return prim;
+        }
+    }
     if (check_valid) {
         if (jl_is_ssavalue(v))
             lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST");
@@ -938,10 +979,6 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
         jl_gc_wb(new_ci, new_ci->slotnames);
         new_ci->slotflags = jl_array_copy(new_ci->slotflags);
         jl_gc_wb(new_ci, new_ci->slotflags);
-        new_ci->codelocs = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->codelocs);
-        jl_gc_wb(new_ci, new_ci->codelocs);
-        new_ci->linetable = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->linetable);
-        jl_gc_wb(new_ci, new_ci->linetable);
         new_ci->ssaflags = jl_array_copy(new_ci->ssaflags);
         jl_gc_wb(new_ci, new_ci->ssaflags);
 
@@ -1146,7 +1183,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
                 margs[0] = jl_cstr_to_string("<macrocall>");
             margs[1] = jl_fieldref(lno, 0); // extract and allocate line number
             jl_rethrow_other(jl_new_struct(jl_loaderror_type, margs[0], margs[1],
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
         }
     }
     ct->world_age = last_age;
@@ -1162,7 +1199,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     jl_expr_t *e = (jl_expr_t*)expr;
     if (e->head == jl_inert_sym ||
         e->head == jl_module_sym ||
-        //e->head == jl_toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
+        e->head == jl_toplevel_sym ||
         e->head == jl_meta_sym) {
         return expr;
     }
diff --git a/src/ast.scm b/src/ast.scm
index abfce314fc569..5cc97014e373e 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -114,7 +114,7 @@
                    (deparse-prefix-call (cadr e) (cddr e) #\( #\)))))
            (($ &)          (if (and (pair? (cadr e))
                                     (not (memq (caadr e)
-                                               '(outerref null true false tuple $ vect braces))))
+                                               '(null true false tuple $ vect braces))))
                                (string (car e) "(" (deparse (cadr e)) ")")
                                (string (car e) (deparse (cadr e)))))
            ((|::|)         (if (length= e 2)
@@ -254,7 +254,6 @@
            ((top)          (deparse (cadr e)))
            ((core)         (string "Core." (deparse (cadr e))))
            ((globalref)    (string (deparse (cadr e)) "." (deparse-colon-dot (caddr e))))
-           ((outerref)     (string (deparse (cadr e))))
            ((ssavalue)     (string "SSAValue(" (cadr e) ")"))
            ((line)         (if (length= e 2)
                                (string "# line " (cadr e))
@@ -298,7 +297,7 @@
 ;; predicates and accessors
 
 (define (quoted? e)
-  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo)))
+  (memq (car e) '(quote top core globalref line break inert meta inbounds inline noinline loopinfo)))
 (define (quotify e) `',e)
 (define (unquote e)
   (if (and (pair? e) (memq (car e) '(quote inert)))
@@ -393,9 +392,6 @@
 (define (globalref? e)
   (and (pair? e) (eq? (car e) 'globalref)))
 
-(define (outerref? e)
-  (and (pair? e) (eq? (car e) 'outerref)))
-
 (define (nothing? e)
   (and (pair? e) (eq? (car e) 'null)))
 
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index b0536bef24e27..7fbd555758675 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -69,6 +69,7 @@ DECLARE_BUILTIN(svec);
 DECLARE_BUILTIN(swapfield);
 DECLARE_BUILTIN(swapglobal);
 DECLARE_BUILTIN(throw);
+DECLARE_BUILTIN(throw_methoderror);
 DECLARE_BUILTIN(tuple);
 DECLARE_BUILTIN(typeassert);
 DECLARE_BUILTIN(typeof);
@@ -79,7 +80,6 @@ JL_CALLABLE(jl_f__primitivetype);
 JL_CALLABLE(jl_f__setsuper);
 JL_CALLABLE(jl_f__equiv_typedef);
 JL_CALLABLE(jl_f_get_binding_type);
-JL_CALLABLE(jl_f_set_binding_type);
 JL_CALLABLE(jl_f__compute_sparams);
 JL_CALLABLE(jl_f__svec_ref);
 #ifdef __cplusplus
diff --git a/src/builtins.c b/src/builtins.c
index 29aec53ae5a40..75c4d02c898b2 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -115,7 +115,7 @@ static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_
                     continue; // skip this field (it is #undef)
                 }
             }
-            if (!ft->layout->flags.haspadding) {
+            if (!ft->layout->flags.haspadding && ft->layout->flags.isbitsegal) {
                 if (!bits_equal(ao, bo, ft->layout->size))
                     return 0;
             }
@@ -284,7 +284,7 @@ inline int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t
     if (sz == 0)
         return 1;
     size_t nf = jl_datatype_nfields(dt);
-    if (nf == 0 || !dt->layout->flags.haspadding)
+    if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal))
         return bits_equal(a, b, sz);
     return compare_fields(a, b, dt);
 }
@@ -394,7 +394,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
     if (sz == 0)
         return ~h;
     size_t f, nf = jl_datatype_nfields(dt);
-    if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->npointers == 0)) {
+    if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal && dt->layout->npointers == 0)) {
         // operate element-wise if there are unused bits inside,
         // otherwise just take the whole data block at once
         // a few select pointers (notably symbol) also have special hash values
@@ -580,6 +580,14 @@ JL_CALLABLE(jl_f_throw)
     return jl_nothing;
 }
 
+JL_CALLABLE(jl_f_throw_methoderror)
+{
+    JL_NARGSV(throw_methoderror, 1);
+    size_t world = jl_get_tls_world_age();
+    jl_method_error(args[0], &args[1], nargs, world);
+    return jl_nothing;
+}
+
 JL_CALLABLE(jl_f_ifelse)
 {
     JL_NARGS(ifelse, 3, 3);
@@ -1219,6 +1227,8 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
             tt = ((jl_tvar_t*)tt)->ub;
         if (tt == (jl_value_t*)jl_any_type)
             return (jl_value_t*)jl_any_type;
+        if (tt == (jl_value_t*)jl_bottom_type)
+            return (jl_value_t*)jl_bottom_type;
         JL_GC_PUSH1(&f);
         if (jl_is_symbol(f))
             f = jl_box_long(field_index+1);
@@ -1276,7 +1286,7 @@ JL_CALLABLE(jl_f_isdefined)
             order = jl_memory_order_unordered;
         if (order < jl_memory_order_unordered)
             jl_atomic_error("isdefined: module binding cannot be accessed non-atomically");
-        int bound = jl_boundp(m, s); // seq_cst always
+        int bound = jl_boundp(m, s, 1); // seq_cst always
         return bound ? jl_true : jl_false;
     }
     jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
@@ -1353,7 +1363,7 @@ JL_CALLABLE(jl_f_setglobal)
         jl_atomic_error("setglobal!: module binding cannot be written non-atomically");
     else if (order >= jl_memory_order_seq_cst)
         jl_fence();
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     jl_checked_assignment(b, mod, var, args[2]); // release store
     if (order >= jl_memory_order_seq_cst)
         jl_fence();
@@ -1367,40 +1377,10 @@ JL_CALLABLE(jl_f_get_binding_type)
     jl_sym_t *var = (jl_sym_t*)args[1];
     JL_TYPECHK(get_binding_type, module, (jl_value_t*)mod);
     JL_TYPECHK(get_binding_type, symbol, (jl_value_t*)var);
-    jl_value_t *ty = jl_get_binding_type(mod, var);
-    if (ty == (jl_value_t*)jl_nothing) {
-        jl_binding_t *b = jl_get_module_binding(mod, var, 0);
-        if (b == NULL)
-            return (jl_value_t*)jl_any_type;
-        jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-        if (b2 != b)
-            return (jl_value_t*)jl_any_type;
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-        return jl_atomic_load_relaxed(&b->ty);
-    }
-    return ty;
-}
-
-JL_CALLABLE(jl_f_set_binding_type)
-{
-    JL_NARGS(set_binding_type!, 2, 3);
-    jl_module_t *m = (jl_module_t*)args[0];
-    jl_sym_t *s = (jl_sym_t*)args[1];
-    JL_TYPECHK(set_binding_type!, module, (jl_value_t*)m);
-    JL_TYPECHK(set_binding_type!, symbol, (jl_value_t*)s);
-    jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2];
-    JL_TYPECHK(set_binding_type!, type, ty);
-    jl_binding_t *b = jl_get_binding_wr(m, s);
-    jl_value_t *old_ty = NULL;
-    if (jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty)) {
-        jl_gc_wb(b, ty);
-    }
-    else if (nargs != 2 && !jl_types_equal(ty, old_ty)) {
-        jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
-                  jl_symbol_name(m->name), jl_symbol_name(s));
-    }
-    return jl_nothing;
+    jl_value_t *ret = jl_get_binding_type(mod, var);
+    if (ret == jl_nothing)
+        return (jl_value_t*)jl_any_type;
+    return ret;
 }
 
 JL_CALLABLE(jl_f_swapglobal)
@@ -1418,7 +1398,7 @@ JL_CALLABLE(jl_f_swapglobal)
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("swapglobal!: module binding cannot be written non-atomically");
     // is seq_cst already, no fence needed
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     return jl_checked_swap(b, mod, var, args[2]);
 }
 
@@ -1436,7 +1416,7 @@ JL_CALLABLE(jl_f_modifyglobal)
     JL_TYPECHK(modifyglobal!, symbol, (jl_value_t*)var);
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("modifyglobal!: module binding cannot be written non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     return jl_checked_modify(b, mod, var, args[2], args[3]);
 }
@@ -1465,7 +1445,7 @@ JL_CALLABLE(jl_f_replaceglobal)
         jl_atomic_error("replaceglobal!: module binding cannot be written non-atomically");
     if (failure_order == jl_memory_order_notatomic)
         jl_atomic_error("replaceglobal!: module binding cannot be accessed non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     return jl_checked_replace(b, mod, var, args[2], args[3]);
 }
@@ -1494,7 +1474,7 @@ JL_CALLABLE(jl_f_setglobalonce)
         jl_atomic_error("setglobalonce!: module binding cannot be written non-atomically");
     if (failure_order == jl_memory_order_notatomic)
         jl_atomic_error("setglobalonce!: module binding cannot be accessed non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     jl_value_t *old = jl_checked_assignonce(b, mod, var, args[2]);
     return old == NULL ? jl_true : jl_false;
@@ -1562,11 +1542,11 @@ JL_CALLABLE(jl_f_apply_type)
         jl_vararg_t *vm = (jl_vararg_t*)args[0];
         if (!vm->T) {
             JL_NARGS(apply_type, 2, 3);
-            return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1);
+            return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1, 0);
         }
         else if (!vm->N) {
             JL_NARGS(apply_type, 2, 2);
-            return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1, 0);
         }
     }
     else if (jl_is_unionall(args[0])) {
@@ -2103,6 +2083,12 @@ static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layou
         return references_name(((jl_uniontype_t*)p)->a, name, affects_layout, freevars) ||
                references_name(((jl_uniontype_t*)p)->b, name, affects_layout, freevars);
     }
+    if (jl_is_vararg(p)) {
+        jl_value_t *T = ((jl_vararg_t*)p)->T;
+        jl_value_t *N = ((jl_vararg_t*)p)->N;
+        return (T && references_name(T, name, affects_layout, freevars)) ||
+               (N && references_name(N, name, affects_layout, freevars));
+    }
     if (jl_is_typevar(p))
         return 0; // already checked by unionall, if applicable
     if (jl_is_datatype(p)) {
@@ -2219,6 +2205,9 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     JL_GC_PUSH2(&a, &b);
     a = jl_rewrap_unionall((jl_value_t*)dta->super, dta->name->wrapper);
     b = jl_rewrap_unionall((jl_value_t*)dtb->super, dtb->name->wrapper);
+    // if tb recursively refers to itself in its supertype, assume that it refers to ta
+    // before checking whether the supertypes are equal
+    b = jl_substitute_datatype(b, dtb, dta);
     if (!jl_types_equal(a, b))
         goto no;
     JL_TRY {
@@ -2414,14 +2403,13 @@ void jl_init_primitives(void) JL_GC_DISABLED
     jl_builtin_getglobal = add_builtin_func("getglobal", jl_f_getglobal);
     jl_builtin_setglobal = add_builtin_func("setglobal!", jl_f_setglobal);
     add_builtin_func("get_binding_type", jl_f_get_binding_type);
-    add_builtin_func("set_binding_type!", jl_f_set_binding_type);
     jl_builtin_swapglobal = add_builtin_func("swapglobal!", jl_f_swapglobal);
     jl_builtin_replaceglobal = add_builtin_func("replaceglobal!", jl_f_replaceglobal);
     jl_builtin_modifyglobal = add_builtin_func("modifyglobal!", jl_f_modifyglobal);
     jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce);
 
     // memory primitives
-    jl_builtin_memoryref = add_builtin_func("memoryref", jl_f_memoryref);
+    jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref);
     jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset);
     jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget);
     jl_builtin_memoryrefset = add_builtin_func("memoryrefset!", jl_f_memoryrefset);
@@ -2457,6 +2445,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin_func("_compute_sparams", jl_f__compute_sparams);
     add_builtin_func("_svec_ref", jl_f__svec_ref);
     add_builtin_func("current_scope", jl_f_current_scope);
+    add_builtin_func("throw_methoderror", jl_f_throw_methoderror);
 
     // builtin types
     add_builtin("Any", (jl_value_t*)jl_any_type);
@@ -2472,7 +2461,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Tuple", (jl_value_t*)jl_anytuple_type);
     add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type);
     add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type);
-    add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0));
+    add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0));
 
     add_builtin("Module", (jl_value_t*)jl_module_type);
     add_builtin("MethodTable", (jl_value_t*)jl_methtable_type);
@@ -2510,7 +2499,8 @@ void jl_init_primitives(void) JL_GC_DISABLED
 
     add_builtin("Expr", (jl_value_t*)jl_expr_type);
     add_builtin("LineNumberNode", (jl_value_t*)jl_linenumbernode_type);
-    add_builtin("LineInfoNode", (jl_value_t*)jl_lineinfonode_type);
+    add_builtin("LegacyLineInfoNode", (jl_value_t*)jl_lineinfonode_type);
+    add_builtin("DebugInfo", (jl_value_t*)jl_debuginfo_type);
     add_builtin("GotoNode", (jl_value_t*)jl_gotonode_type);
     add_builtin("GotoIfNot", (jl_value_t*)jl_gotoifnot_type);
     add_builtin("EnterNode", (jl_value_t*)jl_enternode_type);
@@ -2522,6 +2512,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type);
     add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type);
     add_builtin("Binding", (jl_value_t*)jl_binding_type);
+    add_builtin("BindingPartition", (jl_value_t*)jl_binding_partition_type);
     add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type);
     add_builtin("NamedTuple", (jl_value_t*)jl_namedtuple_type);
 
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 0fcc838905004..e336de8e3574f 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -22,6 +22,10 @@ TRANSFORMED_CCALL_STAT(jl_cpu_wake);
 TRANSFORMED_CCALL_STAT(jl_gc_safepoint);
 TRANSFORMED_CCALL_STAT(jl_get_ptls_states);
 TRANSFORMED_CCALL_STAT(jl_threadid);
+TRANSFORMED_CCALL_STAT(jl_get_ptls_rng);
+TRANSFORMED_CCALL_STAT(jl_set_ptls_rng);
+TRANSFORMED_CCALL_STAT(jl_get_tls_world_age);
+TRANSFORMED_CCALL_STAT(jl_get_world_counter);
 TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal);
 TRANSFORMED_CCALL_STAT(jl_get_current_task);
 TRANSFORMED_CCALL_STAT(jl_set_next_task);
@@ -84,9 +88,9 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_
         runtime_lib = true;
         auto &libgv = ctx.emission_context.libMapGV[f_lib];
         if (libgv.first == NULL) {
-            libptrgv = new GlobalVariable(*M, getInt8PtrTy(M->getContext()), false,
+            libptrgv = new GlobalVariable(*M, getPointerTy(M->getContext()), false,
                                           GlobalVariable::ExternalLinkage,
-                                          Constant::getNullValue(getInt8PtrTy(M->getContext())), name);
+                                          Constant::getNullValue(getPointerTy(M->getContext())), name);
             libgv.first = libptrgv;
         }
         else {
@@ -168,7 +172,7 @@ static Value *runtime_sym_lookup(
         }
         else {
             // f_lib is actually one of the special sentinel values
-            libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext()));
+            libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getPointerTy(irbuilder.getContext()));
         }
         auto lookup = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
                     { libname, nameval, libptrgv });
@@ -185,7 +189,7 @@ static Value *runtime_sym_lookup(
     p->addIncoming(llvmf_orig, enter_bb);
     p->addIncoming(llvmf, llvmf->getParent());
     setName(emission_context, p, f_name);
-    return irbuilder.CreateBitCast(p, funcptype);
+    return p;
 }
 
 static Value *runtime_sym_lookup(
@@ -243,7 +247,7 @@ static GlobalVariable *emit_plt_thunk(
     auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext());
     GlobalVariable *got = new GlobalVariable(*M, T_pvoidfunc, false,
                                              GlobalVariable::ExternalLinkage,
-                                             ConstantExpr::getBitCast(plt, T_pvoidfunc),
+                                             plt,
                                              fname + "_got");
     if (runtime_lib) {
         got->addAttribute("julia.libname", f_lib);
@@ -255,12 +259,14 @@ static GlobalVariable *emit_plt_thunk(
     IRBuilder<> irbuilder(b0);
     Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv,
                                     llvmgv, runtime_lib);
-    StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, Align(sizeof(void*)));
+    StoreInst *store = irbuilder.CreateAlignedStore(ptr, got, Align(sizeof(void*)));
     store->setAtomic(AtomicOrdering::Release);
     SmallVector<Value*, 16> args;
     for (auto &arg : plt->args())
         args.push_back(&arg);
+    #if JL_LLVM_VERSION < 170000
     assert(cast<PointerType>(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype));
+    #endif
     CallInst *ret = irbuilder.CreateCall(
         functype,
         ptr, ArrayRef<Value*>(args));
@@ -308,7 +314,6 @@ static Value *emit_plt(
     GlobalVariable *libptrgv;
     GlobalVariable *llvmgv;
     bool runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv);
-    PointerType *funcptype = PointerType::get(functype, 0);
 
     auto &pltMap = ctx.emission_context.allPltMap[attrs];
     auto key = std::make_tuple(llvmgv, functype, cc);
@@ -325,7 +330,7 @@ static Value *emit_plt(
     // since the only thing we do to this loaded pointer is to call it
     // immediately.
     got_val->setAtomic(AtomicOrdering::Unordered);
-    return ctx.builder.CreateBitCast(got_val, funcptype);
+    return got_val;
 }
 
 // --- ABI Implementations ---
@@ -370,18 +375,18 @@ static bool is_native_simd_type(jl_datatype_t *dt) {
 
 #if defined ABI_LLVM
   typedef ABI_LLVMLayout DefaultAbiState;
-#elif defined _CPU_X86_64_
-#  if defined _OS_WINDOWS_
+#elif defined _OS_WINDOWS_
+#  if defined _CPU_X86_64_
      typedef ABI_Win64Layout DefaultAbiState;
-#  else
-     typedef ABI_x86_64Layout DefaultAbiState;
-#  endif
-#elif defined _CPU_X86_
-#  if defined _OS_WINDOWS_
+#  elif defined _CPU_X86_
      typedef ABI_Win32Layout DefaultAbiState;
 #  else
-     typedef ABI_x86Layout DefaultAbiState;
+#    error Windows is currently only supported on x86 and x86_64
 #  endif
+#elif defined _CPU_X86_64_
+  typedef ABI_x86_64Layout DefaultAbiState;
+#elif defined _CPU_X86_
+  typedef ABI_x86Layout DefaultAbiState;
 #elif defined _CPU_ARM_
   typedef ABI_ARMLayout DefaultAbiState;
 #elif defined _CPU_AARCH64_
@@ -408,7 +413,7 @@ static Value *llvm_type_rewrite(
 
     assert(from_type->isPointerTy() == target_type->isPointerTy()); // expect that all ABIs consider all pointers to be equivalent
     if (target_type->isPointerTy())
-        return emit_bitcast(ctx, v, target_type);
+        return v;
 
     // simple integer and float widening & conversion cases
     if (from_type->getPrimitiveSizeInBits() > 0 &&
@@ -441,16 +446,14 @@ static Value *llvm_type_rewrite(
     const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
     Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type));
     if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
-        to = emit_static_alloca(ctx, target_type);
+        to = emit_static_alloca(ctx, target_type, align);
         setName(ctx.emission_context, to, "type_rewrite_buffer");
-        cast<AllocaInst>(to)->setAlignment(align);
-        from = emit_bitcast(ctx, to, from_type->getPointerTo());
+        from = to;
     }
     else {
-        from = emit_static_alloca(ctx, from_type);
+        from = emit_static_alloca(ctx, from_type, align);
         setName(ctx.emission_context, from, "type_rewrite_buffer");
-        cast<AllocaInst>(from)->setAlignment(align);
-        to = emit_bitcast(ctx, from, target_type->getPointerTo());
+        to = from;
     }
     ctx.builder.CreateAlignedStore(v, from, align);
     auto pun = ctx.builder.CreateAlignedLoad(target_type, to, align);
@@ -550,9 +553,8 @@ static Value *julia_to_native(
 
     // pass the address of an alloca'd thing, not a box
     // since those are immutable.
-    Value *slot = emit_static_alloca(ctx, to);
-    unsigned align = julia_alignment(jlto);
-    cast<AllocaInst>(slot)->setAlignment(Align(align));
+    Align align(julia_alignment(jlto));
+    Value *slot = emit_static_alloca(ctx, to, align);
     setName(ctx.emission_context, slot, "native_convert_buffer");
     if (!jvinfo.ispointer()) {
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
@@ -612,11 +614,13 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
         jl_cgval_t arg1 = emit_expr(ctx, arg);
         jl_value_t *ptr_ty = arg1.typ;
         if (!jl_is_cpointer_type(ptr_ty)) {
+            if (!ccall)
+                return;
             const char *errmsg = invalid_symbol_err_msg(ccall);
             emit_cpointercheck(ctx, arg1, errmsg);
         }
         arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type);
-        jl_ptr = emit_unbox(ctx, ctx.types().T_size, arg1, (jl_value_t*)jl_voidpointer_type);
+        jl_ptr = emit_unbox(ctx, ctx.types().T_ptr, arg1, (jl_value_t*)jl_voidpointer_type);
     }
     else {
         out.gcroot = ptr;
@@ -696,35 +700,34 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     else {
         rt = (jl_value_t*)jl_voidpointer_type;
     }
-    Type *lrt = ctx.types().T_size;
+    Type *lrt = ctx.types().T_ptr;
     assert(lrt == julia_type_to_llvm(ctx, rt));
 
     interpret_symbol_arg(ctx, sym, args[1], /*ccall=*/false, false);
 
-    if (sym.f_name == NULL && sym.fptr == NULL && sym.jl_ptr == NULL && sym.gcroot != NULL) {
-        const char *errmsg = invalid_symbol_err_msg(/*ccall=*/false);
-        jl_cgval_t arg1 = emit_expr(ctx, args[1]);
-        emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg);
-        JL_GC_POP();
-        return jl_cgval_t();
-    }
-
     if (sym.jl_ptr != NULL) {
-        res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt);
+        res = sym.jl_ptr;
     }
     else if (sym.fptr != NULL) {
         res = ConstantInt::get(lrt, (uint64_t)sym.fptr);
         if (ctx.emission_context.imaging_mode)
             jl_printf(JL_STDERR,"WARNING: literal address used in cglobal for %s; code cannot be statically compiled\n", sym.f_name);
     }
-    else {
+    else if (sym.f_name != NULL) {
         if (sym.lib_expr) {
-            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), NULL, sym.lib_expr, sym.f_name, ctx.f);
+            res = runtime_sym_lookup(ctx, getPointerTy(ctx.builder.getContext()), NULL, sym.lib_expr, sym.f_name, ctx.f);
         }
         else /*if (ctx.emission_context.imaging) */{
-            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
-            res = ctx.builder.CreatePtrToInt(res, lrt);
+            res = runtime_sym_lookup(ctx, getPointerTy(ctx.builder.getContext()), sym.f_lib, NULL, sym.f_name, ctx.f);
         }
+    } else {
+        // Fall back to runtime intrinsic
+        JL_GC_POP();
+        jl_cgval_t argv[2];
+        argv[0] = emit_expr(ctx, args[1]);
+        if (nargs == 2)
+            argv[1] = emit_expr(ctx, args[2]);
+        return emit_runtime_call(ctx, nargs == 1 ? JL_I::cglobal_auto : JL_I::cglobal, argv, nargs);
     }
 
     JL_GC_POP();
@@ -814,18 +817,13 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     JL_TYPECHK(llvmcall, type, rt);
     JL_TYPECHK(llvmcall, type, at);
 
-    // Generate arguments
-    std::string arguments;
-    raw_string_ostream argstream(arguments);
-    jl_svec_t *tt = ((jl_datatype_t*)at)->parameters;
-    jl_value_t *rtt = rt;
+    // Determine argument types
+    //
+    // Semantics for arguments are as follows:
+    // If the argument type is immutable (including bitstype), we pass the loaded llvm value
+    // type. Otherwise we pass a pointer to a jl_value_t.
+    jl_svec_t *tt = ((jl_datatype_t *)at)->parameters;
     size_t nargt = jl_svec_len(tt);
-
-    /*
-     * Semantics for arguments are as follows:
-     * If the argument type is immutable (including bitstype), we pass the loaded llvm value
-     * type. Otherwise we pass a pointer to a jl_value_t.
-     */
     SmallVector<llvm::Type*, 0> argtypes;
     SmallVector<Value *, 8> argvals(nargt);
     for (size_t i = 0; i < nargt; ++i) {
@@ -846,45 +844,87 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         argvals[i] = llvm_type_rewrite(ctx, v, t, issigned);
     }
 
+    // Determine return type
+    jl_value_t *rtt = rt;
     bool retboxed;
     Type *rettype = julia_type_to_llvm(ctx, rtt, &retboxed);
 
     // Make sure to find a unique name
     std::string ir_name;
     while (true) {
-        raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+        raw_string_ostream(ir_name)
+            << (ctx.f->getName().str()) << "u"
+            << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
         if (jl_Module->getFunction(ir_name) == NULL)
             break;
     }
 
     // generate a temporary module that contains our IR
     std::unique_ptr<Module> Mod;
+    Function *f;
     if (entry == NULL) {
         // we only have function IR, which we should put in a function
 
-        bool first = true;
+        // stringify arguments
+        std::string arguments;
+        raw_string_ostream argstream(arguments);
         for (SmallVector<Type *, 0>::iterator it = argtypes.begin(); it != argtypes.end(); ++it) {
-            if (!first)
+            if (it != argtypes.begin())
                 argstream << ",";
-            else
-                first = false;
             (*it)->print(argstream);
             argstream << " ";
         }
 
+        // stringify return type
         std::string rstring;
         raw_string_ostream rtypename(rstring);
         rettype->print(rtypename);
-        std::map<uint64_t,std::string> localDecls;
 
+        // generate IR function definition
         std::string ir_string;
         raw_string_ostream ir_stream(ir_string);
-        ir_stream << "; Number of arguments: " << nargt << "\n"
-        << "define "<<rtypename.str()<<" @\"" << ir_name << "\"("<<argstream.str()<<") {\n"
-        << jl_string_data(ir) << "\n}";
+        ir_stream << "define " << rtypename.str() << " @\"" << ir_name << "\"("
+                  << argstream.str() << ") {\n"
+                  << jl_string_data(ir) << "\n}";
 
         SMDiagnostic Err = SMDiagnostic();
         Mod = parseAssemblyString(ir_stream.str(), Err, ctx.builder.getContext());
+
+        // backwards compatibility: support for IR with integer pointers
+        if (!Mod) {
+            std::string compat_arguments;
+            raw_string_ostream compat_argstream(compat_arguments);
+            for (size_t i = 0; i < nargt; ++i) {
+                if (i > 0)
+                    compat_argstream << ",";
+                jl_value_t *tti = jl_svecref(tt, i);
+                Type *t;
+                if (jl_is_cpointer_type(tti))
+                    t = ctx.types().T_size;
+                else
+                    t = argtypes[i];
+                t->print(compat_argstream);
+                compat_argstream << " ";
+            }
+
+            std::string compat_rstring;
+            raw_string_ostream compat_rtypename(compat_rstring);
+            if (jl_is_cpointer_type(rtt))
+                ctx.types().T_size->print(compat_rtypename);
+            else
+                rettype->print(compat_rtypename);
+
+            std::string compat_ir_string;
+            raw_string_ostream compat_ir_stream(compat_ir_string);
+            compat_ir_stream << "define " << compat_rtypename.str() << " @\"" << ir_name
+                             << "\"(" << compat_argstream.str() << ") {\n"
+                             << jl_string_data(ir) << "\n}";
+
+            SMDiagnostic Err = SMDiagnostic();
+            Mod =
+                parseAssemblyString(compat_ir_stream.str(), Err, ctx.builder.getContext());
+        }
+
         if (!Mod) {
             std::string message = "Failed to parse LLVM assembly: \n";
             raw_string_ostream stream(message);
@@ -894,7 +934,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
             return jl_cgval_t();
         }
 
-        Function *f = Mod->getFunction(ir_name);
+        f = Mod->getFunction(ir_name);
         f->addFnAttr(Attribute::AlwaysInline);
     }
     else {
@@ -932,21 +972,96 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
             Mod = std::move(ModuleOrErr.get());
         }
 
-        Function *f = Mod->getFunction(jl_string_data(entry));
+        f = Mod->getFunction(jl_string_data(entry));
         if (!f) {
             emit_error(ctx, "Module IR does not contain specified entry function");
             JL_GC_POP();
             return jl_cgval_t();
         }
+        assert(!f->isDeclaration());
         f->setName(ir_name);
+    }
 
-        // verify the function type
-        assert(!f->isDeclaration());
-        assert(f->getReturnType() == rettype);
-        int i = 0;
-        for (SmallVector<Type *, 0>::iterator it = argtypes.begin();
-            it != argtypes.end(); ++it, ++i)
-            assert(*it == f->getFunctionType()->getParamType(i));
+    // backwards compatibility: support for IR with integer pointers
+    bool mismatched_pointers = false;
+    for (size_t i = 0; i < nargt; ++i) {
+        jl_value_t *tti = jl_svecref(tt, i);
+        if (jl_is_cpointer_type(tti) &&
+            !f->getFunctionType()->getParamType(i)->isPointerTy()) {
+            mismatched_pointers = true;
+            break;
+        }
+    }
+    if (mismatched_pointers) {
+        if (jl_options.depwarn) {
+            if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
+                jl_error("llvmcall with integer pointers is deprecated, "
+                         "use an actual pointer type instead.");
+
+            // ensure we only depwarn once per method
+            // TODO: lift this into a reusable codegen-level depwarn utility
+            static std::set<jl_method_t*> llvmcall_depwarns;
+            jl_method_t *m = ctx.linfo->def.method;
+            if (llvmcall_depwarns.find(m) == llvmcall_depwarns.end()) {
+                llvmcall_depwarns.insert(m);
+                jl_printf(JL_STDERR,
+                        "WARNING: llvmcall with integer pointers is deprecated.\n"
+                        "Use actual pointers instead, replacing i32 or i64 with i8* or ptr\n"
+                        "in ");
+                jl_static_show(JL_STDERR, (jl_value_t*) ctx.linfo->def.method);
+                jl_printf(JL_STDERR, " at %s\n", ctx.file.str().c_str());
+            }
+        }
+
+        // wrap the function, performing the necessary pointer conversion
+
+        Function *inner = f;
+        inner->setName(ir_name + ".inner");
+
+        FunctionType *wrapper_ft = FunctionType::get(rettype, argtypes, false);
+        Function *wrapper =
+            Function::Create(wrapper_ft, inner->getLinkage(), ir_name, *Mod);
+
+        wrapper->copyAttributesFrom(inner);
+        inner->addFnAttr(Attribute::AlwaysInline);
+
+        BasicBlock *entry = BasicBlock::Create(ctx.builder.getContext(), "", wrapper);
+        IRBuilder<> irbuilder(entry);
+        SmallVector<Value *, 0> wrapper_args;
+        for (size_t i = 0; i < nargt; ++i) {
+            jl_value_t *tti = jl_svecref(tt, i);
+            Value *v = wrapper->getArg(i);
+            if (jl_is_cpointer_type(tti))
+                v = irbuilder.CreatePtrToInt(v, ctx.types().T_size);
+            wrapper_args.push_back(v);
+        }
+        Value *call = irbuilder.CreateCall(inner, wrapper_args);
+        // check if void
+        if (rettype->isVoidTy())
+            irbuilder.CreateRetVoid();
+        else {
+            if (jl_is_cpointer_type(rtt))
+                call = irbuilder.CreateIntToPtr(call, ctx.types().T_ptr);
+            irbuilder.CreateRet(call);
+        }
+
+        f = wrapper;
+    }
+
+    // verify the function type
+    assert(f->getReturnType() == rettype);
+    int i = 0;
+    for (SmallVector<Type *, 0>::iterator it = argtypes.begin(); it != argtypes.end();
+         ++it, ++i) {
+        if (*it != f->getFunctionType()->getParamType(i)) {
+            std::string message;
+            raw_string_ostream stream(message);
+            stream << "Malformed llvmcall: argument " << i + 1 << " type "
+                   << *f->getFunctionType()->getParamType(i)
+                   << " does not match expected argument type " << **it;
+            emit_error(ctx, stream.str());
+            return jl_cgval_t();
+        }
     }
 
     // copy module properties that should always match
@@ -984,7 +1099,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     if (inst->getType() != rettype) {
         std::string message;
         raw_string_ostream stream(message);
-        stream << "llvmcall return type " << *inst->getType()
+        stream << "Malformed llvmcall: return type " << *inst->getType()
                << " does not match declared return type" << *rettype;
         emit_error(ctx, stream.str());
         return jl_cgval_t();
@@ -1116,7 +1231,7 @@ std::string generate_func_sig(const char *fname)
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            t = getInt8PtrTy(LLVMCtx);
+            t = getPointerTy(LLVMCtx);
             isboxed = false;
         }
         else if (llvmcall && jl_is_llvmpointer_type(tti)) {
@@ -1462,14 +1577,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     (void)isVa; // prevent compiler warning
     if (is_libjulia_func(jl_value_ptr)) {
         ++CCALL_STAT(jl_value_ptr);
-        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_size);
+        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
         jl_value_t *tti = jl_svecref(at, 0);
         Type *largty;
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            largty = ctx.types().T_size;
+            largty = ctx.types().T_ptr;
             isboxed = false;
         }
         else {
@@ -1478,11 +1593,10 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         Value *retval;
         if (isboxed) {
             retval = boxed(ctx, argv[0]);
-            retval = emit_pointer_from_objref(ctx, emit_bitcast(ctx, retval, ctx.types().T_prjlvalue));
+            retval = emit_pointer_from_objref(ctx, retval /*T_prjlvalue*/);
         }
         else {
             retval = emit_unbox(ctx, largty, argv[0], tti);
-            retval = emit_inttoptr(ctx, retval, ctx.types().T_pjlvalue);
         }
         // retval is now an untracked jl_value_t*
         if (retboxed)
@@ -1556,23 +1670,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
         return ghostValue(ctx, jl_nothing_type);
     }
-    else if (is_libjulia_func("jl_get_ptls_states")) {
+    else if (is_libjulia_func(jl_get_ptls_states)) {
         ++CCALL_STAT(jl_get_ptls_states);
-        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        return mark_or_box_ccall_result(ctx,
-            ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt),
-            retboxed, rt, unionall, static_rt);
+        return mark_or_box_ccall_result(ctx, get_current_ptls(ctx), retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_threadid)) {
         ++CCALL_STAT(jl_threadid);
         assert(lrt == getInt16Ty(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext()));
+        Value *ptask = get_current_task(ctx);
         const int tid_offset = offsetof(jl_task_t, tid);
-        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t)));
+        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptask, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int8_t)));
         setName(ctx.emission_context, ptid, "thread_id_ptr");
         LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t)));
         setName(ctx.emission_context, tid, "thread_id");
@@ -1580,15 +1691,83 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ai.decorateInst(tid);
         return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
+    else if (is_libjulia_func(jl_get_ptls_rng)) {
+        ++CCALL_STAT(jl_get_ptls_rng);
+        assert(lrt == getInt64Ty(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        LoadInst *rng_value = ctx.builder.CreateAlignedLoad(getInt64Ty(ctx.builder.getContext()), rng_ptr, Align(sizeof(void*)));
+        setName(ctx.emission_context, rng_value, "rngseed");
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(rng_value);
+        return mark_or_box_ccall_result(ctx, rng_value, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(jl_set_ptls_rng)) {
+        ++CCALL_STAT(jl_set_ptls_rng);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 1);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        assert(argv[0].V->getType() == getInt64Ty(ctx.builder.getContext()));
+        auto store = ctx.builder.CreateAlignedStore(argv[0].V, rng_ptr, Align(sizeof(void*)));
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(store);
+        return ghostValue(ctx, jl_nothing_type);
+    }
+    else if (is_libjulia_func(jl_get_tls_world_age)) {
+        bool toplevel = !(ctx.linfo && jl_is_method(ctx.linfo->def.method));
+        if (!toplevel) { // top level code does not see a stable world age during execution
+            ++CCALL_STAT(jl_get_tls_world_age);
+            assert(lrt == ctx.types().T_size);
+            assert(!isVa && !llvmcall && nccallargs == 0);
+            JL_GC_POP();
+            Instruction *world_age = cast<Instruction>(ctx.world_age_at_entry);
+            setName(ctx.emission_context, world_age, "task_world_age");
+            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+            ai.decorateInst(world_age);
+            return mark_or_box_ccall_result(ctx, world_age, retboxed, rt, unionall, static_rt);
+        }
+    }
+    else if (is_libjulia_func(jl_get_world_counter)) {
+        ++CCALL_STAT(jl_get_world_counter);
+        assert(lrt == ctx.types().T_size);
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+
+        // jl_task_t *ct = jl_current_task;
+        // if (ct->ptls->in_pure_callback)
+        //     return ~(size_t)0;
+        // return jl_atomic_load_acquire(&jl_world_counter);
+        Type *T_int16 = getInt16Ty(ctx.builder.getContext());
+        Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_tls_states_t, in_pure_callback) / sizeof(int16_t));
+        Value *field_ptr = ctx.builder.CreateInBoundsGEP(T_int16, get_current_ptls(ctx), offset);
+        Instruction *in_pure_callback = ai.decorateInst(ctx.builder.CreateAlignedLoad(T_int16,
+            field_ptr, Align(sizeof(int16_t)), "in_pure_callback"));
+        Value *cond = ctx.builder.CreateICmpEQ(in_pure_callback, ConstantInt::get(T_int16, 0));
+
+        Value *world_counter = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+            prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
+        cast<LoadInst>(world_counter)->setOrdering(AtomicOrdering::Acquire);
+        Value *ret = ctx.builder.CreateSelect(cond, world_counter, ConstantInt::get(ctx.types().T_size, ~(size_t)0));
+        return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt);
+    }
     else if (is_libjulia_func(jl_gc_disable_finalizers_internal)
 #ifdef NDEBUG
              || is_libjulia_func(jl_gc_enable_finalizers_internal)
 #endif
              ) {
         JL_GC_POP();
-        Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext()));
+        Value *ptls_p = get_current_ptls(ctx);
         const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited);
-        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4));
+        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, finh_offset / sizeof(int8_t)));
         setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr");
         LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t)));
         setName(ctx.emission_context, finh, "finalizers_inhibited");
@@ -1611,7 +1790,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(lrt == ctx.types().T_prjlvalue);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), ctx.types().T_pjlvalue));
+        auto ct = track_pjlvalue(ctx, get_current_task(ctx));
         return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_set_next_task)) {
@@ -1619,7 +1798,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue);
+        Value *ptls_pv = get_current_ptls(ctx);
         const int nt_offset = offsetof(jl_tls_states_t, next_task);
         Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*)));
         setName(ctx.emission_context, pnt, "next_task_ptr");
@@ -1672,8 +1851,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ctx.builder.SetInsertPoint(checkBB);
         auto signal_page_load = ctx.builder.CreateLoad(
                 ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size,
-                    get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
+                emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const),
+                    -sizeof(size_t)),
                 true);
         setName(ctx.emission_context, signal_page_load, "signal_page_load");
         ctx.builder.CreateBr(contBB);
@@ -1683,30 +1862,23 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     }
     else if (is_libjulia_func(jl_string_ptr)) {
         ++CCALL_STAT(jl_string_ptr);
-        assert(lrt == ctx.types().T_size);
+        assert(lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
-        auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                ctx.types().T_pprjlvalue);
+        auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
-        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
-        setName(ctx.emission_context, strp, "string_ptr");
+        auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_symbol_name)) {
         ++CCALL_STAT(jl_symbol_name);
-        assert(lrt == ctx.types().T_size);
+        assert(lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
-        auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                ctx.types().T_pprjlvalue);
+        auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
-        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
-        setName(ctx.emission_context, strp, "symbol_name");
+        auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
@@ -1750,14 +1922,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemCpy(
-                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+                destp,
                 MaybeAlign(1),
-                emit_inttoptr(ctx,
-                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
-                    getInt8PtrTy(ctx.builder.getContext())),
+                emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type),
                 MaybeAlign(1),
                 emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
@@ -1770,11 +1940,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &val = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
         Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type);
         Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val");
         ctx.builder.CreateMemSet(
-            emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+            destp,
             val8,
             emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
             MaybeAlign(1)
@@ -1788,14 +1958,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemMove(
-                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+                destp,
                 MaybeAlign(0),
-                emit_inttoptr(ctx,
-                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
-                    getInt8PtrTy(ctx.builder.getContext())),
+                emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type),
                 MaybeAlign(0),
                 emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
@@ -1810,7 +1978,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         if (val.typ == (jl_value_t*)jl_symbol_type) {
             JL_GC_POP();
             const int hash_offset = offsetof(jl_sym_t, hash);
-            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo());
+            Value *ph1 = decay_derived(ctx, boxed(ctx, val));
             Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr));
             setName(ctx.emission_context, ph2, "object_id_ptr");
             LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr);
@@ -1822,15 +1990,13 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         else if (!val.isboxed) {
             // If the value is not boxed, try to compute the object id without
             // reboxing it.
-            auto T_pint8_derived = PointerType::get(getInt8Ty(ctx.builder.getContext()), AddressSpace::Derived);
+            auto T_p_derived = PointerType::get(ctx.builder.getContext(), AddressSpace::Derived);
             if (!val.isghost && !val.ispointer())
                 val = value_to_pointer(ctx, val);
             Value *args[] = {
                 emit_typeof(ctx, val, false, true),
-                val.isghost ? ConstantPointerNull::get(T_pint8_derived) :
-                    ctx.builder.CreateBitCast(
-                        decay_derived(ctx, data_pointer(ctx, val)),
-                        T_pint8_derived)
+                val.isghost ? ConstantPointerNull::get(T_p_derived) :
+                        decay_derived(ctx, data_pointer(ctx, val))
             };
             Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), ArrayRef<Value*>(args));
             setName(ctx.emission_context, ret, "object_id");
@@ -1921,10 +2087,10 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     if (sret) {
         assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid");
         if (jl_is_pointerfree(rt)) {
-            result = emit_static_alloca(ctx, lrt);
+            result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt)));
             setName(ctx.emission_context, result, "ccall_sret");
             sretty = lrt;
-            argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig[0]);
+            argvals[0] = result;
         }
         else {
             // XXX: result needs to be zero'd and given a GC root here
@@ -1935,7 +2101,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
             sretty = ctx.types().T_jlvalue;
             sretboxed = true;
             gc_uses.push_back(result);
-            argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig[0]);
+            argvals[0] = emit_pointer_from_objref(ctx, result);
         }
     }
 
@@ -1967,7 +2133,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (!isa<Function>(llvmf) || cast<Function>(llvmf)->isIntrinsic() || cast<Function>(llvmf)->getFunctionType() != functype)
                     llvmf = NULL;
             }
-            else if (f_name.startswith("llvm.")) {
+            else if (f_name.starts_with("llvm.")) {
                 // compute and verify auto-mangling for intrinsic name
                 auto ID = Function::lookupIntrinsicID(f_name);
                 if (ID != Intrinsic::not_intrinsic) {
@@ -1998,8 +2164,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     else if (symarg.jl_ptr != NULL) {
         ++LiteralCCalls;
         null_pointer_check(ctx, symarg.jl_ptr, nullptr);
-        Type *funcptype = PointerType::get(functype, 0);
-        llvmf = emit_inttoptr(ctx, symarg.jl_ptr, funcptype);
+        llvmf = symarg.jl_ptr;
     }
     else if (symarg.fptr != NULL) {
         ++LiteralCCalls;
@@ -2090,7 +2255,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt, true);
                 setName(ctx.emission_context, strct, "ccall_ret_box");
                 MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
-                int boxalign = julia_alignment(rt);
+                Align boxalign(julia_alignment(rt));
                 // copy the data from the return value to the new struct
                 const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
                 auto resultTy = result->getType();
@@ -2098,10 +2263,9 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (DL.getTypeStoreSize(resultTy) > rtsz) {
                     // ARM and AArch64 can use a LLVM type larger than the julia type.
                     // When this happens, cast through memory.
-                    auto slot = emit_static_alloca(ctx, resultTy);
+                    auto slot = emit_static_alloca(ctx, resultTy, boxalign);
                     setName(ctx.emission_context, slot, "type_pun_slot");
-                    slot->setAlignment(Align(boxalign));
-                    ctx.builder.CreateAlignedStore(result, slot, Align(boxalign));
+                    ctx.builder.CreateAlignedStore(result, slot, boxalign);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
                     emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign);
                 }
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index b627224e027a9..c78e6092ca5db 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -25,6 +25,9 @@
 #  include <sys/types.h>
 #  include <sys/resource.h>
 #endif
+#ifdef _OS_OPENBSD_
+#  include <sys/resource.h>
+#endif
 #include "julia_assert.h"
 
 namespace {
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 7bbf7a84a0385..bf5c67ae9f849 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -57,10 +57,14 @@ static Value *maybe_decay_untracked(jl_codectx_t &ctx, Value *V)
 static Value *decay_derived(jl_codectx_t &ctx, Value *V)
 {
     Type *T = V->getType();
-    if (cast<PointerType>(T)->getAddressSpace() == AddressSpace::Derived)
+    if (T->getPointerAddressSpace() == AddressSpace::Derived)
         return V;
     // Once llvm deletes pointer element types, we won't need it here any more either.
+    #if JL_LLVM_VERSION >= 170000
+    Type *NewT = PointerType::get(T, AddressSpace::Derived);
+    #else
     Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
+    #endif
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
@@ -68,9 +72,13 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V)
 static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V)
 {
     Type *T = V->getType();
-    if (cast<PointerType>(T)->getAddressSpace() != AddressSpace::Tracked)
+    if (T->getPointerAddressSpace() != AddressSpace::Tracked)
         return V;
+    #if JL_LLVM_VERSION >= 170000
+    Type *NewT = PointerType::get(T, AddressSpace::Derived);
+    #else
     Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
+    #endif
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
@@ -122,14 +130,8 @@ static Value *stringConstPtr(
     }
     // Doesn't need to be aligned, we shouldn't operate on these like julia objects
     GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M);
-    Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0);
-    Value *Args[] = { zero, zero };
-    auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(),
-                                       // AddrSpaceCast in case globals are in non-0 AS
-                                       irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)),
-                                       Args);
-    setName(emission_context, gep, "string_const_ptr");
-    return gep;
+    // AddrSpaceCast in case globals are in non-0 AS
+    return irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0));
 }
 
 
@@ -295,13 +297,10 @@ void jl_debugcache_t::initialize(Module *m) {
 
 static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
 {
-    unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+    unsigned AS = V->getType()->getPointerAddressSpace();
     if (AS != AddressSpace::Tracked && AS != AddressSpace::Derived)
         return V;
     V = decay_derived(ctx, V);
-    Type *T = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-    if (V->getType() != T)
-        V = ctx.builder.CreateBitCast(V, T);
     Function *F = prepare_call(pointer_from_objref_func);
     CallInst *Call = ctx.builder.CreateCall(F, V);
     Call->setAttributes(F->getAttributes());
@@ -310,7 +309,7 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
 }
 
 static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt);
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false);
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, Align alignment, bool isVolatile=false);
 
 static bool type_is_permalloc(jl_value_t *typ)
 {
@@ -563,47 +562,28 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
     return load;
 }
 
-// Returns ctx.types().T_pjlvalue
-static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
-{
-    // emit a pointer to any jl_value_t which will be valid across reloading code
-    if (p == NULL)
-        return Constant::getNullValue(ctx.types().T_pjlvalue);
-    // bindings are prefixed with jl_bnd#
-    jl_globalref_t *gr = p->globalref;
-    Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p);
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    auto load = ai.decorateInst(maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
-            false, sizeof(jl_binding_t), alignof(jl_binding_t)));
-    setName(ctx.emission_context, load, pgv->getName());
-    return load;
-}
-
 // bitcast a value, but preserve its address space when dealing with pointer types
 static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value)
 {
-    if (isa<PointerType>(jl_value) &&
-        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
-        // Cast to the proper address space
-        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
-        ++EmittedPointerBitcast;
-        return ctx.builder.CreateBitCast(v, jl_value_addr);
+    if (isa<PointerType>(jl_value)) {
+        return v;
     }
     else {
         return ctx.builder.CreateBitCast(v, jl_value);
     }
 }
 
-static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
-    if (to != V->getType())
-        return emit_bitcast(ctx, V, to);
-    return V;
-}
+// static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
+//     if (isa<PointerType>(to)) {
+//         return V;
+//     }
+//     if (to != V->getType())
+//         return emit_bitcast(ctx, V, to);
+//     return V;
+// }
 
 static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv)
 {
-    bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue);
     Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_binding_t, value) / ctx.types().sizeof_ptr);
     return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset);
 }
@@ -635,19 +615,13 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt
     return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset);
 }
 
-static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset)
-{
-    unsigned idx = convert_struct_offset(ctx, lty, byte_offset);
-    return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx);
-}
-
 static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false);
 
 static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed)
 {
     // this function converts a Julia Type into the equivalent LLVM type
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_concrete_immutable(jt)) {
         if (jl_datatype_nbits(jt) == 0)
@@ -690,6 +664,8 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall =
         return getDoubleTy(ctxt);
     if (bt == (jl_value_t*)jl_bfloat16_type)
         return getBFloatTy(ctxt);
+    if (jl_is_cpointer_type(bt))
+        return PointerType::get(ctxt, 0);
     if (jl_is_llvmpointer_type(bt)) {
         jl_value_t *as_param = jl_tparam1(bt);
         int as;
@@ -699,7 +675,7 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall =
             as = jl_unbox_int64(as_param);
         else
             jl_error("invalid pointer address space");
-        return PointerType::get(getInt8Ty(ctxt), as);
+        return PointerType::get(ctxt, as);
     }
     int nb = jl_datatype_size(bt);
     return Type::getIntNTy(ctxt, nb * 8);
@@ -755,7 +731,7 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
     // use this where C-compatible (unboxed) structs are desired
     // use julia_type_to_llvm directly when you want to preserve Julia's type semantics
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_primitivetype(jt))
         return bitstype_to_llvm(jt, ctxt, llvmcall);
@@ -943,6 +919,9 @@ static bool for_each_uniontype_small(
         allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter);
         return allunbox;
     }
+    else if (ty == (jl_value_t*)jl_typeofbottom_type->super) {
+        f(++counter, jl_typeofbottom_type); // treat Tuple{union{}} as identical to typeof(Union{})
+    }
     else if (jl_is_pointerfree(ty)) {
         f(++counter, (jl_datatype_t*)ty);
         return true;
@@ -1001,11 +980,11 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align_dst, unsigned align_src, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, uint64_t sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (sz == 0)
         return;
-    assert(align_dst && "align must be specified");
+    #if JL_LLVM_VERSION < 170000
     // If the types are small and simple, use load and store directly.
     // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
     // that interferes with other optimizations.
@@ -1037,7 +1016,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
             dst = emit_bitcast(ctx, dst, srcty);
         }
         else if (dstel->isSized() && dstel->isSingleValueType() &&
-                 DL.getTypeStoreSize(dstel) == sz) {
+                DL.getTypeStoreSize(dstel) == sz) {
             directel = dstel;
             src = emit_bitcast(ctx, src, dstty);
         }
@@ -1047,11 +1026,12 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
             if (isa<Instruction>(dst) && !dst->hasName())
                 setName(ctx.emission_context, dst, "memcpy_refined_dst");
             auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile));
-            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile));
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, align_dst, is_volatile));
             ++SkippedMemcpys;
             return;
         }
     }
+    #endif
     ++EmittedMemcpys;
 
     // the memcpy intrinsic does not allow to specify different alias tags
@@ -1065,12 +1045,12 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     // above problem won't be as serious.
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, Align(align_dst), src, Align(align_src), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, Value *sz, unsigned align_dst, unsigned align_src, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, Value *sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (auto const_sz = dyn_cast<ConstantInt>(sz)) {
         emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile);
@@ -1079,20 +1059,20 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     ++EmittedMemcpys;
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                        jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
+                        jl_aliasinfo_t const &src_ai, T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
     emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src,
-                        T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
+                        T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
     auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa);
     emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile);
@@ -1207,18 +1187,18 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
 
 static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue);
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*));
+    Value *Ptr = decay_derived(ctx, dt);
+    unsigned Idx = offsetof(jl_datatype_t, types);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-                ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
+                ctx.types().T_pjlvalue, emit_ptrgep(ctx, Ptr, Idx), Align(sizeof(void*))));
     setName(ctx.emission_context, types, "datatype_types");
     return types;
 }
 
 static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt)
 {
-    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo());
+    Value *type_svec = emit_datatype_types(ctx, dt);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*))));
     setName(ctx.emission_context, nfields, "datatype_nfields");
@@ -1229,17 +1209,14 @@ static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt)
 static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion=false)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())->getPointerTo());
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*));
-    Ptr = ctx.builder.CreateInBoundsGEP(getInt32PtrTy(ctx.builder.getContext()), Ptr, Idx);
-    Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
-    Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t));
-    Value *SizePtr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx);
+    Value *Ptr = decay_derived(ctx, dt);
+    Ptr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_t, layout));
+    Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
+    Value *SizePtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, size));
     Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t))));
     setName(ctx.emission_context, Size, "datatype_size");
     if (add_isunion) {
-        Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, flags) / sizeof(int16_t));
-        Value *FlagPtr = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), emit_bitcast(ctx, Ptr, getInt16PtrTy(ctx.builder.getContext())), Idx);
+        Value *FlagPtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, flags));
         Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t))));
         Flag = ctx.builder.CreateLShr(Flag, 4);
         Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1));
@@ -1301,10 +1278,10 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
 static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8);
+    Value *Ptr = decay_derived(ctx, dt);
     Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, name));
     Value *Nam = ai.decorateInst(
-            ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
+            ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getPointerTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
     Value *Idx2 = ConstantInt::get(ctx.types().T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
     Value *mutabl = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1)));
@@ -1316,7 +1293,7 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 {
     Value *isprimitive;
-    isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isprimitive = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1)));
     isprimitive = ctx.builder.CreateLShr(isprimitive, 7);
@@ -1328,10 +1305,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 {
     unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*);
-    Value *vptr = ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_pjlvalue,
-            emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue),
-            ConstantInt::get(ctx.types().T_size, n));
+    Value *vptr = emit_ptrgep(ctx, maybe_decay_tracked(ctx, dt), n * sizeof(jl_value_t*));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*))));
     setName(ctx.emission_context, name, "datatype_name");
@@ -1530,8 +1504,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just
             // we lied a bit: this wasn't really an object (though it was valid for GC rooting)
             // and we need to use it as an index to get the real object now
             Module *M = jl_Module;
-            Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jl_small_typeof_var), tag);
-            smallp = ctx.builder.CreateBitCast(smallp, typetag->getType()->getPointerTo(0));
+            Value *smallp = emit_ptrgep(ctx, prepare_global_in(M, jl_small_typeof_var), tag);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0));
             small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None));
@@ -1592,7 +1565,7 @@ static bool can_optimize_isa_union(jl_uniontype_t *type)
 // a simple case of emit_isa that is obvious not to include a safe-point
 static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt, bool could_be_null=false)
 {
-    assert(jl_is_concrete_type((jl_value_t*)dt));
+    assert(jl_is_concrete_type((jl_value_t*)dt) || is_uniquerep_Type((jl_value_t*)dt));
     if (arg.TIndex) {
         unsigned tindex = get_box_tindex(dt, arg.typ);
         if (tindex > 0) {
@@ -1615,7 +1588,12 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data
             BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f);
             ctx.builder.CreateCondBr(isboxed, isaBB, postBB);
             ctx.builder.SetInsertPoint(isaBB);
-            Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt));
+            Value *istype_boxed = NULL;
+            if (is_uniquerep_Type((jl_value_t*)dt)) {
+                istype_boxed = ctx.builder.CreateICmpEQ(decay_derived(ctx, arg.Vboxed), decay_derived(ctx, literal_pointer_val(ctx, jl_tparam0(dt))));
+            } else {
+                istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt));
+            }
             ctx.builder.CreateBr(postBB);
             isaBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(postBB);
@@ -1681,6 +1659,8 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         if (intersected_type == (jl_value_t*)jl_bottom_type)
             known_isa = false;
     }
+    if (intersected_type == (jl_value_t*)jl_typeofbottom_type->super)
+        intersected_type = (jl_value_t*)jl_typeofbottom_type; // swap abstract Type{Union{}} for concrete typeof(Union{})
     if (known_isa) {
         if (!*known_isa && !msg.isTriviallyEmpty()) {
             emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
@@ -1804,7 +1784,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
 {
     Value *isconcrete;
-    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isconcrete = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1)));
     isconcrete = ctx.builder.CreateLShr(isconcrete, 1);
@@ -1856,7 +1836,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
         else { // unboxed jl_value_t*
             Value *a = ainfo.V;
             if (ainfo.isghost) {
-                a = Constant::getNullValue(getInt8PtrTy(ctx.builder.getContext()));
+                a = Constant::getNullValue(getPointerTy(ctx.builder.getContext()));
             }
             else if (!ainfo.ispointer()) {
                 // CreateAlloca is OK here since we are on an error branch
@@ -1866,7 +1846,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
                 a = tempSpace;
             }
             ctx.builder.CreateCall(prepare_call(jluboundserror_func), {
-                    emit_bitcast(ctx, decay_derived(ctx, a), getInt8PtrTy(ctx.builder.getContext())),
+                    decay_derived(ctx, a),
                     literal_pointer_val(ctx, ty),
                     i });
         }
@@ -1934,7 +1914,7 @@ static void emit_lockstate_value(jl_codectx_t &ctx, Value *strct, bool newstate)
 {
     ++EmittedLockstates;
     if (strct->getType()->getPointerAddressSpace() == AddressSpace::Loaded) {
-        Value *v = emit_bitcast(ctx, strct, PointerType::get(ctx.types().T_jlvalue, AddressSpace::Loaded));
+        Value *v = strct;
         ctx.builder.CreateCall(prepare_call(newstate ? jllockfield_func : jlunlockfield_func), v);
     }
     else {
@@ -1957,18 +1937,22 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
             ctx.builder.CreateFence(Order);
         return ghostValue(ctx, jltype);
     }
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type
     AllocaInst *intcast = NULL;
     if (Order == AtomicOrdering::NotAtomic) {
         if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "aggregate_load_box");
         }
     }
     else {
         if (!isboxed && !elty->isIntOrPtrTy()) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_load_box");
             elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
         }
@@ -1979,21 +1963,12 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         if (nb != nb2)
             elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
     }
-    Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
-    Value *data;
-    if (ptr->getType() != ptrty)
-        data = emit_bitcast(ctx, ptr, ptrty);
-    else
-        data = ptr;
+    Value *data = ptr;
     if (idx_0based)
         data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
     Value *instr = nullptr;
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
     if (intcast && Order == AtomicOrdering::NotAtomic) {
-        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment, intcast->getAlign().value());
+        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign());
     }
     else {
         if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
@@ -2027,7 +2002,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         if (elty != realelty)
             instr = ctx.builder.CreateTrunc(instr, realelty);
         if (intcast) {
-            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            ctx.builder.CreateStore(instr, intcast);
             instr = nullptr;
         }
     }
@@ -2078,6 +2053,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         ret = update_julia_type(ctx, ret, jltype);
         return ret;
     };
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
     Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty) ||
             (issetfieldonce && !maybe_null_if_boxed) ||
@@ -2114,12 +2093,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         FailOrder = AtomicOrdering::Monotonic;
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     AllocaInst *intcast = nullptr;
+    Type *intcast_eltyp = nullptr;
+    bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0;
     if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        intcast_eltyp = elty;
+        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
         if (!issetfield) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_store_box");
         }
-        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
     }
     Type *realelty = elty;
     if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
@@ -2128,22 +2110,21 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
     }
     Value *r = nullptr;
-    if (issetfield || isswapfield || isreplacefield || issetfieldonce)  {
-        if (isboxed)
+    if (issetfield || isswapfield || isreplacefield || issetfieldonce)  { // e.g. !ismodifyfield
+        assert(isboxed || rhs.typ == jltype);
+        if (isboxed) {
             r = boxed(ctx, rhs);
-        else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+        }
+        else if (intcast) {
+            emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+            r = ctx.builder.CreateLoad(realelty, intcast);
+        }
+        else if (aliasscope || Order != AtomicOrdering::NotAtomic || tracked_pointers) {
             r = emit_unbox(ctx, realelty, rhs, jltype);
-            if (realelty != elty)
-                r = ctx.builder.CreateZExt(r, elty);
         }
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
     }
-    Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
-    if (ptr->getType() != ptrty)
-        ptr = ctx.builder.CreateBitCast(ptr, ptrty);
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
     Value *instr = nullptr;
     Value *Compare = nullptr;
     Value *Success = nullptr;
@@ -2173,7 +2154,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         else {
             assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype);
-            emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+            emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment));
         }
     }
     else if (isswapfield) {
@@ -2198,7 +2179,8 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             }
             else if (!isboxed) {
                 assert(jl_is_concrete_type(jltype));
-                needloop = ((jl_datatype_t*)jltype)->layout->flags.haspadding;
+                needloop = ((jl_datatype_t*)jltype)->layout->flags.haspadding ||
+                          !((jl_datatype_t*)jltype)->layout->flags.isbitsegal;
                 Value *SameType = emit_isa(ctx, cmp, jltype, Twine()).first;
                 if (SameType != ConstantInt::getTrue(ctx.builder.getContext())) {
                     BasicBlock *SkipBB = BasicBlock::Create(ctx.builder.getContext(), "skip_xchg", ctx.f);
@@ -2219,7 +2201,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     Current->addIncoming(instr, SkipBB);
                     ctx.builder.SetInsertPoint(BB);
                 }
-                Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                cmp = update_julia_type(ctx, cmp, jltype);
+                if (intcast) {
+                    emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+                    Compare = ctx.builder.CreateLoad(realelty, intcast);
+                }
+                else {
+                    Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                }
                 if (realelty != elty)
                     Compare = ctx.builder.CreateZExt(Compare, elty);
             }
@@ -2266,16 +2255,17 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (realelty != elty)
                 realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
             if (intcast) {
-                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
-                if (maybe_null_if_boxed)
-                    realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                assert(!isboxed);
+                ctx.builder.CreateStore(realCompare, intcast);
+                if (tracked_pointers)
+                    realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast);
             }
-            if (maybe_null_if_boxed) {
-                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
-                if (first_ptr)
-                    null_load_check(ctx, first_ptr, mod, var);
+            if (maybe_null_if_boxed && tracked_pointers) {
+                Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare);
+                assert(first_ptr);
+                null_load_check(ctx, first_ptr, mod, var);
             }
-            if (intcast)
+            if (intcast && !tracked_pointers)
                 oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
             else
                 oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
@@ -2283,11 +2273,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (isboxed) {
                 r = boxed(ctx, rhs);
             }
-            else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+            else if (intcast) {
+                emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+                r = ctx.builder.CreateLoad(realelty, intcast);
+                if (!tracked_pointers) // oldval is a slot, so put the oldval back
+                    ctx.builder.CreateStore(realCompare, intcast);
+            }
+            else if (Order != AtomicOrdering::NotAtomic) {
+                assert(!tracked_pointers);
                 r = emit_unbox(ctx, realelty, rhs, jltype);
-                if (realelty != elty)
-                    r = ctx.builder.CreateZExt(r, elty);
             }
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
             if (needlock)
                 emit_lockstate_value(ctx, needlock, true);
             cmp = oldval;
@@ -2325,7 +2322,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             }
             else {
                 assert(!isboxed && rhs.typ == jltype);
-                emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+                emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment));
             }
             ctx.builder.CreateBr(DoneBB);
             instr = load;
@@ -2352,10 +2349,11 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 if (realelty != elty)
                     realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
                 if (intcast) {
-                    ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
+                    ctx.builder.CreateStore(realinstr, intcast);
+                    // n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root
                     oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
                     if (maybe_null_if_boxed)
-                        realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                        realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
                 }
                 else {
                     oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
@@ -2395,20 +2393,23 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         ctx.builder.SetInsertPoint(DoneBB);
     if (needlock)
         emit_lockstate_value(ctx, needlock, false);
-    if (parent != NULL) {
+    if (parent != NULL && r && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) {
         if (isreplacefield || issetfieldonce) {
-            // TODO: avoid this branch if we aren't making a write barrier
             BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
             DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
             ctx.builder.CreateCondBr(Success, BB, DoneBB);
             ctx.builder.SetInsertPoint(BB);
         }
-        if (r) {
-            if (!isboxed)
-                emit_write_multibarrier(ctx, parent, r, rhs.typ);
-            else if (!type_is_permalloc(rhs.typ))
-                emit_write_barrier(ctx, parent, r);
+        if (realelty != elty)
+            r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty));
+        if (intcast) {
+            ctx.builder.CreateStore(r, intcast);
+            r = ctx.builder.CreateLoad(intcast_eltyp, intcast);
         }
+        if (!isboxed)
+            emit_write_multibarrier(ctx, parent, r, rhs.typ);
+        else if (!type_is_permalloc(rhs.typ))
+            emit_write_barrier(ctx, parent, r);
         if (isreplacefield || issetfieldonce) {
             ctx.builder.CreateBr(DoneBB);
             ctx.builder.SetInsertPoint(DoneBB);
@@ -2426,22 +2427,19 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         if (realelty != elty)
             instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
-            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
-            instr = nullptr;
+            ctx.builder.CreateStore(instr, intcast);
+            if (tracked_pointers)
+                instr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
         }
-        if (maybe_null_if_boxed) {
-            if (intcast)
-                instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+        if (maybe_null_if_boxed && tracked_pointers) {
             Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-            if (first_ptr)
-                null_load_check(ctx, first_ptr, mod, var);
-            if (intcast && !first_ptr)
-                instr = nullptr;
+            assert(first_ptr);
+            null_load_check(ctx, first_ptr, mod, var);
         }
-        if (instr)
-            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-        else
+        if (intcast && !tracked_pointers)
             oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
+        else
+            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
             Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
             const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
@@ -2474,6 +2472,32 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                                          unsigned idx, jl_datatype_t *jt,
                                          enum jl_memory_order order, Value **nullcheck=nullptr);
 
+static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt, size_t idx)
+{
+    size_t nfields = jl_datatype_nfields(stt);
+    if (idx < nfields - (unsigned)stt->name->n_uninitialized)
+        return false;
+    if (!jl_field_isptr(stt, idx) && !jl_type_hasptr(jl_field_type(stt, idx)))
+        return false;
+    if (strct.constant) {
+        if ((jl_is_immutable(stt) || jl_field_isconst(stt, idx)) && jl_field_isdefined(strct.constant, idx))
+            return false;
+    }
+    return true;
+}
+
+static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt)
+{
+    size_t nfields = jl_datatype_nfields(stt);
+    for (size_t i = 0; i < (unsigned)stt->name->n_uninitialized; i++) {
+        size_t idx = nfields - i - 1;
+        if (field_may_be_null(strct, stt, idx))
+            return true;
+    }
+    return false;
+}
+
+
 static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         jl_cgval_t *ret, jl_cgval_t strct,
         Value *idx, jl_datatype_t *stt, jl_value_t *inbounds,
@@ -2481,7 +2505,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
 {
     ++EmittedGetfieldUnknowns;
     size_t nfields = jl_datatype_nfields(stt);
-    bool maybe_null = (unsigned)stt->name->n_uninitialized != 0;
+    bool maybe_null = field_may_be_null(strct, stt);
     auto idx0 = [&]() {
         return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(ctx.types().T_size, nfields), inbounds);
     };
@@ -2578,7 +2602,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             }
             Value *fldptr = ctx.builder.CreateInBoundsGEP(
                     ctx.types().T_prjlvalue,
-                    emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue),
+                    data_pointer(ctx, strct),
                     idx0());
             setName(ctx.emission_context, fldptr, "getfield_ptr");
             LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*)));
@@ -2601,8 +2625,8 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             if (!stt->name->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
                                                  ((jl_datatype_t*)jft)->layout->npointers))) {
                 // just compute the pointer and let user load it when necessary
-                Type *fty = julia_type_to_llvm(ctx, jft);
-                Value *addr = ctx.builder.CreateInBoundsGEP(fty, emit_bitcast(ctx, ptr, PointerType::get(fty, 0)), idx);
+                Type *fty = julia_type_to_llvm(ctx, jft); //TODO: move this to a int8 GEP
+                Value *addr = ctx.builder.CreateInBoundsGEP(fty, ptr, idx);
                 *ret = mark_julia_slot(addr, jft, NULL, strct.tbaa);
                 return true;
             }
@@ -2633,17 +2657,113 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
     if (fsz > 0 && mutabl) {
         // move value to an immutable stack slot (excluding tindex)
         Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        AllocaInst *lv = emit_static_alloca(ctx, AT, Align(al));
         setName(ctx.emission_context, lv, "immutable_union");
-        if (al > 1)
-            lv->setAlignment(Align(al));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-        emit_memcpy(ctx, lv, ai, addr, ai, fsz, al, al);
+        emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al));
         addr = lv;
     }
     return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa);
 }
 
+static bool isTBAA(MDNode *TBAA, std::initializer_list<const char*> const strset)
+{
+    if (!TBAA)
+        return false;
+    while (TBAA->getNumOperands() > 1) {
+        TBAA = cast<MDNode>(TBAA->getOperand(1).get());
+        auto str = cast<MDString>(TBAA->getOperand(0))->getString();
+        for (auto str2 : strset) {
+            if (str == str2) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Check if this is a load from an immutable value. The easiest
+// way to do so is to look at the tbaa and see if it derives from
+// jtbaa_immut.
+static bool isLoadFromImmut(LoadInst *LI)
+{
+    if (LI->getMetadata(LLVMContext::MD_invariant_load))
+        return true;
+    MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
+    if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"}))
+        return true;
+    return false;
+}
+
+static bool isConstGV(GlobalVariable *gv)
+{
+    return gv->isConstant() || gv->getMetadata("julia.constgv");
+}
+
+// Check if this is can be traced through constant loads to an constant global
+// or otherwise globally rooted value.
+// Almost all `tbaa_const` loads satisfies this with the exception of
+// task local constants which are constant as far as the code is concerned but aren't
+// global constants. For task local constant `task_local` will be true when this function
+// returns.
+// Unlike this function in llvm-late-gc-lowering, we do not examine PhiNode, as those are not emitted yet
+static bool isLoadFromConstGV(LoadInst *LI);
+static bool isLoadFromConstGV(Value *v)
+{
+    v = v->stripInBoundsOffsets();
+    if (auto LI = dyn_cast<LoadInst>(v))
+        return isLoadFromConstGV(LI);
+    if (auto gv = dyn_cast<GlobalVariable>(v))
+        return isConstGV(gv);
+    // null pointer
+    if (isa<ConstantData>(v))
+        return true;
+    // literal pointers
+    if (auto CE = dyn_cast<ConstantExpr>(v))
+        return (CE->getOpcode() == Instruction::IntToPtr &&
+                isa<ConstantData>(CE->getOperand(0)));
+    if (auto SL = dyn_cast<SelectInst>(v))
+        return (isLoadFromConstGV(SL->getTrueValue()) &&
+                isLoadFromConstGV(SL->getFalseValue()));
+    if (auto call = dyn_cast<CallInst>(v)) {
+        auto callee = call->getCalledFunction();
+        if (callee && callee->getName() == "julia.typeof") {
+            return true;
+        }
+        if (callee && callee->getName() == "julia.get_pgcstack") {
+            return true;
+        }
+        if (callee && callee->getName() == "julia.gc_loaded") {
+            return isLoadFromConstGV(call->getArgOperand(0)) &&
+                   isLoadFromConstGV(call->getArgOperand(1));
+        }
+    }
+    if (isa<Argument>(v)) {
+        return true;
+    }
+    return false;
+}
+
+// The white list implemented here and above in `isLoadFromConstGV(Value*)` should
+// cover all the cases we and LLVM generates.
+static bool isLoadFromConstGV(LoadInst *LI)
+{
+    // We only emit single slot GV in codegen
+    // but LLVM global merging can change the pointer operands to GEPs/bitcasts
+    auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
+    assert(load_base); // Static analyzer
+    auto gv = dyn_cast<GlobalVariable>(load_base);
+    if (isLoadFromImmut(LI)) {
+        if (gv)
+            return true;
+        return isLoadFromConstGV(load_base);
+    }
+    if (gv)
+        return isConstGV(gv);
+    return false;
+}
+
+
 static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_datatype_t *jt, unsigned idx, size_t byte_offset)
 {
     auto tbaa = strct.tbaa;
@@ -2664,6 +2784,9 @@ static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_da
                 return ctx.tbaa().tbaa_arraysize;
         }
     }
+    if (strct.V && jl_field_isconst(jt, idx) && isLoadFromConstGV(strct.V))
+        return ctx.tbaa().tbaa_const; //TODO: it seems odd to have a field with a tbaa that doesn't alias it's containing struct's tbaa
+                                      //Does the fact that this is marked as constant make this fine?
     return tbaa;
 }
 
@@ -2700,44 +2823,20 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         assert(strct.isboxed);
         needlock = boxed(ctx, strct);
     }
-    size_t nfields = jl_datatype_nfields(jt);
-    bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized;
+    bool maybe_null = field_may_be_null(strct, jt, idx);
     size_t byte_offset = jl_field_offset(jt, idx);
     if (strct.ispointer()) {
         auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset);
         Value *staddr = data_pointer(ctx, strct);
-        bool isboxed;
-        Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed);
         Value *addr;
-        if (isboxed) {
-            // byte_offset == 0 is an important special case here, e.g.
-            // for single field wrapper types. Introducing the bitcast
-            // can pessimize mem2reg
-            if (byte_offset > 0) {
-                addr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()),
-                        emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())),
-                        ConstantInt::get(ctx.types().T_size, byte_offset));
-            }
-            else {
-                addr = staddr;
-            }
-        }
-        else {
-            staddr = maybe_bitcast(ctx, staddr, lt->getPointerTo());
-            if (jl_is_vecelement_type((jl_value_t*)jt))
-                addr = staddr; // VecElement types are unwrapped in LLVM.
-            else if (isa<StructType>(lt))
-                addr = emit_struct_gep(ctx, lt, staddr, byte_offset);
-            else
-                addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx);
-            if (addr != staddr) {
-                setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr"));
-            }
-        }
-        if (jl_field_isptr(jt, idx)) {
+        if (jl_is_vecelement_type((jl_value_t*)jt) || byte_offset == 0)
+            addr = staddr; // VecElement types are unwrapped in LLVM.
+        else
+            addr = emit_ptrgep(ctx, staddr, byte_offset);
+        if (addr != staddr)
             setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr"));
-            LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
+        if (jl_field_isptr(jt, idx)) {
+            LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
             setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine());
             Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx));
@@ -2753,15 +2852,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             bool isptr = (union_max == 0);
             assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr;
             size_t fsz1 = jl_field_size(jt, idx) - 1;
-            Value *ptindex;
-            if (isboxed) {
-                ptindex = ctx.builder.CreateConstInBoundsGEP1_32(
-                    getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz1);
-            }
-            else {
-                ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz1);
-            }
-            auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte);
+            Value *ptindex = emit_ptrgep(ctx, staddr, byte_offset + fsz1);
+            auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, strct.tbaa);
             if (val.V && val.V != addr) {
                 setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine());
             }
@@ -2809,23 +2901,22 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 unsigned st_idx = convert_struct_offset(ctx, T, byte_offset);
                 IntegerType *ET = cast<IntegerType>(T->getStructElementType(st_idx));
                 unsigned align = (ET->getBitWidth() + 7) / 8;
-                lv = emit_static_alloca(ctx, ET);
+                lv = emit_static_alloca(ctx, ET, Align(align));
                 lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align));
                 // emit all of the align-sized words
                 unsigned i = 0;
                 for (; i < fsz / align; i++) {
                     unsigned fld = st_idx + i;
                     Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(fld));
-                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                    Value *fldp = emit_ptrgep(ctx, lv, i * align);
                     ctx.builder.CreateAlignedStore(fldv, fldp, Align(align));
                 }
                 // emit remaining bytes up to tindex
                 if (i < ptindex - st_idx) {
-                    Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                    staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
+                    Value *staddr = emit_ptrgep(ctx, lv, i * align);
                     for (; i < ptindex - st_idx; i++) {
                         Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(st_idx + i));
-                        Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                        Value *fldp = emit_ptrgep(ctx, staddr, i);
                         ctx.builder.CreateAlignedStore(fldv, fldp, Align(1));
                     }
                 }
@@ -2886,13 +2977,15 @@ static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *
         size_t sz = sty->layout->size;
         if (sty->layout->flags.arrayelem_isunion)
             sz++;
-        return ConstantInt::get(ctx.types().T_size, sz);
+        auto elsize = ConstantInt::get(ctx.types().T_size, sz);
+        return elsize;
     }
     else {
-        v = emit_bitcast(ctx, v, ctx.types().T_prjlvalue);
         Value *t = emit_typeof(ctx, v, false, false, true);
         Value *elsize = emit_datatype_size(ctx, t, add_isunion);
-        return ctx.builder.CreateZExt(elsize, ctx.types().T_size);
+        elsize = ctx.builder.CreateZExt(elsize, ctx.types().T_size);
+        setName(ctx.emission_context, elsize, "elsize");
+        return elsize;
     }
 }
 
@@ -2919,7 +3012,7 @@ static intptr_t genericmemoryype_maxsize(jl_value_t *ty) // the maxsize is stric
 
 static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *typ)
 {
-    addr = emit_bitcast(ctx, decay_derived(ctx, addr), ctx.types().T_jlgenericmemory->getPointerTo()),
+    addr = decay_derived(ctx, addr);
     addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 0);
     LoadInst *LI = ctx.builder.CreateAlignedLoad(ctx.types().T_jlgenericmemory->getElementType(0), addr, Align(sizeof(size_t)));
     jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
@@ -2927,17 +3020,17 @@ static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *
     MDBuilder MDB(ctx.builder.getContext());
     auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ)));
     LI->setMetadata(LLVMContext::MD_range, rng);
+    setName(ctx.emission_context, LI, "memory_len");
     return LI;
 }
 
 static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_datatype_layout_t *layout, unsigned AS)
 {
     ++EmittedArrayptr;
-    PointerType *PT = cast<PointerType>(mem->getType());
-    assert(PT == ctx.types().T_prjlvalue);
-    Value *addr = emit_bitcast(ctx, mem, ctx.types().T_jlgenericmemory->getPointerTo(PT->getAddressSpace()));
+    Value *addr = mem;
+    addr = decay_derived(ctx, addr);
     addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1);
-    setName(ctx.emission_context, addr, ".data_ptr");
+    setName(ctx.emission_context, addr, "memory_data_ptr");
     PointerType *PPT = cast<PointerType>(ctx.types().T_jlgenericmemory->getElementType(1));
     LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*)));
     LI->setOrdering(AtomicOrdering::NotAtomic);
@@ -2949,14 +3042,13 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
         assert(AS == AddressSpace::Loaded);
         ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr });
     }
-    if (!layout->flags.arrayelem_isboxed)
-        ptr = ctx.builder.CreateBitCast(ptr, PointerType::get(getInt8Ty(ctx.builder.getContext()), AS));
+    setName(ctx.emission_context, ptr, "memory_data");
     return ptr;
 }
 
 static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
 {
-    Value *m = emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_jlgenericmemory->getPointerTo(0));
+    Value *m = decay_derived(ctx, t);
     Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, m, 1);
     Type *T_data = ctx.types().T_jlgenericmemory->getElementType(1);
     LoadInst *LI = ctx.builder.CreateAlignedLoad(T_data, addr, Align(sizeof(char*)));
@@ -2964,11 +3056,11 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
     LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
     jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown);
     aliasinfo_mem.decorateInst(LI);
-    addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, emit_bitcast(ctx, m, LI->getType()), JL_SMALL_BYTE_ALIGNMENT / sizeof(void*));
+    addr = emit_ptrgep(ctx, m, JL_SMALL_BYTE_ALIGNMENT);
     Value *foreign = ctx.builder.CreateICmpNE(addr, decay_derived(ctx, LI));
     return emit_guarded_test(ctx, foreign, t, [&] {
             addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1);
-            LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
+            LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             ai.decorateInst(owner);
             return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner);
@@ -2980,12 +3072,11 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
 static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized);
 
 static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa,
-                            unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
+                            Align alignment = Align(sizeof(void*))) // min alignment in julia's gc is pointer-aligned
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
     // newv should already be tagged
-    ai.decorateInst(ctx.builder.CreateAlignedStore(v, emit_bitcast(ctx, newv,
-        PointerType::get(v->getType(), 0)), Align(alignment)));
+    ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, alignment));
 }
 
 static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, MDNode *tbaa)
@@ -2993,7 +3084,7 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v,
     // newv should already be tagged
     if (v.ispointer()) {
         unsigned align = std::max(julia_alignment(v.typ), (unsigned)sizeof(void*));
-        emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), align, julia_alignment(v.typ));
+        emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), Align(align), Align(julia_alignment(v.typ)));
     }
     else {
         init_bits_value(ctx, newv, v.V, tbaa);
@@ -3231,10 +3322,8 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
         // at least some of the values can live on the stack
         // try to pick an Integer type size such that SROA will emit reasonable code
         Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        AllocaInst *lv = emit_static_alloca(ctx, AT, Align(align));
         setName(ctx.emission_context, lv, "unionalloca");
-        if (align > 1)
-            lv->setAlignment(Align(align));
         return lv;
     }
     return NULL;
@@ -3363,7 +3452,11 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
     for (auto *User : Val->users()) {
         if (isa<GetElementPtrInst>(User)) {
             GetElementPtrInst *Inst = cast<GetElementPtrInst>(User);
+            #if JL_LLVM_VERSION >= 170000
+            Inst->mutateType(PointerType::get(Inst->getType(), ToAS));
+            #else
             Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
+            #endif
             recursively_adjust_ptr_type(Inst, FromAS, ToAS);
         }
         else if (isa<IntrinsicInst>(User)) {
@@ -3372,7 +3465,11 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
         }
         else if (isa<BitCastInst>(User)) {
             BitCastInst *Inst = cast<BitCastInst>(User);
+            #if JL_LLVM_VERSION >= 170000
+            Inst->mutateType(PointerType::get(Inst->getType(), ToAS));
+            #else
             Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
+            #endif
             recursively_adjust_ptr_type(Inst, FromAS, ToAS);
         }
     }
@@ -3419,7 +3516,6 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
                 Value *decayed = decay_derived(ctx, box);
                 AllocaInst *originalAlloca = cast<AllocaInst>(vinfo.V);
                 box->takeName(originalAlloca);
-                decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived));
                 // Warning: Very illegal IR here temporarily
                 originalAlloca->mutateType(decayed->getType());
                 recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived);
@@ -3452,7 +3548,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
         if (jl_is_pointerfree(typ)) {
             unsigned alignment = julia_alignment(typ);
             if (!src.ispointer() || src.constant) {
-                emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile);
+                emit_unbox_store(ctx, src, dest, tbaa_dst, Align(alignment), isVolatile);
             }
             else {
                 Value *src_ptr = data_pointer(ctx, src);
@@ -3462,7 +3558,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                 // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
                 auto f = [&] {
                     (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                      jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
+                                      jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile);
                     return nullptr;
                 };
                 if (skip)
@@ -3477,8 +3573,6 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
         if (skip)
             tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), tindex);
         Value *src_ptr = data_pointer(ctx, src);
-        src_ptr = src_ptr ? maybe_bitcast(ctx, src_ptr, getInt8PtrTy(ctx.builder.getContext())) : src_ptr;
-        dest = maybe_bitcast(ctx, dest, getInt8PtrTy(ctx.builder.getContext()));
         BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "union_move_skip", ctx.f);
         SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
         BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_move", ctx.f);
@@ -3499,7 +3593,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                             return;
                         } else {
                             emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
+                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile);
                         }
                     }
                     ctx.builder.CreateBr(postBB);
@@ -3525,7 +3619,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
             Value *datatype = emit_typeof(ctx, src, false, false);
             Value *copy_bytes = emit_datatype_size(ctx, datatype);
             (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
-                              jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile);
+                              jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile);
             return nullptr;
         };
         if (skip)
@@ -3584,7 +3678,6 @@ static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_ini
 // allocation for unknown object from an untracked pointer
 static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval)
 {
-    pval = ctx.builder.CreateBitCast(pval, getInt8PtrTy(ctx.builder.getContext()));
     Function *F = prepare_call(jl_newbits_func);
     auto call = ctx.builder.CreateCall(F, { jt, pval });
     call->setAttributes(F->getAttributes());
@@ -3604,9 +3697,9 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*
     if (ptrs.empty())
         return;
     SmallVector<Value*, 8> decay_ptrs;
-    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue)));
+    decay_ptrs.push_back(maybe_decay_untracked(ctx, parent));
     for (auto ptr : ptrs) {
-        decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue)));
+        decay_ptrs.push_back(maybe_decay_untracked(ctx, ptr));
     }
     ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs);
 }
@@ -3623,7 +3716,7 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
 
 static jl_cgval_t union_store(jl_codectx_t &ctx,
         Value *ptr, Value *ptindex, jl_cgval_t rhs, jl_cgval_t cmp,
-        jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope, MDNode *tbaa_tindex,
+        jl_value_t *jltype, MDNode *tbaa, MDNode *tbaa_tindex,
         AtomicOrdering Order, AtomicOrdering FailOrder,
         Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce,
         const jl_cgval_t *modifyop, const Twine &fname)
@@ -3681,7 +3774,7 @@ static jl_cgval_t union_store(jl_codectx_t &ctx,
     }
     Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jltype);
     tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_tindex);
     ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
     // copy data
     if (!rhs.isghost) {
@@ -3723,28 +3816,22 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
     auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset);
     Value *addr = data_pointer(ctx, strct);
     if (byte_offset > 0) {
-        addr = ctx.builder.CreateInBoundsGEP(
-                getInt8Ty(ctx.builder.getContext()),
-                emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(ctx.types().T_size, byte_offset));
+        addr = emit_ptrgep(ctx, addr, byte_offset);
         setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr"));
     }
     jl_value_t *jfty = jl_field_type(sty, idx0);
     bool isboxed = jl_field_isptr(sty, idx0);
     if (!isboxed && jl_is_uniontype(jfty)) {
         size_t fsz1 = jl_field_size(sty, idx0) - 1;
-        Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(ctx.types().T_size, fsz1));
+        Value *ptindex = emit_ptrgep(ctx, addr, fsz1);
         setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr"));
-        return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, nullptr, ctx.tbaa().tbaa_unionselbyte,
+        return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, ctx.tbaa().tbaa_unionselbyte,
             Order, FailOrder,
             needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce,
             modifyop, fname);
     }
     unsigned align = jl_field_align(sty, idx0);
-    size_t nfields = jl_datatype_nfields(sty);
-    bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized;
+    bool maybe_null = field_may_be_null(strct, sty, idx0);
     return typed_store(ctx, addr, rhs, cmp, jfty, tbaa, nullptr,
         wb ? boxed(ctx, strct) : nullptr,
         isboxed, Order, FailOrder, align,
@@ -3795,10 +3882,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 }
             }
             else {
-                strct = emit_static_alloca(ctx, lt);
+                strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty)));
                 setName(ctx.emission_context, strct, arg_typename);
-                if (nargs < nf)
-                    promotion_point = ctx.builder.CreateStore(ctx.builder.CreateFreeze(UndefValue::get(lt)), strct);
                 if (tracked.count)
                     undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack);
             }
@@ -3830,8 +3915,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (!init_as_value) {
                     // avoid unboxing the argument explicitly
                     // and use memcpy instead
-                    Instruction *inst;
-                    dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx));
+                    Instruction *inst = cast<Instruction>(emit_ptrgep(ctx, strct, offs));
+                    dest = inst;
                     // Our promotion point needs to come before
                     //  A) All of our arguments' promotion points
                     //  B) Any instructions we insert at any of our arguments' promotion points
@@ -3877,24 +3962,23 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                         if (fsz1 > 0 && !fval_info.isghost) {
                             Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al);
                             assert(lt->getStructElementType(llvm_idx) == ET);
-                            AllocaInst *lv = emit_static_alloca(ctx, ET);
+                            AllocaInst *lv = emit_static_alloca(ctx, ET, Align(al));
                             setName(ctx.emission_context, lv, "unioninit");
                             lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al));
                             emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr);
                             // emit all of the align-sized words
                             unsigned i = 0;
                             for (; i < fsz1 / al; i++) {
-                                Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                                Value *fldp = emit_ptrgep(ctx, lv, i * al);
                                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                 Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al)));
                                 strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
                             }
                             // emit remaining bytes up to tindex
                             if (i < ptindex - llvm_idx) {
-                                Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                                staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
+                                Value *staddr = emit_ptrgep(ctx, lv, i * al);
                                 for (; i < ptindex - llvm_idx; i++) {
-                                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                                    Value *fldp = emit_ptrgep(ctx, staddr, i);
                                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                     Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1)));
                                     strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
@@ -3907,7 +3991,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                             fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(llvm_idx));
                     }
                     else {
-                        Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz1);
+                        Value *ptindex = emit_ptrgep(ctx, strct, offs + fsz1);
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                         ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
                         if (!rhs_union.isghost)
@@ -3921,7 +4005,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     } else if (init_as_value) {
                         fval = emit_unbox(ctx, fty, fval_info, jtype);
                     } else {
-                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, jl_field_align(sty, i));
+                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, Align(jl_field_align(sty, i)));
                     }
                 }
                 if (init_as_value) {
@@ -3943,18 +4027,27 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
                     unsigned offs = jl_field_offset(sty, i);
                     int fsz = jl_field_size(sty, i) - 1;
-                    unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
-                    if (init_as_value)
+                    if (init_as_value) {
+                        unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
                         strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef<unsigned>(llvm_idx));
+                    }
                     else {
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                         ai.decorateInst(ctx.builder.CreateAlignedStore(
                                 ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                                ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx),
+                                emit_ptrgep(ctx, strct, offs + fsz),
                                 Align(1)));
                     }
                 }
             }
+            if (promotion_point && nargs < nf) {
+                assert(!init_as_value);
+                IRBuilderBase::InsertPoint savedIP = ctx.builder.saveIP();
+                ctx.builder.SetInsertPoint(promotion_point);
+                promotion_point = cast<FreezeInst>(ctx.builder.CreateFreeze(UndefValue::get(lt)));
+                ctx.builder.CreateStore(promotion_point, strct);
+                ctx.builder.restoreIP(savedIP);
+            }
             if (type_is_ghost(lt))
                 return mark_julia_const(ctx, sty->instance);
             else if (init_as_value)
@@ -3978,8 +4071,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                 ai.decorateInst(ctx.builder.CreateAlignedStore(
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                        ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())),
-                                ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
+                        emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1),
                         Align(1)));
             }
         }
@@ -4020,11 +4112,8 @@ static void emit_signal_fence(jl_codectx_t &ctx)
 static Value *emit_defer_signal(jl_codectx_t &ctx)
 {
     ++EmittedDeferSignal;
-    Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx),
-                               PointerType::get(ctx.types().T_sigatomic, 0));
-    Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()),
-            offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
-    return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
+    Value *ptls = get_current_ptls(ctx);
+    return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal));
 }
 
 #ifndef JL_NDEBUG
@@ -4053,6 +4142,7 @@ static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, co
     Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), ctx.types().T_size, layout, 0));
     ref = ctx.builder.CreateInsertValue(ref, data, 0);
     ref = ctx.builder.CreateInsertValue(ref, mem, 1);
+    setName(ctx.emission_context, ref, "memory_ref");
     return mark_julia_type(ctx, ref, false, typ);
 }
 
@@ -4070,9 +4160,10 @@ static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const
     if (ref.ispointer()) {
         LLVMContext &C = ctx.builder.getContext();
         Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0);
-        LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, data_pointer(ctx, ref), PointerType::get(type, 0)));
+        LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
         ai.decorateInst(load);
+        setName(ctx.emission_context, load, "memory_ref_FCA");
         return load;
     }
     else {
@@ -4089,9 +4180,12 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
         return jl_cgval_t();
     Value *V = emit_memoryref_FCA(ctx, ref, layout);
     Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
+    maybeSetName(ctx.emission_context, data, "memoryref_data");
     Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, mem, "memoryref_mem");
     Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
     Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
+    setName(ctx.emission_context, offset, "memoryref_offset");
     Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
     bool bc = bounds_check_enabled(ctx, inbounds);
 #if 1
@@ -4103,12 +4197,14 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
     bool isghost = layout->size == 0;
     if ((!isboxed && isunion) || isghost) {
         newdata = ctx.builder.CreateAdd(data, offset);
+        setName(ctx.emission_context, newdata, "memoryref_data+offset");
         if (bc) {
             BasicBlock *failBB, *endBB;
             failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
             endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
             Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
             Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen);
+            setName(ctx.emission_context, offset, "memoryref_isinbounds");
             ctx.builder.CreateCondBr(inbound, endBB, failBB);
             failBB->insertInto(ctx.f);
             ctx.builder.SetInsertPoint(failBB);
@@ -4136,31 +4232,24 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
             // and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math
             Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
             ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen));
+            setName(ctx.emission_context, ovflw, "memoryref_ovflw");
         }
 #endif
-            boffset = ctx.builder.CreateMul(offset, elsz);
-#if 0 // TODO: if opaque-pointers?
-        newdata = emit_bitcast(ctx, data, getInt8PtrTy(ctx.builder.getContext()));
-        newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), newdata, boffset);
-#else
-        Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(ref.typ));
-        newdata = emit_bitcast(ctx, data, elty->getPointerTo(0));
-        newdata = ctx.builder.CreateInBoundsGEP(elty, newdata, offset);
+        boffset = ctx.builder.CreateMul(offset, elsz);
+        setName(ctx.emission_context, boffset, "memoryref_byteoffset");
+        newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
+        setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
         (void)boffset; // LLVM is very bad at handling GEP with types different from the load
-#endif
-        newdata = emit_bitcast(ctx, newdata, data->getType());
         if (bc) {
             BasicBlock *failBB, *endBB;
             failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
             endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
             Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
             Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0);
-            mptr = emit_bitcast(ctx, mptr, newdata->getType());
 #if 0
-            Value *mend = emit_bitcast(ctx, mptr, getInt8PtrTy(ctx.builder.getContext()));
+            Value *mend = mptr;
             Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
-            mend = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), mend, blen);
-            mend = emit_bitcast(ctx, mend, newdata->getType());
+            mend = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), mptr, blen);
             Value *inbound = ctx.builder.CreateAnd(
                     ctx.builder.CreateICmpULE(mptr, newdata),
                     ctx.builder.CreateICmpULT(newdata, mend));
@@ -4172,8 +4261,11 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
                 ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size),
                 ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
             Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
+            setName(ctx.emission_context, blen, "memoryref_bytelen");
             Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds");
             inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds&notovflw");
 #else
             Value *idx0; // (newdata - mptr) / elsz
             idx0 = ctx.builder.CreateSub(
@@ -4206,13 +4298,14 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
     else {
         Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
         Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0);
-        mptr = emit_bitcast(ctx, mptr, mem->getType());
         // (data - mptr) / elsz
         offset = ctx.builder.CreateSub(
             ctx.builder.CreatePtrToInt(data, ctx.types().T_size),
             ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+        setName(ctx.emission_context, offset, "memoryref_offset");
         Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
         offset = ctx.builder.CreateExactUDiv(offset, elsz);
+        setName(ctx.emission_context, offset, "memoryref_offsetidx");
     }
     offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1));
     return mark_julia_type(ctx, offset, false, jl_long_type);
@@ -4221,7 +4314,9 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
 static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
 {
     Value *V = emit_memoryref_FCA(ctx, ref, layout);
-    return CreateSimplifiedExtractValue(ctx, V, 1);
+    V = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, V, "memoryref_mem");
+    return V;
 }
 
 static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
@@ -4240,37 +4335,21 @@ static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const
         GEPlist.push_back(GEP);
         data = GEP->getPointerOperand()->stripPointerCastsSameRepresentation();
     }
-    data = ctx.builder.CreateBitCast(data, ctx.types().T_pprjlvalue);
     data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data });
     if (!GEPlist.empty()) {
         for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) {
-            data = ctx.builder.CreateBitCast(data, PointerType::get(GEP->getSourceElementType(), AS));
-            Instruction *GEP2 = GEP->clone();
+            GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GEP->clone());
             GEP2->mutateType(PointerType::get(GEP->getResultElementType(), AS));
             GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data);
+            GEP2->setIsInBounds(true);
             ctx.builder.Insert(GEP2);
             data = GEP2;
         }
     }
+    setName(ctx.emission_context, data, "memoryref_data");
     return data;
 }
 
-jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
-{
-    // This relies on the invariant that the JIT will have set the invoke ptr
-    // by the time it releases the codegen lock. If the code is refactored to make the
-    // codegen lock more fine-grained, this will have to be replaced by a per-codeinstance futex.
-    size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
-    // This should only be possible if there's more than one thread. If not, either there's a bug somewhere
-    // that resulted in this not getting cleared, or we're about to deadlock. Either way, that's bad.
-    if (nthreads == 1) {
-        jl_error("Internal error: Reached jl_fptr_wait_for_compiled in single-threaded execution.");
-    }
-    JL_LOCK(&jl_codegen_lock);
-    JL_UNLOCK(&jl_codegen_lock);
-    return jl_atomic_load_acquire(&m->invoke)(f, args, nargs, m);
-}
-
 // Reset us back to codegen debug type
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "julia_irgen_codegen"
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 97def0fac59f2..ecaeb460ebf91 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -767,7 +767,7 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const S
   SourceLocation Loc = FD->getLocation();
   StringRef Name = SM.getFilename(Loc);
   Name = llvm::sys::path::filename(Name);
-  if (Name.startswith("llvm-"))
+  if (Name.starts_with("llvm-"))
       return true;
   return false;
 }
@@ -795,56 +795,54 @@ static bool isMutexUnlock(StringRef name) {
            false;
 }
 
-#if LLVM_VERSION_MAJOR >= 13
-#define endswith_lower endswith_insensitive
-#endif
 
 bool GCChecker::isGCTrackedType(QualType QT) {
   return isJuliaType(
              [](StringRef Name) {
-               if (Name.endswith_lower("jl_value_t") ||
-                   Name.endswith_lower("jl_svec_t") ||
-                   Name.endswith_lower("jl_sym_t") ||
-                   Name.endswith_lower("jl_expr_t") ||
-                   Name.endswith_lower("jl_code_info_t") ||
-                   Name.endswith_lower("jl_array_t") ||
-                   Name.endswith_lower("jl_genericmemory_t") ||
-                   //Name.endswith_lower("jl_genericmemoryref_t") ||
-                   Name.endswith_lower("jl_method_t") ||
-                   Name.endswith_lower("jl_method_instance_t") ||
-                   Name.endswith_lower("jl_tupletype_t") ||
-                   Name.endswith_lower("jl_datatype_t") ||
-                   Name.endswith_lower("jl_typemap_entry_t") ||
-                   Name.endswith_lower("jl_typemap_level_t") ||
-                   Name.endswith_lower("jl_typename_t") ||
-                   Name.endswith_lower("jl_module_t") ||
-                   Name.endswith_lower("jl_tupletype_t") ||
-                   Name.endswith_lower("jl_gc_tracked_buffer_t") ||
-                   Name.endswith_lower("jl_binding_t") ||
-                   Name.endswith_lower("jl_ordereddict_t") ||
-                   Name.endswith_lower("jl_tvar_t") ||
-                   Name.endswith_lower("jl_typemap_t") ||
-                   Name.endswith_lower("jl_unionall_t") ||
-                   Name.endswith_lower("jl_methtable_t") ||
-                   Name.endswith_lower("jl_cgval_t") ||
-                   Name.endswith_lower("jl_codectx_t") ||
-                   Name.endswith_lower("jl_ast_context_t") ||
-                   Name.endswith_lower("jl_code_instance_t") ||
-                   Name.endswith_lower("jl_excstack_t") ||
-                   Name.endswith_lower("jl_task_t") ||
-                   Name.endswith_lower("jl_uniontype_t") ||
-                   Name.endswith_lower("jl_method_match_t") ||
-                   Name.endswith_lower("jl_vararg_t") ||
-                   Name.endswith_lower("jl_opaque_closure_t") ||
-                   Name.endswith_lower("jl_globalref_t") ||
+               if (Name.ends_with_insensitive("jl_value_t") ||
+                   Name.ends_with_insensitive("jl_svec_t") ||
+                   Name.ends_with_insensitive("jl_sym_t") ||
+                   Name.ends_with_insensitive("jl_expr_t") ||
+                   Name.ends_with_insensitive("jl_code_info_t") ||
+                   Name.ends_with_insensitive("jl_array_t") ||
+                   Name.ends_with_insensitive("jl_genericmemory_t") ||
+                   //Name.ends_with_insensitive("jl_genericmemoryref_t") ||
+                   Name.ends_with_insensitive("jl_method_t") ||
+                   Name.ends_with_insensitive("jl_method_instance_t") ||
+                   Name.ends_with_insensitive("jl_debuginfo_t") ||
+                   Name.ends_with_insensitive("jl_tupletype_t") ||
+                   Name.ends_with_insensitive("jl_datatype_t") ||
+                   Name.ends_with_insensitive("jl_typemap_entry_t") ||
+                   Name.ends_with_insensitive("jl_typemap_level_t") ||
+                   Name.ends_with_insensitive("jl_typename_t") ||
+                   Name.ends_with_insensitive("jl_module_t") ||
+                   Name.ends_with_insensitive("jl_tupletype_t") ||
+                   Name.ends_with_insensitive("jl_gc_tracked_buffer_t") ||
+                   Name.ends_with_insensitive("jl_binding_t") ||
+                   Name.ends_with_insensitive("jl_ordereddict_t") ||
+                   Name.ends_with_insensitive("jl_tvar_t") ||
+                   Name.ends_with_insensitive("jl_typemap_t") ||
+                   Name.ends_with_insensitive("jl_unionall_t") ||
+                   Name.ends_with_insensitive("jl_methtable_t") ||
+                   Name.ends_with_insensitive("jl_cgval_t") ||
+                   Name.ends_with_insensitive("jl_codectx_t") ||
+                   Name.ends_with_insensitive("jl_ast_context_t") ||
+                   Name.ends_with_insensitive("jl_code_instance_t") ||
+                   Name.ends_with_insensitive("jl_excstack_t") ||
+                   Name.ends_with_insensitive("jl_task_t") ||
+                   Name.ends_with_insensitive("jl_uniontype_t") ||
+                   Name.ends_with_insensitive("jl_method_match_t") ||
+                   Name.ends_with_insensitive("jl_vararg_t") ||
+                   Name.ends_with_insensitive("jl_opaque_closure_t") ||
+                   Name.ends_with_insensitive("jl_globalref_t") ||
                    // Probably not technically true for these, but let's allow it
-                   Name.endswith_lower("typemap_intersection_env") ||
-                   Name.endswith_lower("interpreter_state") ||
-                   Name.endswith_lower("jl_typeenv_t") ||
-                   Name.endswith_lower("jl_stenv_t") ||
-                   Name.endswith_lower("jl_varbinding_t") ||
-                   Name.endswith_lower("set_world") ||
-                   Name.endswith_lower("jl_codectx_t")) {
+                   Name.ends_with_insensitive("typemap_intersection_env") ||
+                   Name.ends_with_insensitive("interpreter_state") ||
+                   Name.ends_with_insensitive("jl_typeenv_t") ||
+                   Name.ends_with_insensitive("jl_stenv_t") ||
+                   Name.ends_with_insensitive("jl_varbinding_t") ||
+                   Name.ends_with_insensitive("set_world") ||
+                   Name.ends_with_insensitive("jl_codectx_t")) {
                  return true;
                }
                return false;
@@ -913,9 +911,9 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const {
       if (FD->getBuiltinID() != 0 || FD->isTrivial())
         isCalleeSafepoint = false;
       else if (FD->getDeclName().isIdentifier() &&
-               (FD->getName().startswith("uv_") ||
-                FD->getName().startswith("unw_") ||
-                FD->getName().startswith("_U")) &&
+               (FD->getName().starts_with("uv_") ||
+                FD->getName().starts_with("unw_") ||
+                FD->getName().starts_with("_U")) &&
                FD->getName() != "uv_run")
         isCalleeSafepoint = false;
       else
@@ -1052,13 +1050,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call,
         // global roots.
         StringRef FDName =
             FD->getDeclName().isIdentifier() ? FD->getName() : "";
-        if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) {
+        if (FDName.starts_with("jl_box_") || FDName.starts_with("ijl_box_")) {
           SVal Arg = Call.getArgSVal(0);
           if (auto CI = Arg.getAs<nonloc::ConcreteInt>()) {
             const llvm::APSInt &Value = CI->getValue();
             bool GloballyRooted = false;
             const int64_t NBOX_C = 1024;
-            if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) {
+            if (FDName.starts_with("jl_box_u") || FDName.starts_with("ijl_box_u")) {
               if (Value < NBOX_C) {
                 GloballyRooted = true;
               }
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
index dda9bbee184a0..41812d903816c 100644
--- a/src/codegen-stubs.c
+++ b/src/codegen-stubs.c
@@ -20,7 +20,7 @@ JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_valu
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
         char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
 JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
 
 JL_DLLEXPORT void *jl_LLVMCreateDisasm_fallback(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
 JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_fallback(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
@@ -43,10 +43,11 @@ JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t
     jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
 }
 
-JL_DLLEXPORT void jl_compile_codeinst_fallback(jl_code_instance_t *unspec)
+JL_DLLEXPORT int jl_compile_codeinst_fallback(jl_code_instance_t *unspec)
 {
     // Do nothing. The caller will notice that we failed to provide a an ->invoke and trigger
     // appropriate fallbacks.
+    return 0;
 }
 
 JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void)
@@ -106,8 +107,6 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
     return 0;
 }
 
-JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, void *config) UNAVAILABLE
-
 JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { }
 
 #define MODULE_PASS(NAME, CLASS, CREATE_PASS) \
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 59c7763350fff..73a5f844b31da 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -22,7 +22,11 @@
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
 #include <llvm/MC/TargetRegistry.h>
 #include <llvm/Target/TargetOptions.h>
-#include <llvm/Support/Host.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Host.h>
+#else
+#include <llvm/ADT/Host.h>
+#endif
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Object/SymbolSize.h>
 
@@ -86,7 +90,7 @@ static bool jl_fpo_disabled(const Triple &TT) {
     // MSAN doesn't support FPO
     return true;
 #endif
-    if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD()) {
+    if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD() || TT.isOSOpenBSD()) {
         return true;
     }
     return false;
@@ -137,23 +141,8 @@ auto getVoidTy(LLVMContext &ctxt) {
 auto getCharTy(LLVMContext &ctxt) {
     return getInt32Ty(ctxt);
 }
-auto getInt8PtrTy(LLVMContext &ctxt) {
-    return Type::getInt8PtrTy(ctxt);
-}
-auto getInt16PtrTy(LLVMContext &ctxt) {
-    return Type::getInt16PtrTy(ctxt);
-}
-auto getInt32PtrTy(LLVMContext &ctxt) {
-    return Type::getInt32PtrTy(ctxt);
-}
-auto getInt64PtrTy(LLVMContext &ctxt) {
-    return Type::getInt64PtrTy(ctxt);
-}
-auto getFloatPtrTy(LLVMContext &ctxt) {
-    return Type::getFloatPtrTy(ctxt);
-}
-auto getDoublePtrTy(LLVMContext &ctxt) {
-    return Type::getDoublePtrTy(ctxt);
+auto getPointerTy(LLVMContext &ctxt) {
+    return PointerType::get(ctxt, 0);
 }
 
 typedef Instruction TerminatorInst;
@@ -182,6 +171,14 @@ void setName(jl_codegen_params_t &params, Value *V, const Twine &Name)
     }
 }
 
+void maybeSetName(jl_codegen_params_t &params, Value *V, const Twine &Name)
+{
+    // To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments
+    if (params.debug_level >= 2 && isa<Instruction>(V)) {
+        V->setName(Name);
+    }
+}
+
 void setName(jl_codegen_params_t &params, Value *V, std::function<std::string()> GetName)
 {
     assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");
@@ -283,6 +280,7 @@ extern void _chkstk(void);
 
 // types
 struct jl_typecache_t {
+    Type *T_ptr;
     Type *T_size;
     Type *T_jlvalue;
     Type *T_pjlvalue;
@@ -297,17 +295,16 @@ struct jl_typecache_t {
 
     IntegerType *T_sigatomic;
 
-    Type *T_ppint8;
     unsigned sizeof_ptr;
     Align alignof_ptr;
 
     bool initialized;
 
     jl_typecache_t() :
-        T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr),
+        T_ptr(nullptr), T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr),
         T_ppjlvalue(nullptr), T_pprjlvalue(nullptr),
         T_jlgenericmemory(nullptr), T_jlarray(nullptr), T_pjlarray(nullptr),
-        T_jlfunc(nullptr), T_jlfuncparams(nullptr), T_sigatomic(nullptr), T_ppint8(nullptr),
+        T_jlfunc(nullptr), T_jlfuncparams(nullptr), T_sigatomic(nullptr),
         initialized(false) {}
 
     void initialize(LLVMContext &context, const DataLayout &DL) {
@@ -315,7 +312,7 @@ struct jl_typecache_t {
             return;
         }
         initialized = true;
-        T_ppint8 = PointerType::get(getInt8PtrTy(context), 0);
+        T_ptr = getPointerTy(context);
         T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8);
         T_size = DL.getIntPtrType(context);
         sizeof_ptr = DL.getPointerSize();
@@ -536,9 +533,12 @@ struct JuliaVariable {
         if (GlobalValue *V = m->getNamedValue(name))
             return cast<GlobalVariable>(V);
         auto T_size = m->getDataLayout().getIntPtrType(m->getContext());
-        return new GlobalVariable(*m, _type(T_size),
+        auto var = new GlobalVariable(*m, _type(T_size),
                 isconst, GlobalVariable::ExternalLinkage,
                 NULL, name);
+        if (Triple(m->getTargetTriple()).isOSWindows())
+            var->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLImportStorageClass); // Cross-library imports must be explicit for COFF (Windows)
+        return var;
     }
     GlobalVariable *realize(jl_codectx_t &ctx);
 };
@@ -730,22 +730,22 @@ static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes)
 static const auto jlRTLD_DEFAULT_var = new JuliaVariable{
     XSTR(jl_RTLD_DEFAULT_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jlexe_var = new JuliaVariable{
     XSTR(jl_exe_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jldll_var = new JuliaVariable{
     XSTR(jl_libjulia_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jldlli_var = new JuliaVariable{
     XSTR(jl_libjulia_internal_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jl_small_typeof_var = new JuliaVariable{
     XSTR(jl_small_typeof),
@@ -816,19 +816,19 @@ static const auto jlthrow_func = new JuliaFunction<>{
 static const auto jlerror_func = new JuliaFunction<>{
     XSTR(jl_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+            {getPointerTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlatomicerror_func = new JuliaFunction<>{
     XSTR(jl_atomic_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+            {getPointerTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jltypeerror_func = new JuliaFunction<>{
     XSTR(jl_type_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {getPointerTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlundefvarerror_func = new JuliaFunction<>{
@@ -915,12 +915,22 @@ static const auto jlcheckassignonce_func = new JuliaFunction<>{
             {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
-static const auto jldeclareconst_func = new JuliaFunction<>{
-    XSTR(jl_declare_constant),
+static const auto jldeclareconstval_func = new JuliaFunction<>{
+    XSTR(jl_declare_constant_val),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(getVoidTy(C),
-            {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); },
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue}, false); },
+    nullptr,
+};
+static const auto jldeclareglobal_func = new JuliaFunction<>{
+    XSTR(jl_declare_global),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, T_pjlvalue, T_prjlvalue}, false); },
     nullptr,
 };
 static const auto jlgetbindingorerror_func = new JuliaFunction<>{
@@ -937,7 +947,17 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
         return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false);
+                {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false);
+    },
+    nullptr,
+};
+static const auto jlgetbindingvalue_func = new JuliaFunction<>{
+    XSTR(jl_reresolve_binding_value_seqcst),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_pjlvalue}, false);
     },
     nullptr,
 };
@@ -946,7 +966,7 @@ static const auto jlboundp_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
         return FunctionType::get(getInt32Ty(C),
-                {T_pjlvalue, T_pjlvalue}, false);
+                {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false);
     },
     nullptr,
 };
@@ -1000,13 +1020,12 @@ static const auto jlmethod_func = new JuliaFunction<>{
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction<>{
-    XSTR(jl_generic_function_def),
+    XSTR(jl_declare_const_gf),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
         auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
         auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
-        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue}, false);
+        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false);
     },
     nullptr,
 };
@@ -1048,30 +1067,64 @@ static const auto jlunlockfield_func = new JuliaFunction<>{
 };
 static const auto jlenter_func = new JuliaFunction<>{
     XSTR(jl_enter_handler),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getPointerTy(C)}, false); },
     nullptr,
 };
 static const auto jl_current_exception_func = new JuliaFunction<>{
     XSTR(jl_current_exception),
-    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); },
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_pjlvalue_ty(C)}, false); },
     nullptr,
 };
 static const auto jlleave_func = new JuliaFunction<>{
     XSTR(jl_pop_handler),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt32Ty(C)}, false); },
-    nullptr,
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getInt32Ty(C)}, false); },
+    [](LLVMContext &C) {
+            auto FnAttrs = AttrBuilder(C);
+            FnAttrs.addAttribute(Attribute::WillReturn);
+            FnAttrs.addAttribute(Attribute::NoUnwind);
+            auto RetAttrs = AttrBuilder(C);
+            return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet(),
+                None);
+        },
+};
+static const auto jlleave_noexcept_func = new JuliaFunction<>{
+    XSTR(jl_pop_handler_noexcept),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getInt32Ty(C)}, false); },
+    [](LLVMContext &C) {
+            auto FnAttrs = AttrBuilder(C);
+            FnAttrs.addAttribute(Attribute::WillReturn);
+            FnAttrs.addAttribute(Attribute::NoUnwind);
+            auto RetAttrs = AttrBuilder(C);
+            return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet(),
+                None);
+        },
 };
 static const auto jl_restore_excstack_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_restore_excstack),
-    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
-            {T_size}, false); },
+    [](LLVMContext &C, Type *T_size) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, T_size}, false); },
     nullptr,
 };
 static const auto jl_excstack_state_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_excstack_state),
-    [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, false); },
+    [](LLVMContext &C, Type *T_size) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(T_size, {T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlegalx_func = new JuliaFunction<TypeFnContextAndSizeT>{
@@ -1098,9 +1151,9 @@ static const auto jl_alloc_obj_func = new JuliaFunction<TypeFnContextAndSizeT>{
     [](LLVMContext &C, Type *T_size) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
         auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0);
+        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
         return FunctionType::get(T_prjlvalue,
-                {T_ppjlvalue, T_size, T_prjlvalue}, false);
+                {T_pjlvalue, T_size, T_prjlvalue}, false);
     },
     [](LLVMContext &C) {
         auto FnAttrs = AttrBuilder(C);
@@ -1125,7 +1178,7 @@ static const auto jl_newbits_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue, getInt8PtrTy(C)}, false);
+                {T_prjlvalue, getPointerTy(C)}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
@@ -1225,9 +1278,9 @@ static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
     [](LLVMContext &C, const Triple &T) {
         if (T.isOSWindows())
             return FunctionType::get(getInt32Ty(C),
-                {getInt8PtrTy(C)}, false);
+                {getPointerTy(C)}, false);
         return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C), getInt32Ty(C)}, false);
+            {getPointerTy(C), getInt32Ty(C)}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReturnsTwice}),
@@ -1237,7 +1290,7 @@ static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
 static const auto memcmp_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(memcmp),
     [](LLVMContext &C, Type *T_size) { return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C), getInt8PtrTy(C), T_size}, false); },
+            {getPointerTy(C), getPointerTy(C), T_size}, false); },
     [](LLVMContext &C) {
         AttrBuilder FnAttrs(C);
 #if JL_LLVM_VERSION >= 160000
@@ -1256,13 +1309,13 @@ static const auto memcmp_func = new JuliaFunction<TypeFnContextAndSizeT>{
 static const auto jldlsym_func = new JuliaFunction<>{
     XSTR(jl_load_and_lookup),
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
-            {getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); },
+            {getPointerTy(C), getPointerTy(C), PointerType::get(getPointerTy(C), 0)}, false); },
     nullptr,
 };
 static const auto jllazydlsym_func = new JuliaFunction<>{
     XSTR(jl_lazy_load_and_lookup),
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
-            {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); },
+            {JuliaType::get_prjlvalue_ty(C), getPointerTy(C)}, false); },
     nullptr,
 };
 static const auto jltypeassert_func = new JuliaFunction<>{
@@ -1331,9 +1384,9 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{
             {
                 T_prjlvalue, // f (object)
                 T_pjlvalue, // result
-                getInt8PtrTy(C), // cache
+                getPointerTy(C), // cache
                 T_pjlvalue, // fill
-                FunctionType::get(getInt8PtrTy(C), { getInt8PtrTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline
+                FunctionType::get(getPointerTy(C), { getPointerTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline
                 T_pjlvalue, // env
                 T_pprjlvalue, // vals
             }, false);
@@ -1442,7 +1495,10 @@ static const auto gc_preserve_end_func = new JuliaFunction<> {
 };
 static const auto except_enter_func = new JuliaFunction<>{
     "julia.except_enter",
-    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); },
+    [](LLVMContext &C) {
+         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+         auto RT = StructType::get(getInt32Ty(C), getPointerTy(C));
+         return FunctionType::get(RT, {T_pjlvalue}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReturnsTwice}),
             AttributeSet(),
@@ -1579,9 +1635,9 @@ static const auto &builtin_func_map() {
           { jl_f_memoryref_addr,          new JuliaFunction<>{XSTR(jl_f_memoryref), get_func_sig, get_func_attrs} },
           { jl_f_memoryrefoffset_addr,    new JuliaFunction<>{XSTR(jl_f_memoryrefoffset), get_func_sig, get_func_attrs} },
           { jl_f_memoryrefset_addr,       new JuliaFunction<>{XSTR(jl_f_memoryrefset), get_func_sig, get_func_attrs} },
-          { jl_f_memoryrefswap_addr,      new JuliaFunction<>{XSTR(jl_f_memoryswapset), get_func_sig, get_func_attrs} },
-          { jl_f_memoryrefreplace_addr,   new JuliaFunction<>{XSTR(jl_f_memoryreplaceset), get_func_sig, get_func_attrs} },
-          { jl_f_memoryrefmodify_addr,    new JuliaFunction<>{XSTR(jl_f_memorymodifyset), get_func_sig, get_func_attrs} },
+          { jl_f_memoryrefswap_addr,      new JuliaFunction<>{XSTR(jl_f_memoryrefswap), get_func_sig, get_func_attrs} },
+          { jl_f_memoryrefreplace_addr,   new JuliaFunction<>{XSTR(jl_f_memoryrefreplace), get_func_sig, get_func_attrs} },
+          { jl_f_memoryrefmodify_addr,    new JuliaFunction<>{XSTR(jl_f_memoryrefmodify), get_func_sig, get_func_attrs} },
           { jl_f_memoryrefsetonce_addr,   new JuliaFunction<>{XSTR(jl_f_memoryrefsetonce), get_func_sig, get_func_attrs} },
           { jl_f_memoryref_isassigned_addr,new JuliaFunction<>{XSTR(jl_f_memoryref_isassigned), get_func_sig, get_func_attrs} },
           { jl_f_apply_type_addr,         new JuliaFunction<>{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
@@ -1932,6 +1988,7 @@ class jl_codectx_t {
 
     Value *pgcstack = NULL;
     Instruction *topalloca = NULL;
+    Value *world_age_at_entry = NULL; // Not valid to use in toplevel code
 
     bool use_cache = false;
     bool external_linkage = false;
@@ -2054,13 +2111,13 @@ static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed
 static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments=nullptr, size_t *args_begin=nullptr);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign);
+                                     jl_binding_t **pbnd, bool assign, bool alloc);
 static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa);
 static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
 static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg);
 static Value *get_current_task(jl_codectx_t &ctx);
 static Value *get_current_ptls(jl_codectx_t &ctx);
-static Value *get_last_age_field(jl_codectx_t &ctx);
+static Value *get_tls_world_age_field(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
                              ArrayRef<jl_cgval_t> args, size_t nargs, JuliaFunction<> *trampoline);
@@ -2099,14 +2156,25 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G)
             proto->setInitializer(G->getInitializer());
         }
         proto->copyAttributesFrom(G);
-        // DLLImport only needs to be set for the shadow module
-        // it just gets annoying in the JIT
-        proto->setDLLStorageClass(GlobalValue::DefaultStorageClass);
         return proto;
     }
     return cast<GlobalVariable>(local);
 }
 
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, size_t byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), base, byte_offset);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), base, byte_offset, Name);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
 
 // --- convenience functions for tagging llvm values with julia types ---
 
@@ -2135,15 +2203,15 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con
             gv = get_gv(gvname);
         }
     }
-    assert(gv->getName().startswith(name.str()));
+    assert(gv->getName().starts_with(name.str()));
     assert(val == gv->getInitializer());
     return gv;
 }
 
-static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align)
 {
     ++EmittedAllocas;
-    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca);
+    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca);
 }
 
 static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
@@ -2156,9 +2224,8 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st
         return;
     size_t i, np = sty->layout->npointers;
     auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext());
-    ptr = ctx.builder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace()));
     for (i = 0; i < np; i++) {
-        Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i));
+        Value *fld = emit_ptrgep(ctx, ptr, jl_ptr_offset(sty, i) * sizeof(jl_value_t*));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld));
     }
@@ -2171,8 +2238,12 @@ static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty)
     if (auto I = dyn_cast<PtrToIntInst>(v)) {
         auto ptr = I->getOperand(0);
         if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace())
-            return ctx.builder.CreateBitCast(ptr, ty);
+            return ptr;
+        #if JL_LLVM_VERSION >= 170000
+        else
+        #else
         else if (cast<PointerType>(ty)->hasSameElementTypeAs(cast<PointerType>(ptr->getType())))
+        #endif
             return ctx.builder.CreateAddrSpaceCast(ptr, ty);
     }
     ++EmittedIntToPtrs;
@@ -2192,6 +2263,8 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ)
         // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort
         jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
         constant.constant = jl_tparam0(typ);
+        if (typ == (jl_value_t*)jl_typeofbottom_type->super)
+            constant.isghost = true;
         return constant;
     }
     return jl_cgval_t(typ);
@@ -2204,7 +2277,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_datatype_t *typ)
 static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
 {
     jl_value_t *typ;
-    if (jl_is_type(jv)) {
+    if (jl_is_type(jv) && jv != jl_bottom_type) {
         typ = (jl_value_t*)jl_wrap_Type(jv); // TODO: gc-root this?
     }
     else {
@@ -2250,7 +2323,7 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_
         loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), Align(julia_alignment(typ)), "_j_const", *jl_Module);
     }
     else {
-        loc = emit_static_alloca(ctx, v->getType());
+        loc = emit_static_alloca(ctx, v->getType(), Align(julia_alignment(typ)));
         ctx.builder.CreateStore(v, loc);
     }
     return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack);
@@ -2269,7 +2342,8 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
             // replace T::Type{T} with T
             return ghostValue(ctx, typ);
         }
-    } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
+    }
+    else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
         // no need to explicitly load/store a constant/ghost value
         return ghostValue(ctx, typ);
     }
@@ -2361,7 +2435,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 {
     assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things");
     if (vi.usedUndef) {
-        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()));
+        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1));
         setName(ctx.emission_context, vi.defFlag, "isdefined");
         store_def_flag(ctx, vi, false);
     }
@@ -2545,7 +2619,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 tbaa = oldv.tbaa;
                 slotv = ctx.builder.CreateSelect(isboxv,
                             decay_derived(ctx, boxv),
-                            decay_derived(ctx, emit_bitcast(ctx, slotv, boxv->getType())));
+                            decay_derived(ctx, slotv));
             }
             jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa);
             assert(boxv->getType() == ctx.types().T_prjlvalue);
@@ -2713,7 +2787,7 @@ static void jl_init_function(Function *F, const Triple &TT)
     F->addFnAttrs(attr);
 }
 
-static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t *rettype, bool prefer_specsig)
+static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, bool prefer_specsig)
 {
     if (needsparams)
         return false;
@@ -2723,10 +2797,8 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t
         return false;
     if (jl_nparams(sig) == 0)
         return false;
-    if (va) {
-        if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1)))
-            return false;
-    }
+    if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND)
+        return false;
     // not invalid, consider if specialized signature is worthwhile
     if (prefer_specsig)
         return true;
@@ -2757,7 +2829,6 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t
 
 static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig)
 {
-    int va = lam->def.method->isva;
     jl_value_t *sig = lam->specTypes;
     bool needsparams = false;
     if (jl_is_method(lam->def.method)) {
@@ -2768,7 +2839,7 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
                 needsparams = true;
         }
     }
-    return std::make_pair(uses_specsig(sig, needsparams, va, rettype, prefer_specsig), needsparams);
+    return std::make_pair(uses_specsig(sig, needsparams, rettype, prefer_specsig), needsparams);
 }
 
 
@@ -2782,7 +2853,7 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha
 {
     Value *pv = ConstantExpr::getIntToPtr(
         ConstantInt::get(ctx.types().T_size, (uintptr_t)ptr),
-        getInt64PtrTy(ctx.builder.getContext()));
+        getPointerTy(ctx.builder.getContext()));
     Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name);
     v = ctx.builder.CreateAdd(v, addend);
     ctx.builder.CreateStore(v, pv, true); // volatile, not atomic, so this might be an underestimate,
@@ -2883,10 +2954,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     if (jl_is_globalref(ex)) {
         s = jl_globalref_name(ex);
         jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), s);
-        if (b && b->constp) {
+        jl_value_t *v = jl_get_binding_value_if_const(b);
+        if (v) {
             if (b->deprecated)
                 cg_bdw(ctx, s, b);
-            return jl_atomic_load_relaxed(&b->value);
+            return v;
         }
         return NULL;
     }
@@ -2905,10 +2977,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                     s = (jl_sym_t*)static_eval(ctx, jl_exprarg(e, 2));
                     if (s && jl_is_symbol(s)) {
                         jl_binding_t *b = jl_get_binding(m, s);
-                        if (b && b->constp) {
+                        jl_value_t *v = jl_get_binding_value_if_const(b);
+                        if (v) {
                             if (b->deprecated)
                                 cg_bdw(ctx, s, b);
-                            return jl_atomic_load_relaxed(&b->value);
+                            return v;
                         }
                     }
                 }
@@ -3123,7 +3196,7 @@ static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val)
     if (jl_is_globally_rooted(val))
         return val;
     jl_method_t *m = ctx.linfo->def.method;
-    if (jl_is_method(m)) {
+    if (!jl_options.strip_ir && jl_is_method(m)) {
         // the method might have a root for this already; use it if so
         JL_LOCK(&m->writelock);
         if (m->roots) {
@@ -3145,73 +3218,106 @@ static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val)
 
 static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
 {
-    jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false);
-    if (bp == NULL)
-        return jl_cgval_t();
-    bp = julia_binding_pvalue(ctx, bp);
-    if (bnd) {
-        jl_value_t *v = jl_atomic_load_acquire(&bnd->value); // acquire value for ty
-        if (v != NULL) {
-            if (bnd->constp)
-                return mark_julia_const(ctx, v);
-            LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
-            setName(ctx.emission_context, v, jl_symbol_name(name));
-            v->setOrdering(order);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding);
-            ai.decorateInst(v);
-            jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty);
-            return mark_julia_type(ctx, v, true, ty);
+    jl_binding_t *bnd = jl_get_module_binding(mod, name, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.max_world);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+        // try to look this up now.
+        // TODO: This is bad and we'd like to delete it.
+        jl_get_binding(mod, name);
+    }
+    assert(bnd);
+    Value *bp = NULL;
+    // bpart was updated in place - this will change with full partition
+    pku = jl_atomic_load_acquire(&bpart->restriction);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+        // Redo the lookup at runtime
+        bp = julia_binding_gv(ctx, bnd);
+        Value *v = ctx.builder.CreateCall(prepare_call(jlgetbindingvalue_func), { bp });
+        undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, (jl_value_t*)mod);
+        return mark_julia_type(ctx, v, true, jl_any_type);
+    } else {
+        while (true) {
+            if (!bpart)
+                break;
+            if (!jl_bkind_is_some_import(decode_restriction_kind(pku)))
+                break;
+            if (bnd->deprecated) {
+                cg_bdw(ctx, name, bnd);
+            }
+            bnd = (jl_binding_t*)decode_restriction_value(pku);
+            bpart = jl_get_binding_partition(bnd, ctx.max_world);
+            pku = jl_atomic_load_acquire(&bpart->restriction);
+        }
+        if (bpart && jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_value_t *constval = decode_restriction_value(pku);
+            if (!constval) {
+                undef_var_error_ifnot(ctx, ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), name, (jl_value_t*)mod);
+                return jl_cgval_t();
+            }
+            return mark_julia_const(ctx, constval);
         }
     }
-    // todo: use type info to avoid undef check
-    return emit_checked_var(ctx, bp, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding);
+    bp = julia_binding_gv(ctx, bnd);
+    if (bnd->deprecated) {
+        cg_bdw(ctx, name, bnd);
+    }
+    assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL);
+    jl_value_t *ty = decode_restriction_value(pku);
+    bp = julia_binding_pvalue(ctx, bp);
+    if (ty == nullptr)
+        ty = (jl_value_t*)jl_any_type;
+    return update_julia_type(ctx, emit_checked_var(ctx, bp, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding), ty);
 }
 
 static jl_cgval_t emit_globalop(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, jl_cgval_t rval, const jl_cgval_t &cmp,
                                 AtomicOrdering Order, AtomicOrdering FailOrder,
                                 bool issetglobal, bool isreplaceglobal, bool isswapglobal, bool ismodifyglobal, bool issetglobalonce,
-                                const jl_cgval_t *modifyop)
+                                const jl_cgval_t *modifyop, bool alloc)
 {
     jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
+    Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, alloc);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.max_world);
     if (bp == NULL)
         return jl_cgval_t();
-    if (bnd && !bnd->constp) {
-        jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty);
-        if (ty != nullptr) {
-            const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
-            if (!ismodifyglobal) {
-                // TODO: use typeassert in jl_check_binding_wr too
-                emit_typecheck(ctx, rval, ty, "typeassert");
-                rval = update_julia_type(ctx, rval, ty);
-                if (rval.typ == jl_bottom_type)
-                    return jl_cgval_t();
-            }
-            bool isboxed = true;
-            bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL;
-            return typed_store(ctx,
-                               julia_binding_pvalue(ctx, bp),
-                               rval, cmp, ty,
-                               ctx.tbaa().tbaa_binding,
-                               nullptr,
-                               bp,
-                               isboxed,
-                               Order,
-                               FailOrder,
-                               0,
-                               nullptr,
-                               issetglobal,
-                               isreplaceglobal,
-                               isswapglobal,
-                               ismodifyglobal,
-                               issetglobalonce,
-                               maybe_null,
-                               modifyop,
-                               fname,
-                               mod,
-                               sym);
+    if (bpart) {
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_value_t *ty = decode_restriction_value(pku);
+            if (ty != nullptr) {
+                const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
+                if (!ismodifyglobal) {
+                    // TODO: use typeassert in jl_check_binding_wr too
+                    emit_typecheck(ctx, rval, ty, "typeassert");
+                    rval = update_julia_type(ctx, rval, ty);
+                    if (rval.typ == jl_bottom_type)
+                        return jl_cgval_t();
+                }
+                bool isboxed = true;
+                bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL;
+                return typed_store(ctx,
+                                julia_binding_pvalue(ctx, bp),
+                                rval, cmp, ty,
+                                ctx.tbaa().tbaa_binding,
+                                nullptr,
+                                bp,
+                                isboxed,
+                                Order,
+                                FailOrder,
+                                0,
+                                nullptr,
+                                issetglobal,
+                                isreplaceglobal,
+                                isswapglobal,
+                                ismodifyglobal,
+                                issetglobalonce,
+                                maybe_null,
+                                modifyop,
+                                fname,
+                                mod,
+                                sym);
 
+            }
         }
     }
     Value *m = literal_pointer_val(ctx, (jl_value_t*)mod);
@@ -3320,6 +3426,61 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
     return phi;
 }
 
+struct egal_desc {
+    size_t offset;
+    size_t nrepeats;
+    size_t data_bytes;
+    size_t padding_bytes;
+};
+
+template <typename callback>
+static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty, egal_desc &current_desc)
+{
+    // Memcmp, but with masked padding
+    size_t data_bytes = 0;
+    size_t padding_bytes = 0;
+    size_t nfields = jl_datatype_nfields(aty);
+    size_t total_size = jl_datatype_size(aty);
+    assert(aty->layout->flags.isbitsegal);
+    for (size_t i = 0; i < nfields; ++i) {
+        size_t offset = jl_field_offset(aty, i);
+        size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1);
+        size_t fsz = jl_field_size(aty, i);
+        jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i);
+        assert(jl_is_datatype(fty)); // union fields should never reach here
+        assert(fty->layout->flags.isbitsegal);
+        if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) {
+            // The field has no internal padding
+            data_bytes += fsz;
+            if (offset + fsz == fend) {
+                // The field has no padding after. Merge this into the current
+                // comparison range and go to next field.
+            } else {
+                padding_bytes = fend - offset - fsz;
+                // Found padding. Either merge this into the current comparison
+                // range, or emit the old one and start a new one.
+                if (current_desc.data_bytes == data_bytes &&
+                        current_desc.padding_bytes == padding_bytes) {
+                    // Same as the previous range, just note that down, so we
+                    // emit this as a loop.
+                    current_desc.nrepeats += 1;
+                } else {
+                    if (current_desc.nrepeats != 0)
+                        emit_desc(current_desc);
+                    current_desc.nrepeats = 1;
+                    current_desc.data_bytes = data_bytes;
+                    current_desc.padding_bytes = padding_bytes;
+                }
+                data_bytes = 0;
+            }
+        } else {
+            // The field may have internal padding. Recurse this.
+            data_bytes += emit_masked_bits_compare(emit_desc, fty, current_desc);
+        }
+    }
+    return data_bytes;
+}
+
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
 {
     ++EmittedBitsCompares;
@@ -3358,7 +3519,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
     if (at->isAggregateType()) { // Struct or Array
         jl_datatype_t *sty = (jl_datatype_t*)arg1.typ;
         size_t sz = jl_datatype_size(sty);
-        if (sz > 512 && !sty->layout->flags.haspadding) {
+        if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) {
             Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) :
                 value_to_pointer(ctx, arg1).V;
             Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) :
@@ -3373,8 +3534,8 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             gc_uses.append(get_gc_roots_for(ctx, arg2));
             OperandBundleDef OpBundle("jl_roots", gc_uses);
             auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), {
-                        ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())),
-                        ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())),
+                        varg1,
+                        varg2,
                         ConstantInt::get(ctx.types().T_size, sz) },
                     ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
 
@@ -3395,6 +3556,92 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             }
             return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
         }
+        else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) {
+            Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) :
+                value_to_pointer(ctx, arg1).V;
+            Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) :
+                value_to_pointer(ctx, arg2).V;
+            varg1 = emit_pointer_from_objref(ctx, varg1);
+            varg2 = emit_pointer_from_objref(ctx, varg2);
+
+            // See above for why we want to do this
+            SmallVector<Value*, 0> gc_uses;
+            gc_uses.append(get_gc_roots_for(ctx, arg1));
+            gc_uses.append(get_gc_roots_for(ctx, arg2));
+            OperandBundleDef OpBundle("jl_roots", gc_uses);
+
+            Value *answer = nullptr;
+            auto emit_desc = [&](egal_desc desc) {
+                Value *ptr1 = varg1;
+                Value *ptr2 = varg2;
+                if (desc.offset != 0) {
+                    ptr1 = emit_ptrgep(ctx, ptr1, desc.offset);
+                    ptr2 = emit_ptrgep(ctx, ptr2, desc.offset);
+                }
+
+                Value *new_ptr1 = ptr1;
+                Value *endptr1 = nullptr;
+                BasicBlock *postBB = nullptr;
+                BasicBlock *loopBB = nullptr;
+                PHINode *answerphi = nullptr;
+                if (desc.nrepeats != 1) {
+                    // Set up loop
+                    endptr1 = emit_ptrgep(ctx, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));;
+
+                    BasicBlock *currBB = ctx.builder.GetInsertBlock();
+                    loopBB = BasicBlock::Create(ctx.builder.getContext(), "egal_loop", ctx.f);
+                    postBB = BasicBlock::Create(ctx.builder.getContext(), "post", ctx.f);
+                    ctx.builder.CreateBr(loopBB);
+
+                    ctx.builder.SetInsertPoint(loopBB);
+                    Type *TInt1 = getInt1Ty(ctx.builder.getContext());
+                    answerphi = ctx.builder.CreatePHI(TInt1, 2);
+                    answerphi->addIncoming(answer ? answer : ConstantInt::get(TInt1, 1), currBB);
+                    answer = answerphi;
+
+                    PHINode *itr1 = ctx.builder.CreatePHI(ptr1->getType(), 2);
+                    PHINode *itr2 = ctx.builder.CreatePHI(ptr2->getType(), 2);
+
+                    new_ptr1 = emit_ptrgep(ctx, itr1, desc.data_bytes + desc.padding_bytes);
+                    itr1->addIncoming(ptr1, currBB);
+                    itr1->addIncoming(new_ptr1, loopBB);
+
+                    Value *new_ptr2 = emit_ptrgep(ctx, itr2, desc.data_bytes + desc.padding_bytes);
+                    itr2->addIncoming(ptr2, currBB);
+                    itr2->addIncoming(new_ptr2, loopBB);
+
+                    ptr1 = itr1;
+                    ptr2 = itr2;
+                }
+
+                // Emit memcmp. TODO: LLVM has a pass to expand this for additional
+                // performance.
+                Value *this_answer = ctx.builder.CreateCall(prepare_call(memcmp_func),
+                    { ptr1,
+                      ptr2,
+                      ConstantInt::get(ctx.types().T_size, desc.data_bytes) },
+                    ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
+                this_answer = ctx.builder.CreateICmpEQ(this_answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+                answer = answer ? ctx.builder.CreateAnd(answer, this_answer) : this_answer;
+                if (endptr1) {
+                    answerphi->addIncoming(answer, loopBB);
+                    Value *loopend = ctx.builder.CreateICmpEQ(new_ptr1, endptr1);
+                    ctx.builder.CreateCondBr(loopend, postBB, loopBB);
+                    ctx.builder.SetInsertPoint(postBB);
+                }
+            };
+            egal_desc current_desc = {0};
+            size_t trailing_data_bytes = emit_masked_bits_compare(emit_desc, sty, current_desc);
+            assert(current_desc.nrepeats != 0);
+            emit_desc(current_desc);
+            if (trailing_data_bytes != 0) {
+                current_desc.nrepeats = 1;
+                current_desc.data_bytes = trailing_data_bytes;
+                current_desc.padding_bytes = 0;
+                emit_desc(current_desc);
+            }
+            return answer;
+        }
         else {
             jl_svec_t *types = sty->types;
             Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
@@ -3438,8 +3685,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     if (arg1.constant && arg2.constant)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), jl_egal(arg1.constant, arg2.constant));
 
-    jl_value_t *rt1 = arg1.typ;
-    jl_value_t *rt2 = arg2.typ;
+    jl_value_t *rt1 = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ);
+    jl_value_t *rt2 = (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ);
     if (jl_is_concrete_type(rt1) && jl_is_concrete_type(rt2) && !jl_is_kind(rt1) && !jl_is_kind(rt2) && rt1 != rt2) {
         // disjoint concrete leaf types are never equal (quick test)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
@@ -3461,8 +3708,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
             // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton
             // (which was probably caught above, but just to be safe, we repeat it here explicitly)
             return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed;
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed;
         // rooting these values isn't needed since we won't load this pointer
         // and we know at least one of them is a unique Singleton
         // which is already enough to ensure pointer uniqueness for this test
@@ -3482,8 +3729,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
             if (typ == jl_bool_type) { // aka jl_pointer_egal
                 // some optimizations for bool, since pointer comparison may be better
                 if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer
-                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
-                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
+                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed;
+                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed;
                     return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
                 }
             }
@@ -3581,7 +3828,8 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                  isswapglobal,
                                  ismodifyglobal,
                                  issetglobalonce,
-                                 modifyop);
+                                 modifyop,
+                                 false);
             return true;
         }
     }
@@ -3766,6 +4014,8 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     bool needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
     size_t elsz = layout->size;
     size_t al = layout->alignment;
+    if (al > JL_HEAP_ALIGNMENT)
+        al = JL_HEAP_ALIGNMENT;
     if (isatomic == (order == jl_memory_order_notatomic)) {
         emit_atomic_error(ctx,
                 issetmemory ?
@@ -3828,22 +4078,19 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
         Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded);
         Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
-        data = emit_bitcast(ctx, data, AT->getPointerTo());
         // compute tindex from val
         Value *ptindex;
         if (elsz == 0) {
             ptindex = data;
         }
         else {
-            data = emit_bitcast(ctx, data, AT->getPointerTo());
             // isbits union selector bytes are stored after mem->length
             ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
             data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
         }
-        ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
-        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0);
+        ptindex = emit_ptrgep(ctx, ptindex, idx0);
         *ret = union_store(ctx, data, ptindex, val, cmp, ety,
-            ctx.tbaa().tbaa_arraybuf, nullptr, ctx.tbaa().tbaa_arrayselbyte,
+            ctx.tbaa().tbaa_arraybuf, ctx.tbaa().tbaa_arrayselbyte,
             Order, FailOrder,
             nullptr, issetmemory, isreplacememory, isswapmemory, ismodifymemory, issetmemoryonce,
             modifyop, fname);
@@ -3856,8 +4103,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             assert(ptr);
             lock = ptr;
             // ptr += sizeof(lock);
-            ptr = emit_bitcast(ctx, ptr, getInt8PtrTy(ctx.builder.getContext()));
-            ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+            ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
         }
         Value *data_owner = NULL; // owner object against which the write barrier must check
         if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier
@@ -3894,11 +4140,10 @@ static jl_llvm_functions_t
         orc::ThreadSafeModule &TSM,
         jl_method_instance_t *lam,
         jl_code_info_t *src,
-        jl_value_t *jlrettype,
-        jl_codegen_params_t &params,
-        size_t min_world, size_t max_world);
+        jl_value_t *rettype,
+        jl_codegen_params_t &params);
 
-static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name);
+static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name);
 
 static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                               ArrayRef<jl_cgval_t> argv, size_t nargs, jl_value_t *rt,
@@ -3973,7 +4218,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 #ifdef _P64
                 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
 #endif
-                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs));
+                Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
                 Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
                 *ret = mark_julia_type(ctx, r, true, jl_any_type);
                 return true;
@@ -4094,6 +4339,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             bool isunion = layout->flags.arrayelem_isunion;
             size_t elsz = layout->size;
             size_t al = layout->alignment;
+            if (al > JL_HEAP_ALIGNMENT)
+                al = JL_HEAP_ALIGNMENT;
             bool needlock = isatomic && !isboxed && elsz > MAX_ATOMIC_SIZE;
             AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
                                     ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
@@ -4117,13 +4364,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 }
                 else {
                     Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
-                    data = emit_bitcast(ctx, data, AT->getPointerTo());
                     // isbits union selector bytes are stored after mem->length bytes
                     ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
                     data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
                 }
-                ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
-                ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0);
+                ptindex = emit_ptrgep(ctx, ptindex, idx0);
                 size_t elsz_c = 0, al_c = 0;
                 int union_max = jl_islayout_inline(ety, &elsz_c, &al_c);
                 assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al);
@@ -4136,8 +4381,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     assert(ptr);
                     lock = ptr;
                     // ptr += sizeof(lock);
-                    ptr = emit_bitcast(ctx, ptr, getInt8PtrTy(ctx.builder.getContext()));
-                    ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                    ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
                     emit_lockstate_value(ctx, lock, true);
                 }
                 *ret = typed_load(ctx, ptr, nullptr, ety,
@@ -4228,12 +4472,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 if (needlock) {
                     // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
                     // elem += sizeof(lock);
-                    elem = emit_bitcast(ctx, elem, getInt8PtrTy(ctx.builder.getContext()));
-                    elem = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                    elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
                 }
-                elem = emit_bitcast(ctx, elem, ctx.types().T_pprjlvalue);
                 if (!isboxed)
-                    elem = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, elem, layout->first_ptr);
+                    elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
                 // emit this using the same type as jl_builtin_memoryrefget
                 // so that LLVM may be able to load-load forward them and fold the result
                 auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;
@@ -4321,7 +4563,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
                         Value *valen = emit_n_varargs(ctx);
                         jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field)
-                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)),
+                                emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)),
                                 NULL, NULL);
                         Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
@@ -4406,7 +4648,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     assert(jl_svec_len(fn) == 1);
                     Value *typ_sym = literal_pointer_val(ctx, jl_svecref(fn, 0));
                     Value *cond = ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, typ_sym), mark_callee_rooted(ctx, boxed(ctx, fld)));
-                    emit_hasnofield_error_ifnot(ctx, cond, utt->name->name, fld);
+                    emit_hasnofield_error_ifnot(ctx, cond, utt, fld);
                     *ret = emit_getfield_knownidx(ctx, obj, 0, utt, order);
                     return true;
                 }
@@ -4414,7 +4656,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     Value *index = ctx.builder.CreateCall(prepare_call(jlfieldindex_func),
                             {emit_typeof(ctx, obj, false, false), boxed(ctx, fld), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)});
                     Value *cond = ctx.builder.CreateICmpNE(index, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1));
-                    emit_hasnofield_error_ifnot(ctx, cond, utt->name->name, fld);
+                    emit_hasnofield_error_ifnot(ctx, cond, utt, fld);
                     Value *idx2 = ctx.builder.CreateAdd(ctx.builder.CreateIntCast(index, ctx.types().T_size, false), ConstantInt::get(ctx.types().T_size, 1)); // getfield_unknown is 1 based
                     if (emit_getfield_unknownidx(ctx, ret, obj, idx2, utt, jl_false, order))
                         return true;
@@ -4519,7 +4761,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 if (nargs == 3)
                     emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype");
                 emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck);
-                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx);
+                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, types_svec), idx);
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
                 Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*))));
                 setName(ctx.emission_context, fieldtyp, "fieldtype");
@@ -4546,7 +4788,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 return true;
             }
             // String and SimpleVector's length fields have the same layout
-            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), ctx.types().T_size->getPointerTo());
+            auto ptr = boxed(ctx, obj);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr));
             MDBuilder MDB(ctx.builder.getContext());
@@ -4660,7 +4902,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             *ret = jl_cgval_t(); // unreachable
             return true;
         }
-        else if (fieldidx < nf - stt->name->n_uninitialized) {
+        else if (!field_may_be_null(obj, stt, fieldidx)) {
             *ret = mark_julia_const(ctx, jl_true);
         }
         else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) {
@@ -4670,8 +4912,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs);
                 if (!jl_field_isptr(stt, fieldidx))
                     offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr;
-                Value *ptr = emit_bitcast(ctx, data_pointer(ctx, obj), ctx.types().T_pprjlvalue);
-                Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs);
+                Value *ptr = data_pointer(ctx, obj);
+                Value *addr = emit_ptrgep(ctx, ptr, offs * sizeof(jl_value_t*));
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -4766,17 +5008,13 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline);
 }
 
-static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
-                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, jl_returninfo_t &returninfo, jl_code_instance_t *fromexternal,
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs)
 {
     ++EmittedSpecfunCalls;
     // emit specialized call site
     bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
     FunctionType *cft = returninfo.decl.getFunctionType();
-    *cc = returninfo.cc;
-    *return_roots = returninfo.return_roots;
-
     size_t nfargs = cft->getNumParams();
     SmallVector<Value *, 0> argvals(nfargs);
     unsigned idx = 0;
@@ -4787,23 +5025,20 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
     case jl_returninfo_t::Ghosts:
         break;
     case jl_returninfo_t::SRet:
-        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-        assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
+        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType(), Align(julia_alignment(jlretty)));
         argvals[idx] = result;
         idx++;
         break;
     case jl_returninfo_t::Union:
-        result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes));
+        result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes), Align(returninfo.union_align));
         setName(ctx.emission_context, result, "sret_box");
-        if (returninfo.union_align > 1)
-            result->setAlignment(Align(returninfo.union_align));
         argvals[idx] = result;
         idx++;
         break;
     }
 
     if (returninfo.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots));
+        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots), Align(alignof(jl_value_t*)));
         argvals[idx] = return_roots;
         idx++;
     }
@@ -4816,35 +5051,30 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
         // n.b.: specTypes is required to be a datatype by construction for specsig
         jl_cgval_t arg = argv[i];
         if (is_opaque_closure && i == 0) {
-            Type *at = cft->getParamType(idx);
-            // Special optimization for opaque closures: We know that specsig opaque
-            // closures don't look at their type tag (they are fairly quickly discarded
-            // for their environments). Therefore, we can just pass these as a pointer,
-            // rather than a boxed value.
+            // Special implementation for opaque closures: their jt and thus
+            // julia_type_to_llvm values are likely wrong, so override the
+            // behavior here to directly pass the expected pointer based instead
+            // just on passing arg as a pointer
             arg = value_to_pointer(ctx, arg);
-            argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
+            argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
         }
         else if (is_uniquerep_Type(jt)) {
             continue;
-        } else {
+        }
+        else {
             bool isboxed = deserves_argbox(jt);
             Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
             if (type_is_ghost(et))
                 continue;
             assert(idx < nfargs);
-            Type *at = cft->getParamType(idx);
             if (isboxed) {
-                assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
                 argvals[idx] = boxed(ctx, arg);
             }
             else if (et->isAggregateType()) {
                 arg = value_to_pointer(ctx, arg);
-                // can lazy load on demand, no copy needed
-                assert(at == PointerType::get(et, AddressSpace::Derived));
-                argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
+                argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
             }
             else {
-                assert(at == et);
                 Value *val = emit_unbox(ctx, et, arg, jt);
                 if (!val) {
                     // There was a type mismatch of some sort - exit early
@@ -4897,7 +5127,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
                 ctx.builder.CreateICmpEQ(
                         ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], ctx.types().T_pjlvalue)),
+                decay_derived(ctx, result),
                 decay_derived(ctx, box)
             );
             retval = mark_julia_slot(derived,
@@ -4911,6 +5141,19 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
             retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
             break;
     }
+    return retval;
+}
+
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty)
+{
+    ++EmittedSpecfunCalls;
+    // emit specialized call site
+    bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
+    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
+    *cc = returninfo.cc;
+    *nreturn_roots = returninfo.return_roots;
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, fromexternal, argv, nargs);
     // see if inference has a different / better type for the call than the lambda
     return update_julia_type(ctx, retval, inferred_retty);
 }
@@ -5018,30 +5261,26 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR
                     }
 
                     // Check if it is already compiled (either JIT or externally)
-                    if (cache_valid) {
+                    if (need_to_emit && cache_valid) {
                         // optimization: emit the correct name immediately, if we know it
                         // TODO: use `emitted` map here too to try to consolidate names?
-                        // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-                        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-                        if (fptr) {
-                            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                                jl_cpu_pause();
-                            }
-                            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-                            if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
-                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
-                                if (ctx.external_linkage) {
-                                    // TODO: Add !specsig support to aotcompile.cpp
-                                    // Check that the codeinst is containing native code
-                                    if (specsig && jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b100) {
-                                        external = true;
-                                        need_to_emit = false;
-                                    }
-                                }
-                                else { // ctx.use_cache
+                        uint8_t specsigflags;
+                        jl_callptr_t invoke;
+                        void *fptr;
+                        jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
+                        if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) {
+                            protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
+                            if (ctx.external_linkage) {
+                                // TODO: Add !specsig support to aotcompile.cpp
+                                // Check that the codeinst is containing native code
+                                if (specsig && (specsigflags & 0b100)) {
+                                    external = true;
                                     need_to_emit = false;
                                 }
                             }
+                            else { // ctx.use_cache
+                                need_to_emit = false;
+                            }
                         }
                     }
                     if (need_to_emit) {
@@ -5067,8 +5306,12 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR
         Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2);
         result = mark_julia_type(ctx, r, true, rt);
     }
-    if (result.typ == jl_bottom_type)
+    if (result.typ == jl_bottom_type) {
+#ifndef JL_NDEBUG
+        emit_error(ctx, "(Internal Error - IR Validity): Returned from function we expected not to.");
+#endif
         CreateTrap(ctx.builder);
+    }
     return result;
 }
 
@@ -5204,7 +5447,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
         if (f.typ == (jl_value_t*)jl_intrinsic_type) {
             fptr = prepare_call(jlintrinsic_func);
             F = f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V;
-            F = decay_derived(ctx, maybe_bitcast(ctx, F, ctx.types().T_pjlvalue));
+            F = decay_derived(ctx, F);
             cc = julia_call3;
         }
         else {
@@ -5223,7 +5466,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
         jl_value_t *oc_rett = jl_tparam1(f.typ);
         if (jl_is_datatype(oc_argt) && jl_tupletype_length_compat(oc_argt, nargs-1)) {
             jl_value_t *sigtype = jl_argtype_with_function_type((jl_value_t*)f.typ, (jl_value_t*)oc_argt);
-            if (uses_specsig(sigtype, false, true, oc_rett, true)) {
+            if (uses_specsig(sigtype, false, oc_rett, true)) {
                 JL_GC_PUSH1(&sigtype);
                 jl_cgval_t r = emit_specsig_oc_call(ctx, f.typ, sigtype, argv, nargs);
                 JL_GC_POP();
@@ -5239,7 +5482,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
 
 // --- accessing and assigning variables ---
 
-static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name)
+static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name)
 {
     ++EmittedUndefVarErrors;
     assert(name.typ == (jl_value_t*)jl_symbol_type);
@@ -5259,19 +5502,23 @@ static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *
 // if the reference currently bound or assign == true,
 //   pbnd will also be assigned with the binding address
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign)
+                                     jl_binding_t **pbnd, bool assign, bool alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, s, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, ctx.max_world);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
     if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) == NULL)
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
             // not yet declared
             b = NULL;
     }
     else {
-        b = jl_atomic_load_relaxed(&b->owner);
-        if (b == NULL)
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
             // try to look this up now
             b = jl_get_binding(m, s);
+            bpart = jl_get_binding_partition(b, ctx.max_world);
+        }
+        pku = jl_walk_binding_inplace(&b, &bpart, ctx.max_world);
     }
     if (b == NULL) {
         // var not found. switch to delayed lookup.
@@ -5289,9 +5536,17 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
         ctx.builder.CreateCondBr(iscached, have_val, not_found);
         not_found->insertInto(ctx.f);
         ctx.builder.SetInsertPoint(not_found);
-        Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func),
-                { literal_pointer_val(ctx, (jl_value_t*)m),
-                  literal_pointer_val(ctx, (jl_value_t*)s) });
+        Value *bval = nullptr;
+        if (assign) {
+            bval = ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func),
+                    { literal_pointer_val(ctx, (jl_value_t*)m),
+                    literal_pointer_val(ctx, (jl_value_t*)s),
+                    ConstantInt::get(getInt32Ty(ctx.builder.getContext()), alloc)});
+        } else {
+            bval = ctx.builder.CreateCall(prepare_call(jlgetbindingorerror_func),
+                    { literal_pointer_val(ctx, (jl_value_t*)m),
+                    literal_pointer_val(ctx, (jl_value_t*)s)});
+        }
         setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found");
         ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
         ctx.builder.CreateBr(have_val);
@@ -5304,11 +5559,12 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
         return p;
     }
     if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) != b) {
+        if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
             // this will fail at runtime, so defer to the runtime to create the error
             ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func),
                     { literal_pointer_val(ctx, (jl_value_t*)m),
-                      literal_pointer_val(ctx, (jl_value_t*)s) });
+                      literal_pointer_val(ctx, (jl_value_t*)s),
+                      ConstantInt::get(getInt32Ty(ctx.builder.getContext()), alloc) });
             CreateTrap(ctx.builder);
             return NULL;
         }
@@ -5345,10 +5601,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
         }
     }
     assert(ctx.spvals_ptr != NULL);
-    Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue,
-            ctx.spvals_ptr,
-            i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+    Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
     setName(ctx.emission_context, sp, "sparam");
@@ -5362,7 +5615,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
     return mark_julia_type(ctx, sp, true, jl_any_type);
 }
 
-static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
+static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_import)
 {
     Value *isnull = NULL;
     if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
@@ -5401,10 +5654,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
             }
         }
         assert(ctx.spvals_ptr != NULL);
-        Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-                ctx.types().T_prjlvalue,
-                ctx.spvals_ptr,
-                i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+        Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
         Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
         isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type));
@@ -5421,9 +5671,11 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
             modu = ctx.module;
             name = (jl_sym_t*)sym;
         }
-        jl_binding_t *bnd = jl_get_binding(modu, name);
-        if (bnd) {
-            if (jl_atomic_load_relaxed(&bnd->value) != NULL)
+        jl_binding_t *bnd = allow_import ? jl_get_binding(modu, name) : jl_get_module_binding(modu, name, 0);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.min_world);
+        jl_ptr_kind_union_t pku = bpart ? jl_atomic_load_relaxed(&bpart->restriction) : encode_restriction(NULL, BINDING_KIND_GUARD);
+        if (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            if (jl_get_binding_value_if_const(bnd))
                 return mark_julia_const(ctx, jl_true);
             Value *bp = julia_binding_gv(ctx, bnd);
             bp = julia_binding_pvalue(ctx, bp);
@@ -5436,7 +5688,8 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
         else {
             Value *v = ctx.builder.CreateCall(prepare_call(jlboundp_func), {
                     literal_pointer_val(ctx, (jl_value_t*)modu),
-                    literal_pointer_val(ctx, (jl_value_t*)name)
+                    literal_pointer_val(ctx, (jl_value_t*)name),
+                    ConstantInt::get(getInt32Ty(ctx.builder.getContext()), allow_import)
                 });
             isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
         }
@@ -5472,7 +5725,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
             else {
                 const DataLayout &DL = jl_Module->getDataLayout();
                 uint64_t sz = DL.getTypeStoreSize(T);
-                emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value(), varslot->getAlign().value());
+                emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign(), varslot->getAlign());
             }
             Value *tindex = NULL;
             if (vi.pTIndex)
@@ -5501,8 +5754,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
             if (vi.usedUndef)
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
             if (v.V) { // v.V will be null if it is a union of all ghost values
-                v.V = ctx.builder.CreateSelect(load_unbox, emit_bitcast(ctx,
-                    decay_derived(ctx, v.V), boxed->getType()), decay_derived(ctx, boxed));
+                v.V = ctx.builder.CreateSelect(load_unbox, decay_derived(ctx, v.V), decay_derived(ctx, boxed));
             } else
                 v.V = boxed;
             v.Vboxed = boxed;
@@ -5560,9 +5812,6 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
                     if (vi.pTIndex) // TODO: use lifetime-end here instead
                         ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
                     Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ);
-                    Type *dest_ty = store_ty->getPointerTo();
-                    if (dest_ty != dest->getType())
-                        dest = emit_bitcast(ctx, dest, dest_ty);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                     ai.decorateInst(ctx.builder.CreateStore(
                                       emit_unbox(ctx, store_ty, rval_info, rval_info.typ),
@@ -5580,8 +5829,9 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
                 // This check should probably mostly catch the relevant situations.
                 if (vi.value.V != rval_info.V) {
                     Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ));
+                    Align alignment(julia_alignment(rval_info.typ));
                     emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes,
-                                julia_alignment(rval_info.typ), julia_alignment(rval_info.typ), vi.isVolatile);
+                                alignment, alignment, vi.isVolatile);
                 }
             }
             else {
@@ -5632,9 +5882,9 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
             ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false);
             ctx.builder.CreateLifetimeEnd(dest);
             Value *ptr = ctx.builder.CreateSelect(isboxed,
-                maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())),
-                maybe_bitcast(ctx, decay_derived(ctx, phi), getInt8PtrTy(ctx.builder.getContext())));
-            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); // XXX: this TBAA is wrong for ptr_phi
+                decay_derived(ctx, ptr_phi),
+                decay_derived(ctx, phi));
+            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType));
             val.Vboxed = ptr_phi;
             ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r));
             ctx.SAvalues[idx] = val;
@@ -5667,11 +5917,10 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
     if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) {
         // the value will be moved into dest in the predecessor critical block.
         // here it's moved into phi in the successor (from dest)
-        dest = emit_static_alloca(ctx, vtype);
-        Value *phi = emit_static_alloca(ctx, vtype);
-        ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)),
-             dest, dest->getAlign(),
-             jl_datatype_size(phiType), false);
+        Align align(julia_alignment(phiType));
+        dest = emit_static_alloca(ctx, vtype, align);
+        Value *phi = emit_static_alloca(ctx, vtype, align);
+        ctx.builder.CreateMemCpy(phi, align, dest, align, jl_datatype_size(phiType), false);
         ctx.builder.CreateLifetimeEnd(dest);
         slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack);
     }
@@ -5803,17 +6052,20 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi
 
     jl_module_t *mod;
     jl_sym_t *sym;
+    bool toplevel = jl_is_module(ctx.linfo->def.value);
+    bool alloc = toplevel;
     if (jl_is_symbol(l)) {
         mod = ctx.module;
         sym = (jl_sym_t*)l;
     }
     else {
         assert(jl_is_globalref(l));
+        alloc &= jl_globalref_mod(l) == ctx.module;
         mod = jl_globalref_mod(l);
         sym = jl_globalref_name(l);
     }
     emit_globalop(ctx, mod, sym, rval_info, jl_cgval_t(), AtomicOrdering::Release, AtomicOrdering::NotAtomic,
-                  true, false, false, false, false, nullptr);
+                  true, false, false, false, false, nullptr, alloc);
     // Global variable. Does not need debug info because the debugger knows about
     // its memory location.
 }
@@ -5961,8 +6213,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
                 hand_n_leave += 1;
             }
         }
-        ctx.builder.CreateCall(prepare_call(jlleave_func),
-                           ConstantInt::get(getInt32Ty(ctx.builder.getContext()), hand_n_leave));
+        ctx.builder.CreateCall(prepare_call(jlleave_noexcept_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), hand_n_leave)});
         if (scope_to_restore) {
             jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
             scope_ai.decorateInst(
@@ -5972,7 +6223,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     else if (head == jl_pop_exception_sym) {
         jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0));
         assert(excstack_state.V && excstack_state.V->getType() == ctx.types().T_size);
-        ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V);
+        ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), {get_current_task(ctx), excstack_state.V});
         return;
     }
     else {
@@ -5995,8 +6246,18 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     }
     sigtype = jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
 
-    jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
-    jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world);
+    jl_method_instance_t *mi;
+    jl_code_instance_t *ci;
+
+    if (closure_method->source) {
+        mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
+        ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world);
+    }
+    else {
+        mi = (jl_method_instance_t*)jl_atomic_load_relaxed(&closure_method->specializations);
+        assert(jl_is_method_instance(mi));
+        ci = jl_atomic_load_relaxed(&mi->cache);
+    }
 
     if (ci == NULL || (jl_value_t*)ci == jl_nothing) {
         JL_GC_POP();
@@ -6018,7 +6279,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
         orc::ThreadSafeModule closure_m = jl_create_ts_module(
                 name_from_method_instance(mi), ctx.emission_context.tsctx,
                 jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple()));
-        jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context, ctx.min_world, ctx.max_world);
+        jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context);
         JL_GC_POP();
         it = ctx.emission_context.compiled_functions.insert(std::make_pair(ci, std::make_pair(std::move(closure_m), std::move(closure_decls)))).first;
     }
@@ -6035,7 +6296,8 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
         closure_decls.specFunctionObject;
     if (GlobalValue *V = jl_Module->getNamedValue(fname)) {
         F = cast<Function>(V);
-    } else {
+    }
+    else {
         F = Function::Create(get_func_sig(ctx.builder.getContext()),
                              Function::ExternalLinkage,
                              fname, jl_Module);
@@ -6046,7 +6308,8 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     Function *specF = NULL;
     if (!isspecsig) {
         specF = F;
-    } else {
+    }
+    else {
         //emission context holds context lock so can get module
         specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
         if (specF) {
@@ -6114,8 +6377,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     // however, this is a good way to do it because it should *not* be easy
     // to add new node types.
     if (head == jl_isdefined_sym) {
-        assert(nargs == 1);
-        return emit_isdefined(ctx, args[0]);
+        assert(nargs == 1 || nargs == 2);
+        int allow_import = 1;
+        if (nargs == 2) {
+            assert(jl_is_bool(args[1]));
+            allow_import = args[1] == jl_true;
+        }
+        return emit_isdefined(ctx, args[0], allow_import);
     }
     else if (head == jl_throw_undef_if_not_sym) {
         assert(nargs == 2);
@@ -6184,7 +6452,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     else if (head == jl_method_sym) {
         if (nargs == 1) {
             jl_value_t *mn = args[0];
-            assert(jl_is_symbol(mn) || jl_is_slotnumber(mn));
+            assert(jl_is_symbol(mn) || jl_is_slotnumber(mn) || jl_is_globalref(mn));
 
             Value *bp = NULL, *name;
             jl_binding_t *bnd = NULL;
@@ -6203,7 +6471,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                     bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
                 }
                 JL_CATCH {
-                    jl_value_t *e = jl_current_exception();
+                    jl_value_t *e = jl_current_exception(jl_current_task);
                     // errors. boo. :(
                     JL_GC_PUSH1(&e);
                     e = jl_as_global_root(e, 1);
@@ -6212,20 +6480,11 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                     return ghostValue(ctx, jl_nothing_type);
                 }
                 bp = julia_binding_gv(ctx, bnd);
-                bp = julia_binding_pvalue(ctx, bp);
-            }
-            else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) {
-                // XXX: eval_methoddef does not have this code branch
-                int sl = jl_slot_number(mn)-1;
-                jl_varinfo_t &vi = ctx.slots[sl];
-                bp = vi.boxroot;
-                name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
-            }
-            if (bp) {
-                Value *mdargs[] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, literal_pointer_val(ctx, bnd) };
                 jl_cgval_t gf = mark_julia_type(
                         ctx,
-                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), ArrayRef<Value*>(mdargs)),
+                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), { bp,
+                            literal_pointer_val(ctx, (jl_value_t*)mod), name
+                        }),
                         true,
                         jl_function_type);
                 return gf;
@@ -6250,7 +6509,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         return meth;
     }
     else if (head == jl_const_sym) {
-        assert(nargs == 1);
+        assert(nargs <= 2);
         jl_sym_t *sym = (jl_sym_t*)args[0];
         jl_module_t *mod = ctx.module;
         if (jl_is_globalref(sym)) {
@@ -6258,12 +6517,28 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             sym = jl_globalref_name(sym);
         }
         if (jl_is_symbol(sym)) {
-            jl_binding_t *bnd = NULL;
-            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
-            if (bp)
-                ctx.builder.CreateCall(prepare_call(jldeclareconst_func),
-                        { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) });
+            jl_binding_t *bnd = jl_get_module_binding(mod, sym, 1);
+            if (nargs == 2) {
+                jl_cgval_t rhs = emit_expr(ctx, args[1]);
+                ctx.builder.CreateCall(prepare_call(jldeclareconstval_func),
+                        { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, rhs) });
+            } else {
+                ctx.builder.CreateCall(prepare_call(jldeclareconstval_func),
+                        { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), ConstantPointerNull::get(cast<PointerType>(ctx.types().T_prjlvalue)) });
+            }
+        }
+    }
+    else if (head == jl_globaldecl_sym) {
+        assert(nargs == 2);
+        jl_sym_t *sym = (jl_sym_t*)args[0];
+        jl_module_t *mod = ctx.module;
+        if (jl_is_globalref(sym)) {
+            mod = jl_globalref_mod(sym);
+            sym = jl_globalref_name(sym);
         }
+        jl_cgval_t typ = emit_expr(ctx, args[1]);
+        ctx.builder.CreateCall(prepare_call(jldeclareglobal_func),
+                { literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, typ) });
     }
     else if (head == jl_new_sym) {
         bool is_promotable = false;
@@ -6303,15 +6578,16 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
     else if (head == jl_new_opaque_closure_sym) {
-        assert(nargs >= 4 && "Not enough arguments in new_opaque_closure");
-        SmallVector<jl_cgval_t, 4> argv(nargs, jl_cgval_t());
+        assert(nargs >= 5 && "Not enough arguments in new_opaque_closure");
+        SmallVector<jl_cgval_t, 5> argv(nargs, jl_cgval_t());
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
         const jl_cgval_t &argt = argv[0];
         const jl_cgval_t &lb = argv[1];
         const jl_cgval_t &ub = argv[2];
-        const jl_cgval_t &source = argv[3];
+        // argv[3] - constprop marker not used here
+        const jl_cgval_t &source = argv[4];
         if (source.constant == NULL) {
             // For now, we require non-constant source to be handled by using
             // eval. This should probably be a verifier error and an abort here.
@@ -6329,12 +6605,18 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             jl_value_t *env_t = NULL;
             JL_GC_PUSH2(&closure_t, &env_t);
 
-            SmallVector<jl_value_t *, 0> env_component_ts(nargs-4);
-            for (size_t i = 0; i < nargs - 4; ++i) {
-                env_component_ts[i] = argv[4+i].typ;
+            size_t ncapture_args = nargs-5;
+            SmallVector<jl_value_t *, 0> env_component_ts(ncapture_args);
+            for (size_t i = 0; i < ncapture_args; ++i) {
+                jl_value_t *typ = argv[nargs-ncapture_args+i].typ;
+                if (typ == jl_bottom_type) {
+                    JL_GC_POP();
+                    return jl_cgval_t();
+                }
+                env_component_ts[i] = typ;
             }
 
-            env_t = jl_apply_tuple_type_v(env_component_ts.data(), nargs-4);
+            env_t = jl_apply_tuple_type_v(env_component_ts.data(), ncapture_args);
             // we need to know the full env type to look up the right specialization
             if (jl_is_concrete_type(env_t)) {
                 jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant;
@@ -6343,7 +6625,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                 if (F) {
                     jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-                    Instruction *I = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_last_age_field(ctx), ctx.types().alignof_ptr);
+                    bool not_toplevel = (ctx.linfo && jl_is_method(ctx.linfo->def.method));
+                    Instruction *I = not_toplevel ? cast<Instruction>(ctx.world_age_at_entry) :
+                                     ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_tls_world_age_field(ctx), ctx.types().alignof_ptr);
                     jl_cgval_t world_age = mark_julia_type(ctx, ai.decorateInst(I), false, jl_long_type);
                     jl_cgval_t fptr;
                     if (specF)
@@ -6352,7 +6636,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                         fptr = mark_julia_type(ctx, Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type);
 
                     // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
-                    jl_cgval_t env = emit_new_struct(ctx, env_t, nargs-4, ArrayRef<jl_cgval_t>(argv).drop_front(4));
+                    jl_cgval_t env = emit_new_struct(ctx, env_t, ncapture_args, ArrayRef<jl_cgval_t>(argv).drop_front(nargs-ncapture_args));
 
                     jl_cgval_t closure_fields[5] = {
                         env,
@@ -6379,7 +6663,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     else if (head == jl_exc_sym) {
         assert(nargs == 0);
         return mark_julia_type(ctx,
-                ctx.builder.CreateCall(prepare_call(jl_current_exception_func)),
+                ctx.builder.CreateCall(prepare_call(jl_current_exception_func), {get_current_task(ctx)}),
                 true, jl_any_type);
     }
     else if (head == jl_copyast_sym) {
@@ -6483,37 +6767,29 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal
 
 static Value *get_current_task(jl_codectx_t &ctx)
 {
-    return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack);
+    return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack);
 }
 
 // Get PTLS through current task.
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
 // Get the address of the world age of the current task
-static Value *get_last_age_field(jl_codectx_t &ctx)
+static Value *get_tls_world_age_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_size,
-            ctx.builder.CreateBitCast(ct, ctx.types().T_size->getPointerTo()),
-            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr),
-            "world_age");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, world_age), "world_age");
 }
 
 static Value *get_scope_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_prjlvalue,
-            ctx.builder.CreateBitCast(ct, ctx.types().T_prjlvalue->getPointerTo()),
-            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, scope) / ctx.types().sizeof_ptr),
-            "current_scope");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope");
 }
 
-static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params)
+static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params)
 {
     ++EmittedToJLInvokes;
     jl_codectx_t ctx(M->getContext(), params, codeinst);
@@ -6530,11 +6806,8 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     ctx.builder.SetInsertPoint(b0);
     Function *theFunc;
     Value *theFarg;
-    auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-    bool cache_valid = params.cache;
 
-    if (cache_valid && invoke != NULL && invoke != &jl_fptr_wait_for_compiled) {
-        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
+    if (!theFptrName.empty()) {
         theFunc = cast<Function>(
             M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee());
         theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
@@ -6551,14 +6824,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     return f;
 }
 
-static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) {
-    return ArrayType::get(ctx.types().T_prjlvalue, rootcount);
-}
-
-static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
-    return ArrayType::get(getInt8Ty(C), unionbytes);
-}
-
 static void emit_cfunc_invalidate(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
         jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
@@ -6610,15 +6875,13 @@ static void emit_cfunc_invalidate(
         else {
             Value *arg_v = &*AI;
             ++AI;
-            Type *at = arg_v->getType();
             if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) {
                 myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const);
             }
             else {
-                assert(at == et);
+                assert(arg_v->getType() == et);
                 myargs[i] = mark_julia_type(ctx, arg_v, isboxed, jt);
             }
-            (void)at;
         }
     }
     assert(AI == gf_thunk->arg_end());
@@ -6638,7 +6901,6 @@ static void emit_cfunc_invalidate(
             ctx.builder.CreateRetVoid();
         }
         else {
-            gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo());
             ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gfrt, gf_ret, Align(julia_alignment(rettype))));
         }
         break;
@@ -6646,12 +6908,12 @@ static void emit_cfunc_invalidate(
     case jl_returninfo_t::SRet: {
         if (return_roots) {
             Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
-            assert(cast<PointerType>(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots)));
-            root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0);
+            // store the whole object in the first slot
             ctx.builder.CreateStore(gf_ret, root1);
         }
+        Align alignment(julia_alignment(rettype));
         emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret,
-                    jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), julia_alignment(rettype));
+                    jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), Align(alignment), Align(alignment));
         ctx.builder.CreateRetVoid();
         break;
     }
@@ -6700,6 +6962,7 @@ static Function* gen_cfun_wrapper(
     bool nest = (!ff || unionall_env);
     jl_value_t *astrt = (jl_value_t*)jl_any_type;
     void *callptr = NULL;
+    jl_callptr_t invoke = NULL;
     int calltype = 0;
     if (aliasname)
         name = aliasname;
@@ -6708,16 +6971,10 @@ static Function* gen_cfun_wrapper(
     if (lam && params.cache) {
         // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
         codeinst = jl_compile_method_internal(lam, world);
-        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
-        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+        uint8_t specsigflags;
+        void *fptr;
+        jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
         assert(invoke);
-        if (fptr) {
-            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                jl_cpu_pause();
-            }
-            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-        }
-        // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
         if (invoke == jl_fptr_args_addr) {
             callptr = fptr;
             calltype = 1;
@@ -6727,7 +6984,7 @@ static Function* gen_cfun_wrapper(
             callptr = (void*)codeinst->rettype_const;
             calltype = 2;
         }
-        else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
+        else if (specsigflags & 0b1) {
             callptr = fptr;
             calltype = 3;
         }
@@ -6817,11 +7074,11 @@ static Function* gen_cfun_wrapper(
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0, true);
 
-    Value *world_age_field = get_last_age_field(ctx);
+    auto world_age_field = get_tls_world_age_field(ctx);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
     Value *last_age = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
-
+    ctx.world_age_at_entry = last_age;
     Value *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
         prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
     cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Acquire);
@@ -6830,10 +7087,7 @@ static Function* gen_cfun_wrapper(
     if (calltype) {
         LoadInst *lam_max = ctx.builder.CreateAlignedLoad(
                 ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(
-                    ctx.types().T_size,
-                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), ctx.types().T_size->getPointerTo()),
-                    offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr),
+                emit_ptrgep(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), offsetof(jl_code_instance_t, max_world)),
                 ctx.types().alignof_ptr);
         age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v);
     }
@@ -6886,7 +7140,7 @@ static Function* gen_cfun_wrapper(
         if (aref) {
             if (jargty == (jl_value_t*)jl_any_type) {
                 inputarg = mark_julia_type(ctx,
-                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, val, ctx.types().T_pprjlvalue), Align(sizeof(void*))),
+                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*))),
                         true, jl_any_type);
             }
             else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed
@@ -6898,14 +7152,13 @@ static Function* gen_cfun_wrapper(
                     inputarg = ghostValue(ctx, jargty);
                 }
                 else {
-                    val = emit_bitcast(ctx, val, T->getPointerTo());
                     val = ctx.builder.CreateAlignedLoad(T, val, Align(1)); // make no alignment assumption about pointer from C
                     inputarg = mark_julia_type(ctx, val, false, jargty);
                 }
             }
             else if (static_at || (!jl_is_typevar(jargty) && !jl_is_immutable_datatype(jargty))) {
                 // must be a jl_value_t* (because it's mutable or contains gc roots)
-                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, emit_bitcast(ctx, val, ctx.types().T_prjlvalue)), true, jargty_proper);
+                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, val), true, jargty_proper);
             }
             else {
                 // allocate val into a new box, if it might not be boxed
@@ -6915,7 +7168,7 @@ static Function* gen_cfun_wrapper(
                     *closure_types = jl_alloc_vec_any(0);
                 jl_array_ptr_1d_push(*closure_types, jargty);
                 Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                        ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)),
+                        emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                         Align(sizeof(void*)));
                 BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw);
                 BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw);
@@ -6925,16 +7178,15 @@ static Function* gen_cfun_wrapper(
                 Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt instead
                 ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB);
                 ctx.builder.SetInsertPoint(boxedBB);
-                Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue);
+                Value *p1 = val;
                 p1 = track_pjlvalue(ctx, p1);
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(loadBB);
                 Value *isrtany = ctx.builder.CreateICmpEQ(
-                        literal_pointer_val(ctx, (jl_value_t*)jl_any_type),
-                        ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue));
+                        literal_pointer_val(ctx, (jl_value_t*)jl_any_type), val);
                 ctx.builder.CreateCondBr(isrtany, isanyBB, unboxedBB);
                 ctx.builder.SetInsertPoint(isanyBB);
-                Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateBitCast(val, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
+                Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*)));
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(unboxedBB);
                 Value *p3 = emit_new_bits(ctx, runtime_dt, val);
@@ -6982,7 +7234,7 @@ static Function* gen_cfun_wrapper(
                         *closure_types = jl_alloc_vec_any(0);
                     jl_array_ptr_1d_push(*closure_types, jargty);
                     Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                            ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)),
+                            emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                             Align(sizeof(void*)));
                     Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty);
                     inputarg = mark_julia_type(ctx, strct, true, jargty_proper);
@@ -7007,7 +7259,7 @@ static Function* gen_cfun_wrapper(
         jlfunc_sret = false;
         Function *theFptr = NULL;
         if (calltype == 1) {
-            StringRef fname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
+            StringRef fname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, invoke, codeinst);
             theFptr = cast_or_null<Function>(jl_Module->getNamedValue(fname));
             if (!theFptr) {
                 theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage,
@@ -7051,74 +7303,9 @@ static Function* gen_cfun_wrapper(
         bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
         assert(calltype == 3);
         // emit a specsig call
+        StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, invoke, codeinst);
         bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-        StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
         jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg);
-        FunctionType *cft = returninfo.decl.getFunctionType();
-        jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
-
-        // TODO: Can use use emit_call_specfun_other here?
-        SmallVector<Value*, 0> args;
-        Value *result = nullptr;
-        if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) {
-            // fuse the two sret together, or emit an alloca to hold it
-            if (sig.sret && jlfunc_sret) {
-                result = emit_bitcast(ctx, sretPtr, cft->getParamType(0));
-            }
-            else {
-                if (jlfunc_sret) {
-                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-                    setName(ctx.emission_context, result, "sret");
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                } else {
-                    result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes));
-                    setName(ctx.emission_context, result, "result_union");
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                }
-            }
-            args.push_back(result);
-        }
-        if (returninfo.return_roots) {
-            AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
-            setName(ctx.emission_context, return_roots, "return_roots");
-            args.push_back(return_roots);
-        }
-        if (gcstack_arg)
-            args.push_back(ctx.pgcstack);
-        for (size_t i = 0; i < nargs + 1; i++) {
-            // figure out how to repack the arguments
-            jl_cgval_t &inputarg = inputargs[i];
-            Value *arg;
-            jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type :
-                jl_nth_slot_type(lam->specTypes, i);
-            // n.b. specTypes is required to be a datatype by construction for specsig
-            bool isboxed = deserves_argbox(spect);
-            Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect);
-            if (is_uniquerep_Type(spect)) {
-                continue;
-            }
-            else if (isboxed) {
-                arg = boxed(ctx, inputarg);
-            }
-            else if (type_is_ghost(T)) {
-                continue; // ghost types are skipped by the specsig method signature
-            }
-            else if (T->isAggregateType()) {
-                // aggregate types are passed by pointer
-                inputarg = value_to_pointer(ctx, inputarg);
-                arg = maybe_bitcast(ctx, decay_derived(ctx, data_pointer(ctx, inputarg)),
-                    T->getPointerTo());
-            }
-            else {
-                arg = emit_unbox(ctx, T, inputarg, spect);
-                assert(!isa<UndefValue>(arg));
-            }
-
-            // add to argument list
-            args.push_back(arg);
-        }
-        Value *theFptr = returninfo.decl.getCallee();
-        assert(theFptr);
         if (age_ok) {
             funcName += "_gfthunk";
             Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(),
@@ -7130,47 +7317,17 @@ static Function* gen_cfun_wrapper(
             // but which has the signature of a specsig
             emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context,
                 min_world, max_world);
-            theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
+            returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk));
         }
-
-        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
-        CallInst *call = ctx.builder.CreateCall(
-            returninfo.decl.getFunctionType(),
-            theFptr, ArrayRef<Value*>(args));
-        call->setAttributes(returninfo.attrs);
-        if (gcstack_arg)
-            call->setCallingConv(CallingConv::Swift);
-
-        switch (returninfo.cc) {
-            case jl_returninfo_t::Boxed:
-                retval = mark_julia_type(ctx, call, true, astrt);
-                break;
-            case jl_returninfo_t::Register:
-                retval = mark_julia_type(ctx, call, false, astrt);
-                break;
-            case jl_returninfo_t::SRet:
-                retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa().tbaa_stack);
-                break;
-            case jl_returninfo_t::Union: {
-                Value *box = ctx.builder.CreateExtractValue(call, 0);
-                Value *tindex = ctx.builder.CreateExtractValue(call, 1);
-                Value *derived = ctx.builder.CreateSelect(
-                    ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
-                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                    decay_derived(ctx, ctx.builder.CreateBitCast(result, ctx.types().T_pjlvalue)),
-                    decay_derived(ctx, box));
-                retval = mark_julia_slot(derived,
-                                         astrt,
-                                         tindex,
-                                         ctx.tbaa().tbaa_stack);
-                assert(box->getType() == ctx.types().T_prjlvalue);
-                retval.Vboxed = box;
-                break;
-            }
-            case jl_returninfo_t::Ghosts:
-                retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa().tbaa_stack);
-                break;
+        retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1);
+        jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
+        if (jlfunc_sret && sig.sret) {
+            // fuse the two sret together
+            assert(retval.ispointer());
+            AllocaInst *result = cast<AllocaInst>(retval.V);
+            retval.V = sretPtr;
+            result->replaceAllUsesWith(sretPtr);
+            result->eraseFromParent();
         }
     }
 
@@ -7210,14 +7367,17 @@ static Function* gen_cfun_wrapper(
     ctx.builder.ClearInsertionPoint();
 
     if (aliasname) {
-        GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
+        auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
                             GlobalValue::ExternalLinkage, aliasname, cw, M);
+        if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
+            alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
+        }
     }
 
     if (nest) {
         funcName += "make";
         Function *cw_make = Function::Create(
-                FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
+                FunctionType::get(getPointerTy(ctx.builder.getContext()), { getPointerTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
                 GlobalVariable::ExternalLinkage,
                 funcName, M);
         jl_init_function(cw_make, ctx.emission_context.TargetTriple);
@@ -7232,8 +7392,8 @@ static Function* gen_cfun_wrapper(
         Function *adjust_trampoline = Intrinsic::getDeclaration(cw_make->getParent(), Intrinsic::adjust_trampoline);
         cwbuilder.CreateCall(init_trampoline, {
                 Tramp,
-                cwbuilder.CreateBitCast(cw, getInt8PtrTy(ctx.builder.getContext())),
-                cwbuilder.CreateBitCast(NVal, getInt8PtrTy(ctx.builder.getContext()))
+                cw,
+                NVal,
             });
         cwbuilder.CreateRet(cwbuilder.CreateCall(adjust_trampoline, { Tramp }));
         cw = cw_make;
@@ -7337,6 +7497,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     if (ctx.emission_context.TargetTriple.isAArch64() || ctx.emission_context.TargetTriple.isARM() || ctx.emission_context.TargetTriple.isPPC64()) {
         if (nest) {
             emit_error(ctx, "cfunction: closures are not supported on this platform");
+            JL_GC_POP();
             return jl_cgval_t();
         }
     }
@@ -7374,7 +7535,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         F = ctx.builder.CreateCall(prepare_call(jlgetcfunctiontrampoline_func), {
                  fobj,
                  literal_pointer_val(ctx, output_type),
-                 ctx.builder.CreateBitCast(cache, getInt8PtrTy(ctx.builder.getContext())),
+                 cache,
                  literal_pointer_val(ctx, (jl_value_t*)fill),
                  F,
                  closure_types ? literal_pointer_val(ctx, (jl_value_t*)unionall_env) : Constant::getNullValue(ctx.types().T_pjlvalue),
@@ -7389,7 +7550,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
             assert(jl_datatype_size(output_type) == sizeof(void*) * 4);
             Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type, true);
             setName(ctx.emission_context, strct, "cfun_result");
-            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo());
+            Value *derived_strct = decay_derived(ctx, strct);
             MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
             ai.decorateInst(ctx.builder.CreateStore(F, derived_strct));
@@ -7465,7 +7626,7 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
 }
 
 // generate a julia-callable function that calls f (AKA lam)
-static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName,
+static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName,
         Module *M, jl_codegen_params_t &params)
 {
     ++GeneratedInvokeWrappers;
@@ -7493,122 +7654,37 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0);
 
-    // TODO: replace this with emit_call_specfun_other?
-    FunctionType *ftype = const_cast<llvm::FunctionCallee&>(f.decl).getFunctionType();
-    size_t nfargs = ftype->getNumParams();
-    SmallVector<Value *, 0> args(nfargs);
-    unsigned idx = 0;
-    AllocaInst *result = NULL;
-    switch (f.cc) {
-    case jl_returninfo_t::Boxed:
-    case jl_returninfo_t::Register:
-    case jl_returninfo_t::Ghosts:
-        break;
-    case jl_returninfo_t::SRet:
-        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()));
-        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType());
-        setName(ctx.emission_context, result, "sret");
-        args[idx] = result;
-        idx++;
-        break;
-    case jl_returninfo_t::Union:
-        result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes));
-        if (f.union_align > 1)
-            result->setAlignment(Align(f.union_align));
-        args[idx] = result;
-        idx++;
-        setName(ctx.emission_context, result, "result_union");
-        break;
-    }
-    if (f.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots));
-        setName(ctx.emission_context, return_roots, "return_roots");
-        args[idx] = return_roots;
-        idx++;
-    }
-    bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    if (gcstack_arg) {
-        args[idx] = ctx.pgcstack;
-        idx++;
-    }
+    SmallVector<jl_cgval_t, 0> argv(nargs);
     bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
-    for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+    for (size_t i = 0; i < nargs; ++i) {
         jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
             jl_nth_slot_type(lam->specTypes, i);
-        // n.b. specTypes is required to be a datatype by construction for specsig
-        bool isboxed = deserves_argbox(ty);
-        Type *lty = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty);
-        if (type_is_ghost(lty) || is_uniquerep_Type(ty))
-            continue;
         Value *theArg;
         if (i == 0) {
-            // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers
-            // come in as ::Tracked, but specsig expected ::Derived.
-            if (is_opaque_closure)
-                theArg = decay_derived(ctx, funcArg);
-            else
-                theArg = funcArg;
+            theArg = funcArg;
         }
         else {
-            Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+            Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
             theArg = ai.decorateInst(maybe_mark_load_dereferenceable(
                     ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                     false,
                     ty));
         }
-        if (!isboxed) {
-            theArg = decay_derived(ctx, emit_bitcast(ctx, theArg, PointerType::get(lty, 0)));
-            if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty)));
-        }
-        assert(!isa<UndefValue>(theArg));
-        args[idx] = theArg;
-        idx++;
+        argv[i] = mark_julia_type(ctx, theArg, true, ty);
     }
-    CallInst *call = ctx.builder.CreateCall(f.decl, args);
-    call->setAttributes(f.attrs);
-    if (gcstack_arg)
-        call->setCallingConv(CallingConv::Swift);
-    jl_cgval_t retval;
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, jlretty, f, nullptr, argv, nargs);
     if (retarg != -1) {
         Value *theArg;
         if (retarg == 0)
             theArg = funcArg;
         else
             theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1),
+                    emit_ptrgep(ctx, argArray, (retarg - 1) * ctx.types().sizeof_ptr),
                     Align(sizeof(void*)));
         retval = mark_julia_type(ctx, theArg, true, jl_any_type);
     }
-    else {
-        switch (f.cc) {
-        case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, jlretty);
-            break;
-        case jl_returninfo_t::Register:
-            retval = mark_julia_type(ctx, call, false, jlretty);
-            break;
-        case jl_returninfo_t::SRet:
-            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
-            break;
-        case jl_returninfo_t::Union:
-            // result is technically not right here, but `boxed` will only look at it
-            // for the unboxed values, so it's ok.
-            retval = mark_julia_slot(result,
-                                     jlretty,
-                                     ctx.builder.CreateExtractValue(call, 1),
-                                     ctx.tbaa().tbaa_stack);
-            retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
-            assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue);
-            break;
-        case jl_returninfo_t::Ghosts:
-            retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
-            break;
-        }
-    }
     ctx.builder.CreateRet(boxed(ctx, retval));
-    return w;
 }
 
 static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments, size_t *arg_offset)
@@ -7655,6 +7731,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
             if (tracked.count && !tracked.all)
                 props.return_roots = tracked.count;
             props.cc = jl_returninfo_t::SRet;
+            props.union_bytes = jl_datatype_size(jlrettype);
+            props.union_align = props.union_minalign = jl_datatype_align(jlrettype);
             // sret is always passed from alloca
             assert(M);
             fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace()));
@@ -7696,7 +7774,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
         attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
-        fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
+        fsig.push_back(ctx.types().T_ptr);
         argnames.push_back("return_roots");
     }
 
@@ -7720,22 +7798,21 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         bool isboxed = false;
         Type *ty = NULL;
         if (i == 0 && is_opaque_closure) {
-            ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-            isboxed = true; // true-ish anyway - we might not have the type tag
+            ty = nullptr; // special token to avoid computing this unnecessarily
         }
         else {
             if (is_uniquerep_Type(jt))
                 continue;
             isboxed = deserves_argbox(jt);
             ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+            if (type_is_ghost(ty))
+                continue;
         }
-        if (type_is_ghost(ty))
-            continue;
         AttrBuilder param(ctx.builder.getContext());
-        if (ty->isAggregateType()) { // aggregate types are passed by pointer
+        if (ty == nullptr || ty->isAggregateType()) { // aggregate types are passed by pointer
             param.addAttribute(Attribute::NoCapture);
             param.addAttribute(Attribute::ReadOnly);
-            ty = PointerType::get(ty, AddressSpace::Derived);
+            ty = ctx.builder.getPtrTy(AddressSpace::Derived);
         }
         else if (isboxed && jl_is_immutable_datatype(jt)) {
             param.addAttribute(Attribute::ReadOnly);
@@ -7782,8 +7859,6 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
     else {
         if (fval->getType()->isIntegerTy())
             fval = emit_inttoptr(ctx, fval, ftype->getPointerTo());
-        else
-            fval = emit_bitcast(ctx, fval, ftype->getPointerTo());
     }
     if (auto F = dyn_cast<Function>(fval)) {
         if (gcstack_arg)
@@ -7798,11 +7873,9 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
     return props;
 }
 
-static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count)
+static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, unsigned count)
 {
-    if (isptr && !cast<PointerType>(Src->getType())->isOpaqueOrPointeeTypeMatches(T))
-        Src = ctx.builder.CreateBitCast(Src, T->getPointerTo(Src->getType()->getPointerAddressSpace()));
-    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering??
+    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ctx.builder); //This comes from Late-GC-Lowering??
     assert(emitted == count); (void)emitted; (void)count;
 }
 
@@ -7867,11 +7940,12 @@ static jl_llvm_functions_t
         jl_method_instance_t *lam,
         jl_code_info_t *src,
         jl_value_t *jlrettype,
-        jl_codegen_params_t &params,
-        size_t min_world, size_t max_world)
+        jl_codegen_params_t &params)
 {
     ++EmittedFunctions;
     // step 1. unpack AST and allocate codegen context for this function
+    size_t min_world = src->min_world;
+    size_t max_world = src->max_world;
     jl_llvm_functions_t declarations;
     jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world);
     jl_datatype_t *vatyp = NULL;
@@ -7884,19 +7958,15 @@ static jl_llvm_functions_t
     ctx.module = jl_is_method(lam->def.method) ? lam->def.method->module : lam->def.module;
     ctx.linfo = lam;
     ctx.name = TSM.getModuleUnlocked()->getModuleIdentifier().data();
-    size_t nreq = 0;
-    int va = 0;
-    if (jl_is_method(lam->def.method)) {
-        ctx.nargs = nreq = lam->def.method->nargs;
-        ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure;
-        if ((nreq > 0 && jl_is_method(lam->def.value) && lam->def.method->isva)) {
-            assert(nreq > 0);
-            nreq--;
-            va = 1;
-        }
+    size_t nreq = src->nargs;
+    int va = src->isva;
+    ctx.nargs = nreq;
+    if (va) {
+        assert(nreq > 0);
+        nreq--;
     }
-    else {
-        ctx.nargs = 0;
+    if (jl_is_method(lam->def.value)) {
+        ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure;
     }
     ctx.nReqArgs = nreq;
     if (va) {
@@ -7925,10 +7995,14 @@ static jl_llvm_functions_t
         toplineno = lam->def.method->line;
         ctx.file = jl_symbol_name(lam->def.method->file);
     }
-    else if (jl_array_nrows(src->linetable) > 0) {
-        jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, 0);
-        ctx.file = jl_symbol_name((jl_sym_t*)jl_fieldref_noalloc(locinfo, 2));
-        toplineno = jl_unbox_int32(jl_fieldref(locinfo, 3));
+    else if ((jl_value_t*)src->debuginfo != jl_nothing) {
+        // look for the file and line info of the original start of this block, as reported by lowering
+        jl_debuginfo_t *debuginfo = src->debuginfo;
+        while ((jl_value_t*)debuginfo->linetable != jl_nothing)
+            debuginfo = debuginfo->linetable;
+        ctx.file = jl_debuginfo_file(debuginfo);
+        struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+        toplineno = std::max((int32_t)0, lineidx.line);
     }
     if (ctx.file.empty())
         ctx.file = "<missing>";
@@ -7956,7 +8030,7 @@ static jl_llvm_functions_t
 
     // step 3. some variable analysis
     size_t i;
-    for (i = 0; i < nreq; i++) {
+    for (i = 0; i < nreq && i < vinfoslen; i++) {
         jl_varinfo_t &varinfo = ctx.slots[i];
         varinfo.isArgument = true;
         jl_sym_t *argname = slot_symbol(ctx, i);
@@ -8011,8 +8085,8 @@ static jl_llvm_functions_t
     //Safe because params holds ctx lock
     Module *M = TSM.getModuleUnlocked();
     M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level);
-    jl_debugcache_t debuginfo;
-    debuginfo.initialize(M);
+    jl_debugcache_t debugcache;
+    debugcache.initialize(M);
     jl_returninfo_t returninfo = {};
     Function *f = NULL;
     bool has_sret = false;
@@ -8088,7 +8162,8 @@ static jl_llvm_functions_t
         std::string wrapName;
         raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
         declarations.functionObject = wrapName;
-        (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context);
+        size_t nparams = jl_nparams(lam->specTypes);
+        gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context);
         // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType)
         // TODO: add attributes: dereferenceable<sizeof(void*) * nreq>
         // TODO: (if needsparams) add attributes: dereferenceable<sizeof(void*) * length(sp)>, readonly, nocapture
@@ -8164,11 +8239,11 @@ static jl_llvm_functions_t
         topfile = dbuilder.createFile(ctx.file, ".");
         DISubroutineType *subrty;
         if (ctx.emission_context.debug_level <= 1)
-            subrty = debuginfo.jl_di_func_null_sig;
+            subrty = debugcache.jl_di_func_null_sig;
         else if (!specsig)
-            subrty = debuginfo.jl_di_func_sig;
+            subrty = debugcache.jl_di_func_sig;
         else
-            subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder);
+            subrty = get_specsig_di(ctx, debugcache, jlrettype, lam->specTypes, dbuilder);
         SP = dbuilder.createFunction(nullptr
                                      ,dbgFuncName      // Name
                                      ,f->getName()     // LinkageName
@@ -8199,7 +8274,7 @@ static jl_llvm_functions_t
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line
                     // Variable type
-                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
@@ -8210,7 +8285,7 @@ static jl_llvm_functions_t
                     has_sret + nreq + 1,                // Argument number (1-based)
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, debuginfo, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debugcache, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
@@ -8225,7 +8300,7 @@ static jl_llvm_functions_t
                     jl_symbol_name(s),       // Variable name
                     topfile,                 // File
                     toplineno == -1 ? 0 : toplineno, // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false), // Variable type
+                    julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false), // Variable type
                     AlwaysPreserve,          // May be deleted if optimized out
                     DINode::FlagZero         // Flags (TODO: Do we need any)
                     );
@@ -8258,16 +8333,15 @@ static jl_llvm_functions_t
     // step 6. set up GC frame
     allocate_gc_frame(ctx, b0);
     Value *last_age = NULL;
-    Value *world_age_field = get_last_age_field(ctx);
-    if (toplevel || ctx.is_opaque_closure) {
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-        last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-            ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
-    }
+    auto world_age_field = get_tls_world_age_field(ctx);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
+               ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
+    ctx.world_age_at_entry = last_age; // Load world age for use in get_tls_world_age
 
     // step 7. allocate local variables slots
     // must be in the first basic block for the llvm mem2reg pass to work
-    auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) {
+    auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled, M](jl_varinfo_t &varinfo, jl_sym_t *s, int i) {
         jl_value_t *jt = varinfo.value.typ;
         assert(!varinfo.boxroot); // variables shouldn't have memory locs already
         if (varinfo.value.constant) {
@@ -8275,10 +8349,10 @@ static jl_llvm_functions_t
             alloc_def_flag(ctx, varinfo);
             return;
         }
-        else if (varinfo.isArgument && !(specsig && i == (size_t)ctx.vaSlot)) {
-            // if we can unbox it, just use the input pointer
-            if (i != (size_t)ctx.vaSlot && jl_is_concrete_immutable(jt))
-                return;
+        else if (varinfo.isArgument && (!va || ctx.vaSlot == -1 || i != ctx.vaSlot)) {
+            // just use the input pointer, if we have it
+            // (we will need to attach debuginfo later to it)
+            return;
         }
         else if (jl_is_uniontype(jt)) {
             bool allunbox;
@@ -8287,16 +8361,18 @@ static jl_llvm_functions_t
             if (lv) {
                 lv->setName(jl_symbol_name(s));
                 varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
-                varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1));
                 setName(ctx.emission_context, varinfo.pTIndex, "tindex");
+                // TODO: attach debug metadata to this variable
             }
             else if (allunbox) {
                 // all ghost values just need a selector allocated
-                AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1));
                 lv->setName(jl_symbol_name(s));
                 varinfo.pTIndex = lv;
                 varinfo.value.tbaa = NULL;
                 varinfo.value.isboxed = false;
+                // TODO: attach debug metadata to this variable
             }
             if (lv || allunbox)
                 alloc_def_flag(ctx, varinfo);
@@ -8316,36 +8392,28 @@ static jl_llvm_functions_t
             varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
             alloc_def_flag(ctx, varinfo);
             if (debug_enabled && varinfo.dinfo) {
-                assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt);
+                assert((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt);
                 dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(),
                                        topdebugloc,
                                        ctx.builder.GetInsertBlock());
             }
             return;
         }
-        if (!varinfo.isArgument || // always need a slot if the variable is assigned
-            specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer)
-            (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple
-            i == 0) { // or it is the first argument (which isn't in `argArray`)
-            AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(),
-                nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
-            StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*)));
-            SI->insertAfter(ctx.topalloca);
-            varinfo.boxroot = av;
-            if (debug_enabled && varinfo.dinfo) {
-                DIExpression *expr;
-                if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) {
-                    expr = dbuilder.createExpression();
-                }
-                else {
-                    SmallVector<uint64_t, 8> addr;
-                    addr.push_back(llvm::dwarf::DW_OP_deref);
-                    expr = dbuilder.createExpression(addr);
-                }
-                dbuilder.insertDeclare(av, varinfo.dinfo, expr,
-                                            topdebugloc,
-                                ctx.builder.GetInsertBlock());
-            }
+        // otherwise give it a boxroot in this function
+        AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(),
+            nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
+        StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*)));
+        SI->insertAfter(ctx.topalloca);
+        varinfo.boxroot = av;
+        if (debug_enabled && varinfo.dinfo) {
+            SmallVector<uint64_t, 1> addr;
+            DIExpression *expr;
+            if ((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
+                addr.push_back(llvm::dwarf::DW_OP_deref);
+            expr = dbuilder.createExpression(addr);
+            dbuilder.insertDeclare(av, varinfo.dinfo, expr,
+                                        topdebugloc,
+                            ctx.builder.GetInsertBlock());
         }
     };
 
@@ -8359,7 +8427,7 @@ static jl_llvm_functions_t
             varinfo.usedUndef = false;
             continue;
         }
-        allocate_local(varinfo, s);
+        allocate_local(varinfo, s, (int)i);
     }
 
     std::map<int, int> upsilon_to_phic;
@@ -8402,7 +8470,7 @@ static jl_llvm_functions_t
                 vi.used = true;
                 vi.isVolatile = true;
                 vi.value = mark_julia_type(ctx, (Value*)NULL, false, typ);
-                allocate_local(vi, jl_symbol("phic"));
+                allocate_local(vi, jl_symbol("phic"), -1);
             }
         }
     }
@@ -8476,7 +8544,7 @@ static jl_llvm_functions_t
         AttrBuilder param(ctx.builder.getContext());
         attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
     }
-    for (i = 0; i < nreq; i++) {
+    for (i = 0; i < nreq && i < vinfoslen; i++) {
         jl_sym_t *s = slot_symbol(ctx, i);
         jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
         // TODO: jl_nth_slot_type should call jl_rewrap_unionall?
@@ -8509,19 +8577,16 @@ static jl_llvm_functions_t
             if (i == 0 && ctx.is_opaque_closure) {
                 // Load closure world
                 Value *oc_this = decay_derived(ctx, &*AI++);
-                Value *argaddr = emit_bitcast(ctx, oc_this, getInt8PtrTy(ctx.builder.getContext()));
-                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world)));
+                Value *argaddr = oc_this;
+                Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world));
 
                 jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
                     nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value());
-                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value());
+                ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure
+                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr);
 
                 // Load closure env
-                Value *envaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures)));
+                Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures));
 
                 jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
                     nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
@@ -8536,18 +8601,18 @@ static jl_llvm_functions_t
                     theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
                 }
                 else {
-                    Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
+                    Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
                     Value *load = ai.decorateInst(maybe_mark_load_dereferenceable(
                             ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                             false, vi.value.typ));
                     theArg = mark_julia_type(ctx, load, true, vi.value.typ);
-                    if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) {
+                    if (debug_enabled && vi.dinfo && !vi.boxroot) {
                         SmallVector<uint64_t, 8> addr;
                         addr.push_back(llvm::dwarf::DW_OP_deref);
                         addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
                         addr.push_back((i - 1) * sizeof(void*));
-                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
+                        if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
                             addr.push_back(llvm::dwarf::DW_OP_deref);
                         dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr),
                                         topdebugloc,
@@ -8561,21 +8626,15 @@ static jl_llvm_functions_t
                 assert(vi.value.V == NULL && "unexpected variable slot created for argument");
                 // keep track of original (possibly boxed) value to avoid re-boxing or moving
                 vi.value = theArg;
-                if (specsig && theArg.V && debug_enabled && vi.dinfo) {
-                    SmallVector<uint64_t, 8> addr;
-                    Value *parg;
+                if (debug_enabled && vi.dinfo && theArg.V) {
                     if (theArg.ispointer()) {
-                        parg = theArg.V;
-                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
-                            addr.push_back(llvm::dwarf::DW_OP_deref);
+                        dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                               topdebugloc, ctx.builder.GetInsertBlock());
                     }
                     else {
-                        parg = ctx.builder.CreateAlloca(theArg.V->getType(), NULL, jl_symbol_name(s));
-                        ctx.builder.CreateStore(theArg.V, parg);
+                        dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                                         topdebugloc, ctx.builder.GetInsertBlock());
                     }
-                    dbuilder.insertDeclare(parg, vi.dinfo, dbuilder.createExpression(addr),
-                                                topdebugloc,
-                                                ctx.builder.GetInsertBlock());
                 }
             }
             else {
@@ -8584,7 +8643,6 @@ static jl_llvm_functions_t
             }
         }
     }
-
     // step 9. allocate rest argument
     CallInst *restTuple = NULL;
     if (va && ctx.vaSlot != -1) {
@@ -8619,10 +8677,8 @@ static jl_llvm_functions_t
             restTuple =
                 ctx.builder.CreateCall(F,
                         { Constant::getNullValue(ctx.types().T_prjlvalue),
-                          ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray,
-                                  ConstantInt::get(ctx.types().T_size, nreq - 1)),
-                          ctx.builder.CreateSub(argCount,
-                                  ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) });
+                          emit_ptrgep(ctx, argArray, (nreq - 1) * sizeof(jl_value_t*)),
+                          ctx.builder.CreateSub(argCount, ctx.builder.getInt32(nreq - 1)) });
             restTuple->setAttributes(F->getAttributes());
             ctx.builder.CreateStore(restTuple, vi.boxroot);
         }
@@ -8639,8 +8695,8 @@ static jl_llvm_functions_t
         return (!jl_is_submodule(mod, jl_base_module) &&
                 !jl_is_submodule(mod, jl_core_module));
     };
-    auto in_tracked_path = [] (StringRef file) {
-        return jl_options.tracked_path != NULL && file.startswith(jl_options.tracked_path);
+    auto in_tracked_path = [] (StringRef file) { // falls within an explicitly set file or directory
+        return jl_options.tracked_path != NULL && file.starts_with(jl_options.tracked_path);
     };
     bool mod_is_user_mod = in_user_mod(ctx.module);
     bool mod_is_tracked = in_tracked_path(ctx.file);
@@ -8648,82 +8704,108 @@ static jl_llvm_functions_t
         DebugLoc loc;
         StringRef file;
         ssize_t line;
+        ssize_t line0; // if this represents pc=1, then also cover the entry to the function (pc=0)
         bool is_user_code;
-        bool is_tracked; // falls within an explicitly set file or directory
-        unsigned inlined_at;
-        bool operator ==(const DebugLineTable &other) const {
-            return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.is_tracked == is_tracked && other.inlined_at == inlined_at;
-        }
+        int32_t edgeid;
+        bool sameframe(const DebugLineTable &other) const {
+            // detect if the line info for this frame is unchanged (equivalent to loc == other.loc ignoring the inlined_at field)
+            return other.edgeid == edgeid && other.line == line;
+        };
     };
-    SmallVector<DebugLineTable, 0> linetable;
-    { // populate the linetable data format
-        assert(jl_is_array(src->linetable));
-        size_t nlocs = jl_array_nrows(src->linetable);
-        std::map<std::tuple<StringRef, StringRef>, DISubprogram*> subprograms;
-        linetable.resize(nlocs + 1);
-        DebugLineTable &topinfo = linetable[0];
-        topinfo.file = ctx.file;
-        topinfo.line = toplineno;
-        topinfo.is_user_code = mod_is_user_mod;
-        topinfo.is_tracked = mod_is_tracked;
-        topinfo.inlined_at = 0;
-        topinfo.loc = topdebugloc;
-        for (size_t i = 0; i < nlocs; i++) {
-            // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32)
-            jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i);
-            DebugLineTable &info = linetable[i + 1];
-            assert(jl_typetagis(locinfo, jl_lineinfonode_type));
-            jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0);
-            jl_value_t *method = jl_fieldref_noalloc(locinfo, 1);
-            jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2);
-            info.line = jl_unbox_int32(jl_fieldref(locinfo, 3));
-            info.inlined_at = jl_unbox_int32(jl_fieldref(locinfo, 4));
-            assert(info.inlined_at <= i);
-            info.file = jl_symbol_name(filesym);
-            if (info.file.empty())
-                info.file = "<missing>";
-            if (module == ctx.module)
-                info.is_user_code = mod_is_user_mod;
-            else
-                info.is_user_code = in_user_mod(module);
-            info.is_tracked = in_tracked_path(info.file);
-            if (debug_enabled) {
-                StringRef fname;
-                if (jl_is_method_instance(method))
-                    method = ((jl_method_instance_t*)method)->def.value;
-                if (jl_is_method(method))
-                    method = (jl_value_t*)((jl_method_t*)method)->name;
-                if (jl_is_symbol(method))
-                    fname = jl_symbol_name((jl_sym_t*)method);
-                if (fname.empty())
-                    fname = "macro expansion";
-                if (info.inlined_at == 0 && info.file == ctx.file) { // if everything matches, emit a toplevel line number
-                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL);
+    DebugLineTable topinfo;
+    topinfo.file = ctx.file;
+    topinfo.line = toplineno;
+    topinfo.line0 = 0;
+    topinfo.is_user_code = mod_is_user_mod;
+    topinfo.loc = topdebugloc;
+    topinfo.edgeid = 0;
+    std::map<std::tuple<StringRef, StringRef>, DISubprogram*> subprograms;
+    SmallVector<DebugLineTable, 0> prev_lineinfo, new_lineinfo;
+    auto update_lineinfo = [&] (size_t pc) {
+        std::function<bool(jl_debuginfo_t*, jl_value_t*, size_t, size_t)> append_lineinfo =
+                [&] (jl_debuginfo_t *debuginfo, jl_value_t *func, size_t to, size_t pc) -> bool {
+            while (1) {
+                if (!jl_is_symbol(debuginfo->def)) // this is a path
+                    func = debuginfo->def; // this is inlined
+                struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc);
+                size_t i = lineidx.line;
+                if (i < 0) // pc out of range: broken debuginfo?
+                    return false;
+                if (i == 0 && lineidx.to == 0) // no update
+                    return false;
+                if (pc > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) {
+                    // indirection node
+                    if (!append_lineinfo(debuginfo->linetable, func, to, i))
+                        return false; // no update
                 }
-                else { // otherwise, describe this as an inlining frame
-                    DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)];
-                    if (inl_SP == NULL) {
-                        DIFile *difile = dbuilder.createFile(info.file, ".");
-                        inl_SP = dbuilder.createFunction(difile
-                                                     ,std::string(fname) + ";" // Name
-                                                     ,fname            // LinkageName
-                                                     ,difile           // File
-                                                     ,0                // LineNo
-                                                     ,debuginfo.jl_di_func_null_sig // Ty
-                                                     ,0                // ScopeLine
-                                                     ,DINode::FlagZero // Flags
-                                                     ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags
-                                                     ,nullptr          // Template Parameters
-                                                     ,nullptr          // Template Declaration
-                                                     ,nullptr          // ThrownTypes
-                                                     );
+                else {
+                    // actual node
+                    DebugLineTable info;
+                    info.edgeid = to;
+                    jl_module_t *modu = func ? jl_debuginfo_module1(func) : NULL;
+                    if (modu == NULL)
+                        modu = ctx.module;
+                    info.file = jl_debuginfo_file1(debuginfo);
+                    info.line = i;
+                    info.line0 = 0;
+                    if (pc == 1) {
+                        struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+                        assert(lineidx.to == 0 && lineidx.pc == 0);
+                        if (lineidx.line > 0 && info.line != lineidx.line)
+                            info.line0 = lineidx.line;
                     }
-                    DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable[info.inlined_at].loc;
-                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc);
+                    if (info.file.empty())
+                        info.file = "<missing>";
+                    if (modu == ctx.module)
+                        info.is_user_code = mod_is_user_mod;
+                    else
+                        info.is_user_code = in_user_mod(modu);
+                    if (debug_enabled) {
+                        StringRef fname = jl_debuginfo_name(func);
+                        if (new_lineinfo.empty() && info.file == ctx.file) { // if everything matches, emit a toplevel line number
+                            info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL);
+                        }
+                        else { // otherwise, describe this as an inlining frame
+                            DebugLoc inl_loc = new_lineinfo.empty() ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : new_lineinfo.back().loc;
+                            DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)];
+                            if (inl_SP == NULL) {
+                                DIFile *difile = dbuilder.createFile(info.file, ".");
+                                inl_SP = dbuilder.createFunction(difile
+                                                             ,std::string(fname) + ";" // Name
+                                                             ,fname            // LinkageName
+                                                             ,difile           // File
+                                                             ,0                // LineNo
+                                                             ,debugcache.jl_di_func_null_sig // Ty
+                                                             ,0                // ScopeLine
+                                                             ,DINode::FlagZero // Flags
+                                                             ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags
+                                                             ,nullptr          // Template Parameters
+                                                             ,nullptr          // Template Declaration
+                                                             ,nullptr          // ThrownTypes
+                                                             );
+                            }
+                            info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc);
+                        }
+                    }
+                    new_lineinfo.push_back(info);
                 }
+                to = lineidx.to;
+                if (to == 0)
+                    return true;
+                pc = lineidx.pc;
+                debuginfo = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, to - 1);
+                func = NULL;
             }
-        }
-    }
+        };
+        prev_lineinfo.resize(0);
+        std::swap(prev_lineinfo, new_lineinfo);
+        bool updated = append_lineinfo(src->debuginfo, (jl_value_t*)lam, 0, pc + 1);
+        if (!updated)
+            std::swap(prev_lineinfo, new_lineinfo);
+        else
+            assert(new_lineinfo.size() > 0);
+        return updated;
+    };
 
     SmallVector<MDNode*, 0> aliasscopes;
     MDNode* current_aliasscope = nullptr;
@@ -8829,44 +8911,67 @@ static jl_llvm_functions_t
                 (in_user_code && malloc_log_mode == JL_LOG_USER) ||
                 (is_tracked && malloc_log_mode == JL_LOG_PATH)));
     };
-    SmallVector<unsigned, 0> current_lineinfo, new_lineinfo;
-    auto coverageVisitStmt = [&] (size_t dbg) {
-        if (dbg == 0 || dbg >= linetable.size())
-            return;
-        // Compute inlining stack for current line, inner frame first
-        while (dbg) {
-            new_lineinfo.push_back(dbg);
-            dbg = linetable[dbg].inlined_at;
-        }
+    auto coverageVisitStmt = [&] () {
         // Visit frames which differ from previous statement as tracked in
-        // current_lineinfo (tracked outer frame first).
-        current_lineinfo.resize(new_lineinfo.size(), 0);
+        // prev_lineinfo (tracked outer frame first).
+        size_t dbg;
         for (dbg = 0; dbg < new_lineinfo.size(); dbg++) {
-            unsigned newdbg = new_lineinfo[new_lineinfo.size() - dbg - 1];
-            if (newdbg != current_lineinfo[dbg]) {
-                current_lineinfo[dbg] = newdbg;
-                const auto &info = linetable[newdbg];
-                if (do_coverage(info.is_user_code, info.is_tracked))
-                    coverageVisitLine(ctx, info.file, info.line);
+            if (dbg >= prev_lineinfo.size() || !new_lineinfo[dbg].sameframe(prev_lineinfo[dbg]))
+                break;
+        }
+        for (; dbg < new_lineinfo.size(); dbg++) {
+            const auto &newdbg = new_lineinfo[dbg];
+            bool is_tracked = in_tracked_path(newdbg.file);
+            if (do_coverage(newdbg.is_user_code, is_tracked)) {
+                if (newdbg.line0 != 0 && (dbg >= prev_lineinfo.size() || newdbg.edgeid != prev_lineinfo[dbg].edgeid || newdbg.line0 != prev_lineinfo[dbg].line))
+                    coverageVisitLine(ctx, newdbg.file, newdbg.line0);
+                coverageVisitLine(ctx, newdbg.file, newdbg.line);
             }
         }
-        new_lineinfo.clear();
     };
-    auto mallocVisitStmt = [&] (unsigned dbg, Value *sync) {
-        if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || dbg == 0) {
+    auto mallocVisitStmt = [&] (Value *sync, bool have_dbg_update) {
+        if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || !have_dbg_update) {
+            // TODD: add || new_lineinfo[0].sameframe(prev_lineinfo[0])) above, but currently this breaks the test for it (by making an optimization better)
             if (do_malloc_log(true, mod_is_tracked) && sync)
                 ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync});
             return;
         }
-        while (linetable[dbg].inlined_at)
-            dbg = linetable[dbg].inlined_at;
-        mallocVisitLine(ctx, ctx.file, linetable[dbg].line, sync);
+        mallocVisitLine(ctx, new_lineinfo[0].file, new_lineinfo[0].line, sync);
     };
     if (coverage_mode != JL_LOG_NONE) {
         // record all lines that could be covered
-        for (const auto &info : linetable)
-            if (do_coverage(info.is_user_code, info.is_tracked))
-                jl_coverage_alloc_line(info.file, info.line);
+        std::function<void(jl_debuginfo_t *debuginfo, jl_value_t *func)> record_line_exists = [&](jl_debuginfo_t *debuginfo, jl_value_t *func) {
+            if (!jl_is_symbol(debuginfo->def)) // this is a path
+                func = debuginfo->def; // this is inlined
+            for (size_t i = 0; i < jl_svec_len(debuginfo->edges); i++) {
+                jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, i);
+                record_line_exists(edge, NULL);
+            }
+            while ((jl_value_t*)debuginfo->linetable != jl_nothing)
+                debuginfo = debuginfo->linetable;
+            jl_module_t *modu = func ? jl_debuginfo_module1(func) : NULL;
+            if (modu == NULL)
+                modu = ctx.module;
+            StringRef file = jl_debuginfo_file1(debuginfo);
+            if (file.empty())
+                file = "<missing>";
+            bool is_user_code;
+            if (modu == ctx.module)
+                is_user_code = mod_is_user_mod;
+            else
+                is_user_code = in_user_mod(modu);
+            bool is_tracked = in_tracked_path(file);
+            if (do_coverage(is_user_code, is_tracked)) {
+                for (size_t pc = 0; 1; pc++) {
+                    struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc);
+                    if (lineidx.line == -1)
+                        break;
+                    if (lineidx.line > 0)
+                        jl_coverage_alloc_line(file, lineidx.line);
+                }
+            }
+        };
+        record_line_exists(src->debuginfo, (jl_value_t*)lam);
     }
 
     come_from_bb[0] = ctx.builder.GetInsertBlock();
@@ -8918,26 +9023,20 @@ static jl_llvm_functions_t
         BB[label] = bb;
     }
 
+    new_lineinfo.push_back(topinfo);
     Value *sync_bytes = nullptr;
     if (do_malloc_log(true, mod_is_tracked))
         sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
-    { // coverage for the function definition line number
-        const auto &topinfo = linetable[0];
-        if (linetable.size() > 1) {
-            if (topinfo == linetable[1])
-                current_lineinfo.push_back(1);
-        }
-        if (do_coverage(topinfo.is_user_code, topinfo.is_tracked))
-            coverageVisitLine(ctx, topinfo.file, topinfo.line);
-    }
+    // coverage for the function definition line number (topinfo)
+    coverageVisitStmt();
 
     find_next_stmt(0);
     while (cursor != -1) {
-        int32_t debuginfoloc = jl_array_data(src->codelocs, int32_t)[cursor];
-        if (debuginfoloc > 0) {
+        bool have_dbg_update = update_lineinfo(cursor);
+        if (have_dbg_update) {
             if (debug_enabled)
-                ctx.builder.SetCurrentDebugLocation(linetable[debuginfoloc].loc);
-            coverageVisitStmt(debuginfoloc);
+                ctx.builder.SetCurrentDebugLocation(new_lineinfo.back().loc);
+            coverageVisitStmt();
         }
         ctx.noalias().aliasscope.current = aliasscopes[cursor];
         jl_value_t *stmt = jl_array_ptr_ref(stmts, cursor);
@@ -9019,12 +9118,13 @@ static jl_llvm_functions_t
                 if (retvalinfo.ispointer()) {
                     if (returninfo.return_roots) {
                         Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ);
-                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
+                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, returninfo.return_roots);
                     }
                     if (returninfo.cc == jl_returninfo_t::SRet) {
                         assert(jl_is_concrete_type(jlrettype));
+                        Align alignment(julia_alignment(jlrettype));
                         emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo,
-                                    jl_datatype_size(jlrettype), julia_alignment(jlrettype), julia_alignment(jlrettype));
+                                    jl_datatype_size(jlrettype), alignment, alignment);
                     }
                     else { // must be jl_returninfo_t::Union
                         emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union);
@@ -9032,20 +9132,17 @@ static jl_llvm_functions_t
                 }
                 else {
                     Type *store_ty = retvalinfo.V->getType();
-                    Type *dest_ty = store_ty->getPointerTo();
                     Value *Val = retvalinfo.V;
                     if (returninfo.return_roots) {
                         assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty);
-                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
+                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, returninfo.return_roots);
                     }
-                    if (dest_ty != sret->getType())
-                        sret = emit_bitcast(ctx, sret, dest_ty);
                     ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ)));
                     assert(retvalinfo.TIndex == NULL && "unreachable"); // unimplemented representation
                 }
             }
 
-            mallocVisitStmt(debuginfoloc, sync_bytes);
+            mallocVisitStmt(sync_bytes, have_dbg_update);
             if (toplevel || ctx.is_opaque_closure)
                 ctx.builder.CreateStore(last_age, world_age_field);
             assert(type_is_ghost(retty) || returninfo.cc == jl_returninfo_t::SRet ||
@@ -9075,7 +9172,7 @@ static jl_llvm_functions_t
             jl_value_t *cond = jl_gotoifnot_cond(stmt);
             int lname = jl_gotoifnot_label(stmt);
             Value *isfalse = emit_condition(ctx, cond, "if");
-            mallocVisitStmt(debuginfoloc, nullptr);
+            mallocVisitStmt(nullptr, have_dbg_update);
             come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
             workstack.push_back(lname - 1);
             BasicBlock *ifnot = BB[lname];
@@ -9121,15 +9218,17 @@ static jl_llvm_functions_t
             if (lname) {
                 // Save exception stack depth at enter for use in pop_exception
                 Value *excstack_state =
-                    ctx.builder.CreateCall(prepare_call(jl_excstack_state_func));
+                    ctx.builder.CreateCall(prepare_call(jl_excstack_state_func), {get_current_task(ctx)});
                 assert(!ctx.ssavalue_assigned[cursor]);
                 ctx.SAvalues[cursor] = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL);
                 ctx.ssavalue_assigned[cursor] = true;
                 // Actually enter the exception frame
-                CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func));
+                auto ct = get_current_task(ctx);
+                CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func), {ct});
                 // We need to mark this on the call site as well. See issue #6757
                 sj->setCanReturnTwice();
-                Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+                Value *isz = ctx.builder.CreateICmpEQ(ctx.builder.CreateExtractValue(sj, 0), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+                Value *ehbuf = ctx.builder.CreateExtractValue(sj, 1);
                 BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f);
                 BasicBlock *catchpop = BasicBlock::Create(ctx.builder.getContext(), "catch_pop", f);
                 BasicBlock *handlr = NULL;
@@ -9139,8 +9238,7 @@ static jl_llvm_functions_t
                 ctx.builder.CreateCondBr(isz, tryblk, catchpop);
                 ctx.builder.SetInsertPoint(catchpop);
                 {
-                    ctx.builder.CreateCall(prepare_call(jlleave_func),
-                                    ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1));
+                    ctx.builder.CreateCall(prepare_call(jlleave_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)});
                     if (old_scope) {
                         scope_ai.decorateInst(
                             ctx.builder.CreateAlignedStore(old_scope, scope_ptr, ctx.types().alignof_ptr));
@@ -9148,11 +9246,13 @@ static jl_llvm_functions_t
                     ctx.builder.CreateBr(handlr);
                 }
                 ctx.builder.SetInsertPoint(tryblk);
+                auto ehptr = emit_ptrgep(ctx, ct, offsetof(jl_task_t, eh));
+                ctx.builder.CreateAlignedStore(ehbuf, ehptr, ctx.types().alignof_ptr);
             }
         }
         else {
             emit_stmtpos(ctx, stmt, cursor);
-            mallocVisitStmt(debuginfoloc, nullptr);
+            mallocVisitStmt(nullptr, have_dbg_update);
         }
         find_next_stmt(cursor + 1);
     }
@@ -9258,7 +9358,7 @@ static jl_llvm_functions_t
                     // load of val) if the runtime type of val isn't phiType
                     Value *isvalid = emit_isa_and_defined(ctx, val, phiType);
                     emit_guarded_test(ctx, isvalid, nullptr, [&] {
-                        emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, julia_alignment(phiType));
+                        emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, Align(julia_alignment(phiType)));
                         return nullptr;
                     });
                 }
@@ -9275,7 +9375,7 @@ static jl_llvm_functions_t
                     RTindex = UndefValue::get(getInt8Ty(ctx.builder.getContext()));
                 }
                 else if (jl_is_concrete_type(val.typ) || val.constant) {
-                    size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType);
+                    size_t tindex = get_box_tindex((jl_datatype_t*)(val.constant ? jl_typeof(val.constant) : val.typ), phiType);
                     if (tindex == 0) {
                         if (VN)
                             V = boxed(ctx, val);
@@ -9285,7 +9385,7 @@ static jl_llvm_functions_t
                         if (VN)
                             V = Constant::getNullValue(ctx.types().T_prjlvalue);
                         if (dest)
-                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, julia_alignment(val.typ));
+                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, Align(julia_alignment(val.typ)));
                         RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex);
                     }
                 }
@@ -9385,7 +9485,7 @@ static jl_llvm_functions_t
                     // make sure that anything we attempt to call has some inlining info, just in case optimization messed up
                     // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram)
                     Function *F = call->getCalledFunction();
-                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().startswith("julia.") || &I == restTuple)) {
+                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().starts_with("julia.") || &I == restTuple)) {
                         I.setDebugLoc(topdebugloc);
                     }
                 }
@@ -9474,9 +9574,7 @@ jl_llvm_functions_t jl_emit_code(
         orc::ThreadSafeModule &m,
         jl_method_instance_t *li,
         jl_code_info_t *src,
-        jl_value_t *jlrettype,
-        jl_codegen_params_t &params,
-        size_t min_world, size_t max_world)
+        jl_codegen_params_t &params)
 {
     JL_TIMING(CODEGEN, CODEGEN_LLVM);
     jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK);
@@ -9486,7 +9584,7 @@ jl_llvm_functions_t jl_emit_code(
         compare_cgparams(params.params, &jl_default_cgparams)) &&
         "functions compiled with custom codegen params must not be cached");
     JL_TRY {
-        decls = emit_function(m, li, src, jlrettype, params, min_world, max_world);
+        decls = emit_function(m, li, src, src->rettype, params);
         auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream();
         if (stream) {
             jl_printf(stream, "%s\t", decls.specFunctionObject.c_str());
@@ -9507,12 +9605,9 @@ jl_llvm_functions_t jl_emit_code(
         decls.functionObject = "";
         decls.specFunctionObject = "";
         jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str());
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(jl_current_task));
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         jlbacktrace(); // written to STDERR_FILENO
-#ifndef JL_NDEBUG
-        abort();
-#endif
     }
 
     return decls;
@@ -9526,7 +9621,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg
     std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple);
     jl_llvm_functions_t declarations;
     declarations.functionObject = "jl_f_opaque_closure_call";
-    if (uses_specsig(mi->specTypes, false, true, rettype, true)) {
+    if (uses_specsig(mi->specTypes, false, rettype, true)) {
         jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
         Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
         jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
@@ -9572,8 +9667,8 @@ jl_llvm_functions_t jl_emit_codeinst(
             return jl_llvm_functions_t(); // failed
         }
     }
-    jl_llvm_functions_t decls = jl_emit_code(m, codeinst->def, src, codeinst->rettype, params,
-        jl_atomic_load_relaxed(&codeinst->min_world), jl_atomic_load_relaxed(&codeinst->max_world));
+    assert(jl_egal((jl_value_t*)jl_atomic_load_relaxed(&codeinst->debuginfo), (jl_value_t*)src->debuginfo) && "trying to generate code for a codeinst for an incompatible src");
+    jl_llvm_functions_t decls = jl_emit_code(m, codeinst->def, src, params);
 
     const std::string &specf = decls.specFunctionObject;
     const std::string &f = decls.functionObject;
@@ -9605,6 +9700,7 @@ jl_llvm_functions_t jl_emit_codeinst(
                 jl_options.debug_level > 1) {
                 // update the stored code
                 if (inferred != (jl_value_t*)src) {
+                    // TODO: it is somewhat unclear what it means to be mutating this
                     if (jl_is_method(def)) {
                         src = (jl_code_info_t*)jl_compress_ir(def, src);
                         assert(jl_is_string(src));
@@ -9620,13 +9716,13 @@ jl_llvm_functions_t jl_emit_codeinst(
             else if (jl_is_method(def) && // don't delete toplevel code
                         def->source != NULL && // don't delete code from optimized opaque closures that can't be reconstructed
                         inferred != jl_nothing && // and there is something to delete (test this before calling jl_ir_inlining_cost)
-                        !effects_foldable(codeinst->ipo_purity_bits) && // don't delete code we may want for irinterp
+                        !effects_foldable(jl_atomic_load_relaxed(&codeinst->ipo_purity_bits)) && // don't delete code we may want for irinterp
                         ((jl_ir_inlining_cost(inferred) == UINT16_MAX) || // don't delete inlineable code
                         jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) && // unless it is constant
                         !(params.imaging_mode || jl_options.incremental)) { // don't delete code when generating a precompile file
                 // Never end up in a situation where the codeinst has no invoke, but also no source, so we never fall
                 // through the cracks of SOURCE_MODE_ABI.
-                jl_atomic_store_release(&codeinst->invoke, &jl_fptr_wait_for_compiled);
+                jl_atomic_store_release(&codeinst->invoke, jl_fptr_wait_for_compiled_addr);
                 jl_atomic_store_release(&codeinst->inferred, jl_nothing);
             }
         }
@@ -9652,35 +9748,30 @@ void jl_compile_workqueue(
         // try to emit code for this item from the workqueue
         StringRef preal_decl = "";
         bool preal_specsig = false;
-        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
-        bool cache_valid = params.cache;
-        // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-        if (cache_valid && invoke != NULL && invoke != &jl_fptr_wait_for_compiled) {
-            auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-            if (fptr) {
-                while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                    jl_cpu_pause();
-                }
-                // in case we are racing with another thread that is emitting this function
-                invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-            }
+        jl_callptr_t invoke = NULL;
+        if (params.cache) {
+            // WARNING: this correctness is protected by an outer lock
+            uint8_t specsigflags;
+            void *fptr;
+            jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
+            //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr)
             if (invoke == jl_fptr_args_addr) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
             }
-            else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
+            else if (specsigflags & 0b1) {
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
                 preal_specsig = true;
             }
         }
-        else {
+        if (preal_decl.empty()) {
             auto it = params.compiled_functions.find(codeinst);
             if (it == params.compiled_functions.end()) {
                 // Reinfer the function. The JIT came along and removed the inferred
                 // method body. See #34993
                 if (policy != CompilationPolicy::Default &&
                     jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
-                    // Codegen lock is held, so SOURCE_MODE_FORCE_SOURCE_UNCACHED is not required
-                    codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), 0, SOURCE_MODE_FORCE_SOURCE);
+                    // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
+                    codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
                 }
                 if (codeinst) {
                     orc::ThreadSafeModule result_m =
@@ -9709,7 +9800,10 @@ void jl_compile_workqueue(
             // expected specsig
             if (!preal_specsig) {
                 // emit specsig-to-(jl)invoke conversion
-                Function *preal = emit_tojlinvoke(codeinst, mod, params);
+                StringRef invokeName;
+                if (invoke != NULL)
+                    invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst);
+                Function *preal = emit_tojlinvoke(codeinst, invokeName, mod, params);
                 proto.decl->setLinkage(GlobalVariable::InternalLinkage);
                 //protodecl->setAlwaysInline();
                 jl_init_function(proto.decl, params.TargetTriple);
@@ -9726,7 +9820,10 @@ void jl_compile_workqueue(
             // expected non-specsig
             if (preal_decl.empty() || preal_specsig) {
                 // emit jlcall1-to-(jl)invoke conversion
-                preal_decl = emit_tojlinvoke(codeinst, mod, params)->getName();
+                StringRef invokeName;
+                if (invoke != NULL)
+                    invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst);
+                preal_decl = emit_tojlinvoke(codeinst, invokeName, mod, params)->getName();
             }
         }
         if (!preal_decl.empty()) {
@@ -9795,7 +9892,6 @@ static void init_jit_functions(void)
     add_named_global(memcmp_func, &memcmp);
     add_named_global(jltypeerror_func, &jl_type_error);
     add_named_global(jlcheckassign_func, &jl_checked_assignment);
-    add_named_global(jldeclareconst_func, &jl_declare_constant);
     add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error);
     add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr);
     add_named_global(jlboundp_func, &jl_boundp);
@@ -9809,9 +9905,10 @@ static void init_jit_functions(void)
     add_named_global(jlcopyast_func, &jl_copy_ast);
     //add_named_global(jlnsvec_func, &jl_svec);
     add_named_global(jlmethod_func, &jl_method_def);
-    add_named_global(jlgenericfunction_func, &jl_generic_function_def);
+    add_named_global(jlgenericfunction_func, &jl_declare_const_gf);
     add_named_global(jlenter_func, &jl_enter_handler);
     add_named_global(jl_current_exception_func, &jl_current_exception);
+    add_named_global(jlleave_noexcept_func, &jl_pop_handler_noexcept);
     add_named_global(jlleave_func, &jl_pop_handler);
     add_named_global(jl_restore_excstack_func, &jl_restore_excstack);
     add_named_global(jl_excstack_state_func, &jl_excstack_state);
@@ -9885,7 +9982,9 @@ char jl_using_perf_jitevents = 0;
 
 int jl_is_timing_passes = 0;
 
+#if JL_LLVM_VERSION < 170000
 int jl_opaque_ptrs_set = 0;
+#endif
 
 extern "C" void jl_init_llvm(void)
 {
@@ -9936,6 +10035,7 @@ extern "C" void jl_init_llvm(void)
     if (clopt && clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "4", 1);
 
+    #if JL_LLVM_VERSION < 170000
     // we want the opaque-pointers to be opt-in, per LLVMContext, for this release
     // so change the default value back to pre-14.x, without changing the NumOccurrences flag for it
     clopt = llvmopts.lookup("opaque-pointers");
@@ -9944,6 +10044,7 @@ extern "C" void jl_init_llvm(void)
     } else {
         jl_opaque_ptrs_set = 1;
     }
+    #endif
 
     clopt = llvmopts.lookup("time-passes");
     if (clopt && clopt->getNumOccurrences() > 0)
@@ -10023,8 +10124,6 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
     if (jl_ExecutionEngine)
         jl_ExecutionEngine->printTimers();
     PrintStatistics();
-    JL_LOCK(&jl_codegen_lock); // TODO: If this lock gets removed reconsider
-                                    // LLVM global state/destructors (maybe a rwlock)
 }
 
 // the rest of this file are convenience functions
diff --git a/src/coverage.cpp b/src/coverage.cpp
index 100a4c66322bd..685370198ff13 100644
--- a/src/coverage.cpp
+++ b/src/coverage.cpp
@@ -77,11 +77,10 @@ JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line)
     return allocLine(mallocData[filename], line);
 }
 
-// Resets the malloc counts.
-extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
+static void clear_log_data(logdata_t &logData, int resetValue)
 {
-    logdata_t::iterator it = mallocData.begin();
-    for (; it != mallocData.end(); it++) {
+    logdata_t::iterator it = logData.begin();
+    for (; it != logData.end(); it++) {
         SmallVector<logdata_block*, 0> &bytes = (*it).second;
         SmallVector<logdata_block*, 0>::iterator itb;
         for (itb = bytes.begin(); itb != bytes.end(); itb++) {
@@ -89,7 +88,7 @@ extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
                 logdata_block &data = **itb;
                 for (int i = 0; i < logdata_blocksize; i++) {
                     if (data[i] > 0)
-                        data[i] = 1;
+                        data[i] = resetValue;
                 }
             }
         }
@@ -97,6 +96,18 @@ extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
     jl_gc_sync_total_bytes(0);
 }
 
+// Resets the malloc counts.
+extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
+{
+    clear_log_data(mallocData, 1);
+}
+
+// Resets the code coverage
+extern "C" JL_DLLEXPORT void jl_clear_coverage_data(void)
+{
+    clear_log_data(coverageData, 0);
+}
+
 static void write_log_data(logdata_t &logData, const char *extension)
 {
     std::string base = std::string(jl_options.julia_bindir);
@@ -196,7 +207,7 @@ extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
 {
     if (output) {
         StringRef output_pattern(output);
-        if (output_pattern.endswith(".info"))
+        if (output_pattern.ends_with(".info"))
             write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str()));
     }
     else {
diff --git a/src/crc32c.c b/src/crc32c.c
index 1d667b3dbf656..50d2acc603359 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -178,6 +178,9 @@ JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len)
     return crc32c_sse42(crc, buf, len);
 }
 #  else
+#if defined(JL_CRC32C_USE_IFUNC) && defined(_COMPILER_CLANG_)
+JL_UNUSED
+#endif
 static crc32c_func_t crc32c_dispatch(void)
 {
     // When used in ifunc, we cannot call external functions (i.e. jl_cpuid)
diff --git a/src/datatype.c b/src/datatype.c
index f9f7dec3d1b13..c78b00fdd2245 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
+#include <stdalign.h>
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
@@ -19,23 +20,21 @@ extern "C" {
 
 // allocating TypeNames -----------------------------------------------------------
 
-static int is10digit(char c) JL_NOTSAFEPOINT
-{
-    return (c >= '0' && c <= '9');
-}
-
 static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT
 {
     char *n = jl_symbol_name(s);
     if (n[0] != '#')
         return s;
-    char *end = strrchr(n, '#');
+    char *end = strchr(&n[1], '#');
+    // handle `#f...##...#...`
+    if (end != NULL && end[1] == '#')
+        end = strchr(&end[2], '#');
     int32_t len;
-    if (end == n || end == n+1)
+    if (end == NULL || end == n+1)
         len = strlen(n) - 1;
     else
         len = (end-n) - 1;  // extract `f` from `#f#...`
-    if (is10digit(n[1]))
+    if (isdigit(n[1]) || is_canonicalized_anonfn_typename(n))
         return _jl_symbol(n, len+1);
     return _jl_symbol(&n[1], len);
 }
@@ -82,6 +81,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->atomicfields = NULL;
     tn->constfields = NULL;
     tn->max_methods = 0;
+    tn->constprop_heustic = 0;
     return tn;
 }
 
@@ -179,6 +179,7 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
                                            uint32_t npointers,
                                            uint32_t alignment,
                                            int haspadding,
+                                           int isbitsegal,
                                            int arrayelem,
                                            jl_fielddesc32_t desc[],
                                            uint32_t pointers[]) JL_NOTSAFEPOINT
@@ -225,6 +226,7 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
     flddesc->nfields = nfields;
     flddesc->alignment = alignment;
     flddesc->flags.haspadding = haspadding;
+    flddesc->flags.isbitsegal = isbitsegal;
     flddesc->flags.fielddesc_type = fielddesc_type;
     flddesc->flags.arrayelem_isboxed = arrayelem == 1;
     flddesc->flags.arrayelem_isunion = arrayelem == 2;
@@ -353,6 +355,8 @@ int jl_struct_try_layout(jl_datatype_t *dt)
 
 int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree)
 {
+    if (jl_typeofbottom_type && ty == jl_typeofbottom_type->super)
+        ty = jl_typeofbottom_type;
     if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) {
         if (ty->layout->npointers > 0) {
             if (pointerfree)
@@ -503,6 +507,7 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
     int isunboxed = jl_islayout_inline(eltype, &elsz, &al) && (kind != (jl_value_t*)jl_atomic_sym || jl_is_datatype(eltype));
     int isunion = isunboxed && jl_is_uniontype(eltype);
     int haspadding = 1; // we may want to eventually actually compute this more precisely
+    int isbitsegal = 0;
     int nfields = 0; // aka jl_is_layout_opaque
     int npointers = 1;
     int zi;
@@ -561,7 +566,7 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
     else
         arrayelem = 0;
     assert(!st->layout);
-    st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, arrayelem, NULL, pointers);
+    st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers);
     st->zeroinit = zi;
     //st->has_concrete_subtype = 1;
     //st->isbitstype = 0;
@@ -620,18 +625,17 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         // if we have no fields, we can trivially skip the rest
         if (st == jl_symbol_type || st == jl_string_type) {
             // opaque layout - heap-allocated blob
-            static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, {0}};
+            static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
             st->layout = &opaque_byte_layout;
             return;
         }
         else if (st == jl_simplevector_type || st == jl_module_type) {
-            static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), {0}};
+            static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
             st->layout = &opaque_ptr_layout;
             return;
         }
         else {
-            // reuse the same layout for all singletons
-            static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, {0}};
+            static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
             st->layout = &singleton_layout;
         }
     }
@@ -672,6 +676,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         size_t alignm = 1;
         int zeroinit = 0;
         int haspadding = 0;
+        int isbitsegal = 1;
         int homogeneous = 1;
         int needlock = 0;
         uint32_t npointers = 0;
@@ -686,19 +691,30 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     throw_ovf(should_malloc, desc, st, fsz);
                 desc[i].isptr = 0;
                 if (jl_is_uniontype(fld)) {
-                    haspadding = 1;
                     fsz += 1; // selector byte
                     zeroinit = 1;
+                    // TODO: Some unions could be bits comparable.
+                    isbitsegal = 0;
                 }
                 else {
                     uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers;
                     if (((jl_datatype_t*)fld)->layout->flags.haspadding)
                         haspadding = 1;
+                    if (!((jl_datatype_t*)fld)->layout->flags.isbitsegal)
+                        isbitsegal = 0;
                     if (i >= nfields - st->name->n_uninitialized && fld_npointers &&
                         fld_npointers * sizeof(void*) != fsz) {
-                        // field may be undef (may be uninitialized and contains pointer),
-                        // and contains non-pointer fields of non-zero sizes.
-                        haspadding = 1;
+                        // For field types that contain pointers, we allow inlinealloc
+                        // as long as the field type itself is always fully initialized.
+                        // In such a case, we use the first pointer in the inlined field
+                        // as the #undef marker (if it is zero, we treat the whole inline
+                        // struct as #undef). However, we do not zero-initialize the whole
+                        // struct, so the non-pointer parts of the inline allocation may
+                        // be arbitrary, but still need to compare egal (because all #undef)
+                        // representations are egal. Because of this, we cannot bitscompare
+                        // them.
+                        // TODO: Consider zero-initializing the whole struct.
+                        isbitsegal = 0;
                     }
                     if (!zeroinit)
                         zeroinit = ((jl_datatype_t*)fld)->zeroinit;
@@ -714,8 +730,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                 zeroinit = 1;
                 npointers++;
                 if (!jl_pointer_egal(fld)) {
-                    // this somewhat poorly named flag says whether some of the bits can be non-unique
-                    haspadding = 1;
+                    isbitsegal = 0;
                 }
             }
             if (isatomic && fsz > MAX_ATOMIC_SIZE)
@@ -776,7 +791,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             }
         }
         assert(ptr_i == npointers);
-        st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, 0, desc, pointers);
+        st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, isbitsegal, 0, desc, pointers);
         if (should_malloc) {
             free(desc);
             if (npointers)
@@ -793,14 +808,6 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     return;
 }
 
-static int is_anonfn_typename(char *name)
-{
-    if (name[0] != '#' || name[1] == '#')
-        return 0;
-    char *other = strrchr(name, '#');
-    return other > &name[1] && is10digit(other[1]);
-}
-
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
         jl_sym_t *name,
         jl_module_t *module,
@@ -929,6 +936,10 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
+# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_)
+    if (alignm == 8)
+        alignm = 4;
+# endif
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
     // memoize isprimitivetype, since it is much easier than checking
@@ -938,7 +949,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
     bt->ismutationfree = 1;
     bt->isidentityfree = 1;
     bt->isbitstype = (parameters == jl_emptysvec);
-    bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, 0, NULL, NULL);
+    bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, 1, 0, NULL, NULL);
     bt->instance = NULL;
     return bt;
 }
@@ -961,6 +972,7 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     layout->alignment = sizeof(void *);
     layout->npointers = haspointers;
     layout->flags.haspadding = 1;
+    layout->flags.isbitsegal = 0;
     layout->flags.fielddesc_type = 3;
     layout->flags.padding = 0;
     layout->flags.arrayelem_isboxed = 0;
@@ -1012,7 +1024,7 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
 
 #if MAX_POINTERATOMIC_SIZE >= 16
 typedef struct _jl_uint128_t {
-    uint64_t a;
+    alignas(16) uint64_t a;
     uint64_t b;
 } jl_uint128_t;
 #endif
@@ -1242,7 +1254,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
     }
     else if (nb == 1) {
         uint8_t *y8 = (uint8_t*)y;
-        assert(!dt->layout->flags.haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y8 = *(uint8_t*)expected;
             uint8_t z8 = *(uint8_t*)src;
@@ -1255,7 +1267,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
     }
     else if (nb == 2) {
         uint16_t *y16 = (uint16_t*)y;
-        assert(!dt->layout->flags.haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y16 = *(uint16_t*)expected;
             uint16_t z16 = *(uint16_t*)src;
@@ -1273,7 +1285,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             uint32_t z32 = zext_read32(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1290,7 +1302,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             uint64_t z64 = zext_read64(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1308,7 +1320,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             jl_uint128_t z128 = zext_read128(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1356,7 +1368,7 @@ JL_DLLEXPORT int jl_atomic_storeonce_bits(jl_datatype_t *dt, char *dst, const jl
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16) {
-        jl_uint128_t y128 = 0;
+        jl_uint128_t y128 = {0};
         jl_uint128_t z128 = zext_read128(src, nb);
         while (1) {
             success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y128, z128);
@@ -1644,6 +1656,8 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
     jl_task_t *ct = jl_current_task;
     if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) {
+        if (type == jl_typeofbottom_type->super)
+            return jl_bottom_type; // ::Type{Union{}} is an abstract type, but is also a singleton when used as a field type
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
     if (type->instance != NULL)
@@ -1729,7 +1743,7 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
         }
     }
     if (err)
-        jl_has_no_field_error(t->name->name, fld);
+        jl_has_no_field_error(t, fld);
     return -1;
 }
 
@@ -2002,7 +2016,7 @@ inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_
         else {
             char *px = lock(p, parent, needlock, isatomic);
             int success = memcmp(px, (char*)r, fsz) == 0;
-            if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
                 success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty);
             if (success) {
                 if (isunion) {
@@ -2117,7 +2131,7 @@ inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value
         success = (rty == jl_typeof(expected));
         if (success) {
             success = memcmp((char*)r, (char*)expected, rsz) == 0;
-            if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
                 success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
         }
         *((uint8_t*)r + fsz) = success ? 1 : 0;
@@ -2176,7 +2190,7 @@ inline int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *parent, jl_valu
     }
     else {
         char *px = lock(p, parent, needlock, isatomic);
-        success = undefref_check(rty, (jl_value_t*)px) != NULL;
+        success = undefref_check(rty, (jl_value_t*)px) == NULL;
         if (success)
             memassign_safe(hasptr, px, rhs, fsz);
         unlock(p, parent, needlock, isatomic);
@@ -2246,6 +2260,39 @@ JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field)
     return jl_field_offset(ty, field - 1);
 }
 
+jl_value_t *get_nth_pointer(jl_value_t *v, size_t i)
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v);
+    const jl_datatype_layout_t *ly = dt->layout;
+    uint32_t npointers = ly->npointers;
+    if (i >= npointers)
+        jl_bounds_error_int(v, i);
+    const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly);
+    const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly);
+    const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly);
+    uint32_t fld;
+    if (ly->flags.fielddesc_type == 0)
+        fld = ptrs8[i];
+    else if (ly->flags.fielddesc_type == 1)
+        fld = ptrs16[i];
+    else
+        fld = ptrs32[i];
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)(&((jl_value_t**)v)[fld]));
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_nth_pointer(jl_value_t *v, size_t i)
+{
+    jl_value_t *ptrf = get_nth_pointer(v, i);
+    if (__unlikely(ptrf == NULL))
+        jl_throw(jl_undefref_exception);
+    return ptrf;
+}
+
+JL_DLLEXPORT int jl_nth_pointer_isdefined(jl_value_t *v, size_t i)
+{
+    return get_nth_pointer(v, i) != NULL;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/debug-registry.h b/src/debug-registry.h
index f30049eb5b210..85a94245ce6aa 100644
--- a/src/debug-registry.h
+++ b/src/debug-registry.h
@@ -99,18 +99,26 @@ class JITDebugInfoRegistry
     };
 private:
 
-    struct ObjectInfo {
-        const llvm::object::ObjectFile *object = nullptr;
-        size_t SectionSize = 0;
-        ptrdiff_t slide = 0;
-        llvm::object::SectionRef Section{};
-        llvm::DIContext *context = nullptr;
+    struct LazyObjectInfo {
+        SmallVector<uint8_t, 0> data;
+        size_t uncompressedsize;
+        std::unique_ptr<const llvm::object::ObjectFile> object;
+        std::unique_ptr<llvm::DIContext> context;
+        LazyObjectInfo() = delete;
+    };
+
+    struct SectionInfo {
+        LazyObjectInfo *object;
+        size_t SectionSize;
+        ptrdiff_t slide;
+        uint64_t SectionIndex;
+        SectionInfo() = delete;
     };
 
     template<typename KeyT, typename ValT>
     using rev_map = std::map<KeyT, ValT, std::greater<KeyT>>;
 
-    typedef rev_map<size_t, ObjectInfo> objectmap_t;
+    typedef rev_map<size_t, SectionInfo> objectmap_t;
     typedef rev_map<uint64_t, objfileentry_t> objfilemap_t;
 
     objectmap_t objectmap{};
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 5cb7401a036d4..84550811072fe 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -7,6 +7,7 @@
 #include <llvm/DebugInfo/DWARF/DWARFContext.h>
 #include <llvm/Object/SymbolSize.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/MemoryBufferRef.h>
 #include <llvm/IR/Function.h>
 #include <llvm/ADT/StringRef.h>
 #include <llvm/ADT/StringMap.h>
@@ -335,8 +336,12 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
 
+    SmallVector<uint8_t, 0> packed;
+    compression::zlib::compress(ArrayRef<uint8_t>((uint8_t*)Object.getData().data(), Object.getData().size()), packed, compression::zlib::DefaultCompression);
+    jl_jit_add_bytes(packed.size());
+    auto ObjectCopy = new LazyObjectInfo{packed, Object.getData().size()}; // intentionally leaked so that we don't need to ref-count it, intentionally copied so that we exact-size the allocation (since no shrink_to_fit function)
     auto symbols = object::computeSymbolSizes(Object);
-    bool first = true;
+    bool hassection = false;
     for (const auto &sym_size : symbols) {
         const object::SymbolRef &sym_iter = sym_size.first;
         object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
@@ -385,17 +390,17 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
         jl_profile_atomic([&]() JL_NOTSAFEPOINT {
             if (mi)
                 linfomap[Addr] = std::make_pair(Size, mi);
-            if (first) {
-                objectmap[SectionLoadAddr] = {&Object,
-                    (size_t)SectionSize,
-                    (ptrdiff_t)(SectionAddr - SectionLoadAddr),
-                    *Section,
-                    nullptr,
-                    };
-                first = false;
-            }
+            hassection = true;
+            objectmap.insert(std::pair{SectionLoadAddr, SectionInfo{
+                ObjectCopy,
+                (size_t)SectionSize,
+                (ptrdiff_t)(SectionAddr - SectionLoadAddr),
+                Section->getIndex()
+                }});
         });
     }
+    if (!hassection) // clang-sa demands that we do this to fool cplusplus.NewDeleteLeaks
+        delete ObjectCopy;
 }
 
 void jl_register_jit_object(const object::ObjectFile &Object,
@@ -1213,11 +1218,33 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
     auto fit = objmap.lower_bound(fptr);
     if (fit != objmap.end() && fptr < fit->first + fit->second.SectionSize) {
         *slide = fit->second.slide;
-        *Section = fit->second.Section;
-        if (context) {
-            if (fit->second.context == nullptr)
-                fit->second.context = DWARFContext::create(*fit->second.object).release();
-            *context = fit->second.context;
+        auto lazyobject = fit->second.object;
+        if (!lazyobject->object && !lazyobject->data.empty()) {
+            if (lazyobject->uncompressedsize) {
+                SmallVector<uint8_t, 0> unpacked;
+                Error E = compression::zlib::decompress(lazyobject->data, unpacked, lazyobject->uncompressedsize);
+                if (E)
+                    lazyobject->data.clear();
+                else
+                    lazyobject->data = std::move(unpacked);
+                jl_jit_add_bytes(lazyobject->data.size() - lazyobject->uncompressedsize);
+                lazyobject->uncompressedsize = 0;
+            }
+            if (!lazyobject->data.empty()) {
+                auto obj = object::ObjectFile::createObjectFile(MemoryBufferRef(StringRef((const char*)lazyobject->data.data(), lazyobject->data.size()), "jit.o"));
+                if (obj)
+                    lazyobject->object = std::move(*obj);
+                else
+                    lazyobject->data.clear();
+            }
+        }
+        if (lazyobject->object) {
+            *Section = *std::next(lazyobject->object->section_begin(), fit->second.SectionIndex);
+            if (context) {
+                if (lazyobject->context == nullptr)
+                    lazyobject->context = DWARFContext::create(*lazyobject->object);
+                *context = lazyobject->context.get();
+            }
         }
         found = 1;
     }
diff --git a/src/debuginfo.h b/src/debuginfo.h
index 5b5cdcb82d534..6cd7528910765 100644
--- a/src/debuginfo.h
+++ b/src/debuginfo.h
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // Declarations for debuginfo.cpp
+void jl_jit_add_bytes(size_t bytes) JL_NOTSAFEPOINT;
 
 int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
         llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT;
diff --git a/src/disasm.cpp b/src/disasm.cpp
index c2a55f23d12f2..b71503c3f7a77 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -58,7 +58,11 @@
 #include "llvm-version.h"
 
 // for outputting disassembly
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Triple.h>
+#else
 #include <llvm/ADT/Triple.h>
+#endif
 #include <llvm/AsmParser/Parser.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/BinaryFormat/COFF.h>
@@ -495,7 +499,7 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada
     std::string code;
     raw_string_ostream stream(code);
 
-    {
+    if (dump->F) {
         //RAII will release the module
         auto TSM = std::unique_ptr<orc::ThreadSafeModule>(unwrap(dump->TSM));
         //If TSM is not passed in, then the context MUST be locked externally.
@@ -1094,7 +1098,11 @@ static void jl_dump_asm_internal(
                             const MCOperand &OpI = Inst.getOperand(Op);
                             if (OpI.isImm()) {
                                 int64_t imm = OpI.getImm();
+                                #if JL_LLVM_VERSION >= 170000
+                                if (opinfo.operands()[Op].OperandType == MCOI::OPERAND_PCREL)
+                                #else
                                 if (opinfo.OpInfo[Op].OperandType == MCOI::OPERAND_PCREL)
+                                #endif
                                     imm += Fptr + Index;
                                 const char *name = DisInfo.lookupSymbolName(imm);
                                 if (name)
@@ -1196,7 +1204,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
 {
     // precise printing via IR assembler
     SmallVector<char, 4096> ObjBufferSV;
-    { // scope block
+    if (dump->F) { // scope block also
         auto TSM = std::unique_ptr<orc::ThreadSafeModule>(unwrap(dump->TSM));
         llvm::raw_svector_ostream asmfile(ObjBufferSV);
         TSM->withModuleDo([&](Module &m) {
@@ -1216,7 +1224,11 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
         addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
         if (emit_mc) {
             raw_svector_ostream obj_OS(ObjBufferSV);
+#if JL_LLVM_VERSION >= 180000
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CodeGenFileType::ObjectFile, false, nullptr))
+#else
             if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+#endif
                 return jl_an_empty_string;
             TSM->withModuleDo([&](Module &m) { PM.run(m); });
         }
diff --git a/src/dlload.c b/src/dlload.c
index 484c36a228886..91980cc4ecbbf 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -309,7 +309,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     */
     if (!abspath && !is_atpath && jl_base_module != NULL) {
         jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"), 0);
-        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL);
+        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_get_binding_value(b) : NULL);
         if (DL_LOAD_PATH != NULL) {
             size_t j;
             for (j = 0; j < jl_array_nrows(DL_LOAD_PATH); j++) {
diff --git a/src/engine.cpp b/src/engine.cpp
new file mode 100644
index 0000000000000..6db4dce44e48e
--- /dev/null
+++ b/src/engine.cpp
@@ -0,0 +1,158 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <mutex>
+#include <condition_variable>
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/ADT/DenseSet.h>
+#include <llvm/ADT/SmallVector.h>
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+
+using namespace llvm;
+
+struct ReservationInfo {
+    int16_t tid = 0;
+    jl_code_instance_t *ci = nullptr;
+};
+
+struct InferKey {
+    jl_method_instance_t *mi = nullptr;
+    jl_value_t *owner = nullptr;
+};
+
+template<> struct llvm::DenseMapInfo<InferKey> {
+  using FirstInfo = DenseMapInfo<jl_method_instance_t*>;
+  using SecondInfo = DenseMapInfo<jl_value_t*>;
+
+  static inline InferKey getEmptyKey() {
+    return InferKey{FirstInfo::getEmptyKey(),
+                    SecondInfo::getEmptyKey()};
+  }
+
+  static inline InferKey getTombstoneKey() {
+    return InferKey{FirstInfo::getTombstoneKey(),
+                    SecondInfo::getTombstoneKey()};
+  }
+
+  static unsigned getHashValue(const InferKey& PairVal) {
+    return detail::combineHashValue(FirstInfo::getHashValue(PairVal.mi),
+                                    SecondInfo::getHashValue(PairVal.owner));
+  }
+
+  static bool isEqual(const InferKey &LHS, const InferKey &RHS) {
+    return LHS.mi == RHS.mi && LHS.owner == RHS.owner;
+  }
+};
+
+static std::mutex engine_lock;
+static std::condition_variable engine_wait;
+// map from MethodInstance to threadid that owns it currently for inference
+static DenseMap<InferKey, ReservationInfo> Reservations;
+// vector of which threads are blocked and which lease they need
+static SmallVector<InferKey, 0> Awaiting; // (this could be merged into ptls also)
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner)
+{
+    jl_task_t *ct = jl_current_task;
+    ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph
+    jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder
+    JL_GC_PUSH1(&ci);
+    int8_t gc_state = jl_gc_safe_enter(ct->ptls);
+    InferKey key = {m, owner};
+    std::unique_lock<std::mutex> lock(engine_lock);
+    auto tid = jl_atomic_load_relaxed(&ct->tid);
+    if ((signed)Awaiting.size() < tid + 1)
+        Awaiting.resize(tid + 1);
+    while (1) {
+        auto record = Reservations.find(key);
+        if (record == Reservations.end()) {
+            Reservations[key] = ReservationInfo{tid, ci};
+            lock.unlock();
+            jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint
+            JL_GC_POP();
+            return ci;
+        }
+        // before waiting, need to run deadlock/cycle detection
+        // there is a cycle if the thread holding our lease is blocked
+        // and waiting for (transitively) any lease that is held by this thread
+        auto wait_tid = record->second.tid;
+        while (1) {
+            if (wait_tid == tid) {
+                lock.unlock();
+                jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint
+                JL_GC_POP();
+                ct->ptls->engine_nqueued--;
+                return ci; // break the cycle
+            }
+            if ((signed)Awaiting.size() <= wait_tid)
+                break; // no cycle, since it is running (and this should be unreachable)
+            auto key2 = Awaiting[wait_tid];
+            if (key2.mi == nullptr)
+                break; // no cycle, since it is running
+            auto record2 = Reservations.find(key2);
+            if (record2 == Reservations.end())
+                break; // no cycle, since it is about to resume
+            assert(wait_tid != record2->second.tid);
+            wait_tid = record2->second.tid;
+        }
+        Awaiting[tid] = key;
+        engine_wait.wait(lock);
+        Awaiting[tid] = InferKey{};
+    }
+}
+
+int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner)
+{
+    jl_task_t *ct = jl_current_task;
+    InferKey key = {m, owner};
+    std::unique_lock<std::mutex> lock(engine_lock);
+    auto record = Reservations.find(key);
+    return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid);
+}
+
+STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_MARKED) != 0;
+}
+
+void jl_engine_sweep(jl_ptls_t *gc_all_tls_states)
+{
+    std::unique_lock<std::mutex> lock(engine_lock);
+    bool any = false;
+    for (auto I = Reservations.begin(); I != Reservations.end(); ++I) {
+        jl_code_instance_t *ci = I->second.ci;
+        if (!gc_marked(jl_astaggedvalue(ci)->bits.gc)) {
+            auto tid = I->second.tid;
+            Reservations.erase(I);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            ptls2->engine_nqueued--;
+            any = true;
+        }
+    }
+    if (any)
+        engine_wait.notify_all();
+}
+
+void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src)
+{
+    jl_task_t *ct = jl_current_task;
+    std::unique_lock<std::mutex> lock(engine_lock);
+    auto record = Reservations.find(InferKey{ci->def, ci->owner});
+    if (record == Reservations.end() || record->second.ci != ci)
+        return;
+    assert(jl_atomic_load_relaxed(&ct->tid) == record->second.tid);
+    ct->ptls->engine_nqueued--; // re-enables finalizers, but doesn't immediately try to run them
+    Reservations.erase(record);
+    engine_wait.notify_all();
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/flisp/cvalues.c b/src/flisp/cvalues.c
index a5635c238ba3c..749b8802dfe82 100644
--- a/src/flisp/cvalues.c
+++ b/src/flisp/cvalues.c
@@ -101,7 +101,7 @@ void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv)
     autorelease(fl_ctx, cv);
 }
 
-static value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz)
+value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz)
 {
     cprim_t *pcp = (cprim_t*)alloc_words(fl_ctx, CPRIM_NWORDS-1+NWORDS(sz));
     pcp->type = type;
diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h
index b031e456cd3fe..f8dd1cfd81ed0 100644
--- a/src/flisp/flisp.h
+++ b/src/flisp/flisp.h
@@ -158,7 +158,7 @@ value_t fl_cons(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_list2(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_listn(fl_context_t *fl_ctx, size_t n, ...) JL_NOTSAFEPOINT;
 value_t symbol(fl_context_t *fl_ctx, const char *str) JL_NOTSAFEPOINT;
-char *symbol_name(fl_context_t *fl_ctx, value_t v);
+char *symbol_name(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
 int fl_is_keyword_name(const char *str, size_t len);
 value_t alloc_vector(fl_context_t *fl_ctx, size_t n, int init);
 size_t llength(value_t v);
@@ -328,6 +328,7 @@ typedef float    fl_float_t;
 typedef value_t (*builtin_t)(fl_context_t*, value_t*, uint32_t);
 
 value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
+value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
 value_t cvalue_no_finalizer(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
 void add_finalizer(fl_context_t *fl_ctx, cvalue_t *cv);
 void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv);
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c
index f29e3972755c5..07d074e1fb80b 100644
--- a/src/flisp/julia_extensions.c
+++ b/src/flisp/julia_extensions.c
@@ -76,7 +76,7 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
              wc != 0x233f &&  // notslash
              wc != 0x00a6) || // broken bar
 
-            // math symbol (category Sm) whitelist
+            // math symbol (category Sm) allowlist
             (wc >= 0x2140 && wc <= 0x2a1c &&
              ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
               wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
@@ -405,7 +405,7 @@ value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t
     uint8_t *s = (uint8_t*)cvalue_data(args[0]);
     size_t len = cv_len((cvalue_t*)ptr(args[0]));
     uint32_t u = _string_only_julia_char(s, len);
-    if (u == (uint32_t)-1)
+    if (u == UINT32_MAX)
         return fl_ctx->F;
     return fl_list2(fl_ctx, fl_ctx->jl_char_sym, mk_uint32(fl_ctx, u));
 }
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
index c7ee32269138a..5b462d48cd2de 100644
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -3,7 +3,6 @@
 #include "gc-alloc-profiler.h"
 
 #include "julia_internal.h"
-#include "gc.h"
 
 #include "llvm/ADT/SmallVector.h"
 
diff --git a/src/gc-common.c b/src/gc-common.c
new file mode 100644
index 0000000000000..ee461b576ea9e
--- /dev/null
+++ b/src/gc-common.c
@@ -0,0 +1,506 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc-common.h"
+#include "julia.h"
+#include "julia_atomics.h"
+#include "julia_gcext.h"
+#include "julia_assert.h"
+#include "threading.h"
+#ifdef __GLIBC__
+#include <malloc.h> // for malloc_trim
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
+
+jl_gc_num_t gc_num = {0};
+
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
+
+jl_gc_callback_list_t *gc_cblist_root_scanner;
+jl_gc_callback_list_t *gc_cblist_task_scanner;
+jl_gc_callback_list_t *gc_cblist_pre_gc;
+jl_gc_callback_list_t *gc_cblist_post_gc;
+jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+jl_gc_callback_list_t *gc_cblist_notify_external_free;
+jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
+
+static void jl_gc_register_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
+{
+    while (*list != NULL) {
+        if ((*list)->func == func)
+            return;
+        list = &((*list)->next);
+    }
+    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
+    (*list)->next = NULL;
+    (*list)->func = func;
+}
+
+static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
+{
+    while (*list != NULL) {
+        if ((*list)->func == func) {
+            jl_gc_callback_list_t *tmp = *list;
+            (*list) = (*list)->next;
+            free(tmp);
+            return;
+        }
+        list = &((*list)->next);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
+}
+
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+jl_mutex_t finalizers_lock;
+arraylist_t finalizer_list_marked;
+arraylist_t to_finalize;
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
+{
+    arraylist_push(&to_finalize, o);
+    arraylist_push(&to_finalize, f);
+    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
+    // release our lock, and that will have a release barrier by then
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
+}
+
+void run_finalizer(jl_task_t *ct, void *o, void *ff)
+{
+    int ptr_finalizer = gc_ptr_tag(o, 1);
+    o = gc_ptr_clear_tag(o, 3);
+    if (ptr_finalizer) {
+        ((void (*)(void*))ff)((void*)o);
+        return;
+    }
+    JL_TRY {
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1);
+        ct->world_age = last_age;
+    }
+    JL_CATCH {
+        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
+    }
+}
+
+// if `need_sync` is true, the `list` is the `finalizers` list of another
+// thread and we need additional synchronizations
+static void finalize_object(arraylist_t *list, jl_value_t *o,
+                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
+{
+    // The acquire load makes sure that the first `len` objects are valid.
+    // If `need_sync` is true, all mutations of the content should be limited
+    // to the first `oldlen` elements and no mutation is allowed after the
+    // new length is published with the `cmpxchg` at the end of the function.
+    // This way, the mutation should not conflict with the owning thread,
+    // which only writes to locations later than `len`
+    // and will not resize the buffer without acquiring the lock.
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
+    size_t oldlen = len;
+    void **items = list->items;
+    size_t j = 0;
+    for (size_t i = 0; i < len; i += 2) {
+        void *v = items[i];
+        int move = 0;
+        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
+            void *f = items[i + 1];
+            move = 1;
+            arraylist_push(copied_list, v);
+            arraylist_push(copied_list, f);
+        }
+        if (move || __unlikely(!v)) {
+            // remove item
+        }
+        else {
+            if (j < i) {
+                items[j] = items[i];
+                items[j+1] = items[i+1];
+            }
+            j += 2;
+        }
+    }
+    len = j;
+    if (oldlen == len)
+        return;
+    if (need_sync) {
+        // The memset needs to be unconditional since the thread might have
+        // already read the length.
+        // The `memset` (like any other content mutation) has to be done
+        // **before** the `cmpxchg` which publishes the length.
+        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
+    }
+    else {
+        list->len = len;
+    }
+}
+
+// The first two entries are assumed to be empty and the rest are assumed to
+// be pointers to `jl_value_t` objects
+static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
+{
+    void **items = list->items;
+    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
+    items[1] = ct->gcstack;
+    ct->gcstack = (jl_gcframe_t*)items;
+}
+
+// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
+// to be hold for the current thread and will release the lock when the
+// function returns.
+static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
+{
+    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
+    // of `finalizer`) in a finalizer:
+    uint8_t sticky = ct->sticky;
+    // empty out the first two entries for the GC frame
+    arraylist_push(list, list->items[0]);
+    arraylist_push(list, list->items[1]);
+    jl_gc_push_arraylist(ct, list);
+    void **items = list->items;
+    size_t len = list->len;
+    JL_UNLOCK_NOGC(&finalizers_lock);
+    // run finalizers in reverse order they were added, so lower-level finalizers run last
+    for (size_t i = len-4; i >= 2; i -= 2)
+        run_finalizer(ct, items[i], items[i + 1]);
+    // first entries were moved last to make room for GC frame metadata
+    run_finalizer(ct, items[len-2], items[len-1]);
+    // matches the jl_gc_push_arraylist above
+    JL_GC_POP();
+    ct->sticky = sticky;
+}
+
+static uint64_t finalizer_rngState[JL_RNG_SIZE];
+
+void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
+{
+    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
+}
+
+void run_finalizers(jl_task_t *ct, int finalizers_thread)
+{
+    // Racy fast path:
+    // The race here should be OK since the race can only happen if
+    // another thread is writing to it with the lock held. In such case,
+    // we don't need to run pending finalizers since the writer thread
+    // will flush it.
+    if (to_finalize.len == 0)
+        return;
+    JL_LOCK_NOGC(&finalizers_lock);
+    if (to_finalize.len == 0) {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+        return;
+    }
+    arraylist_t copied_list;
+    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
+    if (to_finalize.items == to_finalize._space) {
+        copied_list.items = copied_list._space;
+    }
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
+    arraylist_new(&to_finalize, 0);
+
+    uint64_t save_rngState[JL_RNG_SIZE];
+    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
+    jl_rng_split(ct->rngState, finalizer_rngState);
+
+    // This releases the finalizers lock.
+    int8_t was_in_finalizer = ct->ptls->in_finalizer;
+    ct->ptls->in_finalizer = !finalizers_thread;
+    jl_gc_run_finalizers_in_list(ct, &copied_list);
+    ct->ptls->in_finalizer = was_in_finalizer;
+    arraylist_free(&copied_list);
+
+    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
+}
+
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0 && ptls->engine_nqueued == 0) {
+        run_finalizers(ct, 0);
+    }
+}
+
+JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
+{
+    if (ptls == NULL)
+        ptls = jl_current_task->ptls;
+    return ptls->finalizers_inhibited;
+}
+
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    ptls->finalizers_inhibited++;
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
+{
+    jl_task_t *ct = jl_current_task;
+#ifdef NDEBUG
+    ct->ptls->finalizers_inhibited--;
+#else
+    jl_gc_enable_finalizers(ct, 1);
+#endif
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    int old_val = ptls->finalizers_inhibited;
+    int new_val = old_val + (on ? -1 : 1);
+    if (new_val < 0) {
+        JL_TRY {
+            jl_error(""); // get a backtrace
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
+            // Only print the backtrace once, to avoid spamming the logs
+            static int backtrace_printed = 0;
+            if (backtrace_printed == 0) {
+                backtrace_printed = 1;
+                jlbacktrace(); // written to STDERR_FILENO
+            }
+        }
+        return;
+    }
+    ptls->finalizers_inhibited = new_val;
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(ct);
+    }
+}
+
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
+{
+    return jl_current_task->ptls->in_finalizer;
+}
+
+static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
+{
+    void **items = flist->items;
+    size_t len = flist->len;
+    for(size_t i = 0; i < len; i+=2) {
+        void *v = items[i];
+        void *f = items[i + 1];
+        if (__unlikely(!v))
+            continue;
+        schedule_finalization(v, f);
+    }
+    flist->len = 0;
+}
+
+void jl_gc_run_all_finalizers(jl_task_t *ct)
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    // this is called from `jl_atexit_hook`; threads could still be running
+    // so we have to guard the finalizers' lists
+    JL_LOCK_NOGC(&finalizers_lock);
+    schedule_all_finalizers(&finalizer_list_marked);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            schedule_all_finalizers(&ptls2->finalizers);
+    }
+    // unlock here because `run_finalizers` locks this
+    JL_UNLOCK_NOGC(&finalizers_lock);
+    run_finalizers(ct, 1);
+}
+
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
+{
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
+    arraylist_t *a = &ptls->finalizers;
+    // This acquire load and the release store at the end are used to
+    // synchronize with `finalize_object` on another thread. Apart from the GC,
+    // which is blocked by entering a unsafe region, there might be only
+    // one other thread accessing our list in `finalize_object`
+    // (only one thread since it needs to acquire the finalizer lock).
+    // Similar to `finalize_object`, all content mutation has to be done
+    // between the acquire and the release of the length.
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
+    if (__unlikely(oldlen + 2 > a->max)) {
+        JL_LOCK_NOGC(&finalizers_lock);
+        // `a->len` might have been modified.
+        // Another possibility is to always grow the array to `oldlen + 2` but
+        // it's simpler this way and uses slightly less memory =)
+        oldlen = a->len;
+        arraylist_grow(a, 2);
+        a->len = oldlen;
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    void **items = a->items;
+    items[oldlen] = v;
+    items[oldlen + 1] = f;
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
+}
+
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
+{
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+}
+
+// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
+JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
+{
+    assert(!gc_ptr_tag(v, 3));
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
+        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
+    }
+    else {
+        jl_gc_add_finalizer_(ptls, v, f);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_gc_add_finalizer_th(ptls, v, f);
+}
+
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
+{
+    JL_LOCK_NOGC(&finalizers_lock);
+    // Copy the finalizers into a temporary list so that code in the finalizer
+    // won't change the list as we loop through them.
+    // This list is also used as the GC frame when we are running the finalizers
+    arraylist_t copied_list;
+    arraylist_new(&copied_list, 0);
+    // No need to check the to_finalize list since the user is apparently
+    // still holding a reference to the object
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
+    }
+    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
+    if (copied_list.len > 0) {
+        // This releases the finalizers lock.
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
+    }
+    else {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    arraylist_free(&copied_list);
+}
+
+JL_DLLEXPORT void jl_finalize(jl_value_t *o)
+{
+    jl_finalize_th(jl_current_task, o);
+}
+
+// =========================================================================== //
+// Threading
+// =========================================================================== //
+
+int gc_n_threads;
+jl_ptls_t* gc_all_tls_states;
+
+// =========================================================================== //
+// MISC
+// =========================================================================== //
+
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
+{
+    return jl_buff_tag;
+}
+
+// callback for passing OOM errors from gmp
+JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
+{
+    jl_throw(jl_memory_exception);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gc-common.h b/src/gc-common.h
new file mode 100644
index 0000000000000..4d53830442a7d
--- /dev/null
+++ b/src/gc-common.h
@@ -0,0 +1,176 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_GC_COMMON_H
+#define JL_GC_COMMON_H
+
+#include "julia.h"
+#include "julia_internal.h"
+#ifndef _OS_WINDOWS_
+#include <sys/mman.h>
+#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
+
+typedef void (*jl_gc_cb_func_t)(void);
+
+typedef struct _jl_gc_callback_list_t {
+    struct _jl_gc_callback_list_t *next;
+    jl_gc_cb_func_t func;
+} jl_gc_callback_list_t;
+
+extern jl_gc_callback_list_t *gc_cblist_root_scanner;
+extern jl_gc_callback_list_t *gc_cblist_task_scanner;
+extern jl_gc_callback_list_t *gc_cblist_pre_gc;
+extern jl_gc_callback_list_t *gc_cblist_post_gc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_free;
+extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
+
+#define gc_invoke_callbacks(ty, list, args) \
+    do { \
+        for (jl_gc_callback_list_t *cb = list; \
+                cb != NULL; \
+                cb = cb->next) \
+        { \
+            ((ty)(cb->func)) args; \
+        } \
+    } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+    return _aligned_malloc(sz ? sz : 1, align);
+}
+STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+    (void)oldsz;
+    return _aligned_realloc(p, sz ? sz : 1, align);
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    _aligned_free(p);
+}
+#else
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return malloc(sz);
+#endif
+    void *ptr;
+    if (posix_memalign(&ptr, align, sz))
+        return NULL;
+    return ptr;
+}
+STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return realloc(d, sz);
+#endif
+    void *b = jl_malloc_aligned(sz, align);
+    if (b != NULL) {
+        memcpy(b, d, oldsz > sz ? sz : oldsz);
+        free(d);
+    }
+    return b;
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    free(p);
+}
+#endif
+#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
+#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
+
+// =========================================================================== //
+// Pointer tagging
+// =========================================================================== //
+
+STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_MARKED) != 0;
+}
+
+STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_OLD) != 0;
+}
+
+STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
+{
+    return (tag & ~(uintptr_t)3) | bits;
+}
+
+STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return ((uintptr_t)v) & mask;
+}
+
+STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return (void*)(((uintptr_t)v) & ~mask);
+}
+
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
+
+extern jl_gc_num_t gc_num;
+
+// =========================================================================== //
+// Stop-the-world for GC
+// =========================================================================== //
+void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
+
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+// Protect all access to `finalizer_list_marked` and `to_finalize`.
+// For accessing `ptls->finalizers`, the lock is needed if a thread
+// is going to realloc the buffer (of its own list) or accessing the
+// list of another thread
+extern jl_mutex_t finalizers_lock;
+// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
+// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
+// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
+//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
+// `to_finalize` should not have tagged pointers.
+extern arraylist_t finalizer_list_marked;
+extern arraylist_t to_finalize;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT;
+void run_finalizer(jl_task_t *ct, void *o, void *ff);
+void run_finalizers(jl_task_t *ct, int finalizers_thread);
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
+
+
+// =========================================================================== //
+// Threading
+// =========================================================================== //
+
+extern int gc_n_threads;
+extern jl_ptls_t* gc_all_tls_states;
+
+#endif // JL_GC_COMMON_H
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 124b7da74dee1..19dd93af5f236 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
 #include "julia.h"
 #include <inttypes.h>
 #include <stddef.h>
@@ -99,7 +100,7 @@ static arraylist_t bits_save[4];
 static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
 {
     jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
-    jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
+    jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
     jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
     while ((char*)pv <= lim) {
@@ -114,7 +115,7 @@ static void gc_clear_mark_outer(int bits)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             gc_clear_mark_page(pg, bits);
             pg = pg->next;
@@ -134,7 +135,7 @@ static void clear_mark(int bits)
     }
     bigval_t *v;
     for (int i = 0; i < gc_n_threads; i++) {
-        v = gc_all_tls_states[i]->heap.big_objects;
+        v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals;
         while (v != NULL) {
             void *gcv = &v->header;
             if (!gc_verifying)
@@ -144,7 +145,7 @@ static void clear_mark(int bits)
         }
     }
 
-    v = big_objects_marked;
+    v = oldest_generation_of_bigvals;
     while (v != NULL) {
         void *gcv = &v->header;
         if (!gc_verifying)
@@ -172,7 +173,7 @@ static void gc_verify_track(jl_ptls_t ptls)
         return;
     do {
         jl_gc_markqueue_t mq;
-        jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+        jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
         ws_queue_t *cq = &mq.chunk_queue;
         ws_queue_t *q = &mq.ptr_queue;
         jl_atomic_store_relaxed(&cq->top, 0);
@@ -232,7 +233,7 @@ void gc_verify(jl_ptls_t ptls)
         return;
     }
     jl_gc_markqueue_t mq;
-    jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq.chunk_queue;
     ws_queue_t *q = &mq.ptr_queue;
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -291,7 +292,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
     char *data = pg->data;
     char *page_begin = data + GC_PAGE_OFFSET;
@@ -353,7 +354,7 @@ static void gc_verify_tags_pagestack(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_page_stack_t *pgstk = &ptls2->page_metadata_allocd;
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
         jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
         while (pg != NULL) {
             gc_verify_tags_page(pg);
@@ -369,7 +370,7 @@ void gc_verify_tags(void)
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
             // for all pools, iterate its freelist
-            jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *next = p->freelist;
             jl_taggedvalue_t *last = NULL;
             char *allocating = gc_page_data(next);
@@ -536,13 +537,13 @@ static void gc_scrub_task(jl_task_t *ta)
 
     char *low;
     char *high;
-    if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
+    if (ta->ctx.copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         low  = (char*)ptls2->stackbase - ptls2->stacksize;
         high = (char*)ptls2->stackbase;
     }
-    else if (ta->stkbuf) {
-        low  = (char*)ta->stkbuf;
-        high = (char*)ta->stkbuf + ta->bufsz;
+    else if (ta->ctx.stkbuf) {
+        low  = (char*)ta->ctx.stkbuf;
+        high = (char*)ta->ctx.stkbuf + ta->ctx.bufsz;
     }
     else
         return;
@@ -576,90 +577,6 @@ void jl_gc_debug_print_status(void)
 }
 #endif
 
-#ifdef OBJPROFILE
-static htable_t obj_counts[3];
-static htable_t obj_sizes[3];
-void objprofile_count(void *ty, int old, int sz)
-{
-    if (gc_verifying) return;
-    if ((intptr_t)ty <= 0x10) {
-        ty = (void*)jl_buff_tag;
-    }
-    else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag &&
-             jl_is_datatype(ty) && jl_is_datatype_singleton((jl_datatype_t*)ty)) {
-        ty = jl_singleton_tag;
-    }
-    void **bp = ptrhash_bp(&obj_counts[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)2;
-    else
-        (*((intptr_t*)bp))++;
-    bp = ptrhash_bp(&obj_sizes[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)(intptr_t)(1 + sz);
-    else
-        *((intptr_t*)bp) += sz;
-}
-
-void objprofile_reset(void)
-{
-    for (int g = 0; g < 3; g++) {
-        htable_reset(&obj_counts[g], 0);
-        htable_reset(&obj_sizes[g], 0);
-    }
-}
-
-static void objprofile_print(htable_t nums, htable_t sizes)
-{
-    for(int i=0; i < nums.size; i+=2) {
-        if (nums.table[i+1] != HT_NOTFOUND) {
-            void *ty = nums.table[i];
-            int num = (intptr_t)nums.table[i + 1] - 1;
-            size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1;
-            static const int ptr_hex_width = 2 * sizeof(void*);
-            if (sz > 2e9) {
-                jl_safe_printf(" %6d : %*.1f GB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e6) {
-                jl_safe_printf(" %6d : %*.1f MB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e3) {
-                jl_safe_printf(" %6d : %*.1f kB of (%*p) ",
-                               num, 6, ((double)sz) / 1024,
-                               ptr_hex_width, ty);
-            }
-            else {
-                jl_safe_printf(" %6d : %*d  B of (%*p) ",
-                          num, 6, (int)sz, ptr_hex_width, ty);
-            }
-            if (ty == (void*)jl_buff_tag)
-                jl_safe_printf("#<buffer>");
-            else if (ty == jl_malloc_tag)
-                jl_safe_printf("#<malloc>");
-            else if (ty == jl_singleton_tag)
-                jl_safe_printf("#<singletons>");
-            else
-                jl_static_show(JL_STDERR, (jl_value_t*)ty);
-            jl_safe_printf("\n");
-        }
-    }
-}
-
-void objprofile_printall(void)
-{
-    jl_safe_printf("Transient mark :\n");
-    objprofile_print(obj_counts[0], obj_sizes[0]);
-    jl_safe_printf("Perm mark :\n");
-    objprofile_print(obj_counts[1], obj_sizes[1]);
-    jl_safe_printf("Remset :\n");
-    objprofile_print(obj_counts[2], obj_sizes[2]);
-}
-#endif
-
 #if defined(GC_TIME) || defined(GC_FINAL_STATS)
 STATIC_INLINE double jl_ns2ms(int64_t t)
 {
@@ -895,8 +812,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
     int64_t remset_nptr = 0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        last_remset_len += ptls2->heap.last_remset->len;
-        remset_nptr = ptls2->heap.remset_nptr;
+        last_remset_len += ptls2->gc_tls.heap.last_remset->len;
+        remset_nptr = ptls2->gc_tls.heap.remset_nptr;
     }
     jl_safe_printf("GC mark pause %.2f ms | "
                    "scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
@@ -993,13 +910,6 @@ void jl_gc_debug_init(void)
     arraylist_new(&lostval_parents_done, 0);
 #endif
 
-#ifdef OBJPROFILE
-    for (int g = 0; g < 3; g++) {
-        htable_new(&obj_counts[g], 0);
-        htable_new(&obj_sizes[g], 0);
-    }
-#endif
-
 #ifdef GC_FINAL_STATS
     process_t0 = jl_hrtime();
 #endif
@@ -1008,113 +918,138 @@ void jl_gc_debug_init(void)
 // GC summary stats
 
 #ifdef MEMPROFILE
-// TODO repair this and possibly merge with `gc_count_pool`
-static size_t pool_stats(jl_gc_pool_t *p, size_t *pwaste, size_t *np,
-                         size_t *pnold)
+
+typedef struct _gc_memprofile_stat_t {
+    size_t nfree; // for pool only
+    size_t npgs;  // for pool only
+    size_t nused;
+    size_t nbytes_used;
+    size_t nused_old;
+    size_t nbytes_used_old;
+} gc_memprofile_stat_t;
+
+void gc_stats_all_pool(void)
 {
-    jl_taggedvalue_t *halfpages = p->newpages;
-    size_t osize = p->osize;
-    size_t nused=0, nfree=0, npgs=0, nold=0;
-
-    if (halfpages != NULL) {
-        npgs++;
-        char *v = gc_page_data(halfpages) + GC_PAGE_OFFSET;
-        char *lim = (char*)halfpages - 1;
-        int i = 0;
-        while (v <= lim) {
-            if (!gc_marked(((jl_taggedvalue_t*)v)->bits.gc)) {
-                nfree++;
+    gc_memprofile_stat_t stat[JL_GC_N_POOLS];
+    memset(stat, 0, sizeof(stat));
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
+        while (pg != NULL) {
+            assert(gc_alloc_map_is_set(pg->data));
+            int pool_n = pg->pool_n;
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[pool_n];
+            char *data = pg->data;
+            // compute the start of the data area in this page
+            jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
+            // compute the limit of valid data in this page
+            char *lim = data + GC_PAGE_SZ - pg->osize;
+            char *lim_newpages = data + GC_PAGE_SZ;
+            if (gc_page_data((char*)p->newpages - 1) == data) {
+                lim_newpages = (char*)p->newpages;
             }
-            else {
-                nused++;
-                if (((jl_taggedvalue_t*)v)->bits.gc == GC_OLD_MARKED) {
-                    nold++;
+            char *v = (char*)v0;
+            gc_memprofile_stat_t *stat_n = &stat[pool_n];
+            while (v <= lim) {
+                uint8_t bits = ((jl_taggedvalue_t*)v)->bits.gc;
+                if (!gc_marked(bits) || (char*)v >= lim_newpages) {
+                    stat_n->nfree++;
                 }
+                else {
+                    if (gc_old(bits)) {
+                        assert(bits == GC_OLD_MARKED);
+                        stat_n->nused_old++;
+                        stat_n->nbytes_used_old += pg->osize;
+                    }
+                    else {
+                        stat_n->nused++;
+                        stat_n->nbytes_used += pg->osize;
+                    }
+                }
+                v = v + pg->osize;
             }
-            v = v + osize;
-            i++;
+            stat_n->npgs++;
+            pg = pg->next;
         }
-        // only the first page is allocated on
     }
-    *pwaste = npgs * GC_PAGE_SZ - (nused * p->osize);
-    *np = npgs;
-    *pnold = nold;
-    if (npgs != 0) {
-        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
-                       p->osize,
-                       (long long)nused,
-                       (long long)(nused + nfree),
-                       (long long)(nused ? (nold * 100) / nused : 0),
-                       (long long)npgs,
-                       (long long)((nused * p->osize) / 1024),
-                       (long long)(*pwaste / 1024));
-    }
-    return nused*p->osize;
-}
-
-void gc_stats_all_pool(void)
-{
-    size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
-        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
-            nb += b;
-            no += (b / ptls2->heap.norm_pools[i].osize);
-            tw += w;
-            tp += np;
-            nold += nol;
-            noldbytes += nol * ptls2->heap.norm_pools[i].osize;
-        }
+        jl_ptls_t ptls = jl_current_task->ptls;
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[i];
+        gc_memprofile_stat_t *s = &stat[i];
+        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
+            p->osize,
+            (long long)(s->nused + s->nused_old),
+            (long long)(s->nused + s->nused_old + s->nfree),
+            (long long)((s->nused + s->nused_old) ? (s->nused_old * 100) / (s->nused + s->nused_old) : 0),
+            (long long)s->npgs,
+            (long long)(((s->nused + s->nused_old) * p->osize) / 1024),
+            (long long)((GC_PAGE_SZ * s->npgs - s->nused * p->osize) / 1024));
     }
-    jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
-                   "%lld total fragments (%lld%% overhead), in %lld pages\n",
-                   (long long)no,
-                   (long long)(no ? (nold * 100) / no : 0),
-                   (long long)(nb / 1024),
-                   (long long)(nb ? (noldbytes * 100) / nb : 0),
-                   (long long)tw,
-                   (long long)(nb ? (tw * 100) / nb : 0),
-                   (long long)tp);
 }
 
 void gc_stats_big_obj(void)
 {
-    size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
+    gc_memprofile_stat_t stat;
+    memset(&stat, 0, sizeof(stat));
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        bigval_t *v = ptls2->heap.big_objects;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
             if (gc_marked(v->bits.gc)) {
-                nused++;
-                nbytes += v->sz & ~3;
+                if (gc_old(v->bits.gc)) {
+                    assert(v->bits.gc == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += v->sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += v->sz;
+                }
             }
             v = v->next;
         }
-        v = big_objects_marked;
+        v = oldest_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
-            if (gc_marked(v->bits.gc)) {
-                nused_old++;
-                nbytes_old += v->sz & ~3;
-            }
+            assert(v->bits.gc == GC_OLD_MARKED);
+            stat.nused_old++;
+            stat.nbytes_used_old += v->sz;
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->heap.mallocarrays;
+        mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
         while (ma != NULL) {
-            if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
-                nused++;
-                nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a);
+            uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
+            if (gc_marked(bits)) {
+                jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
+                m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
+                size_t sz = jl_genericmemory_nbytes(m);
+                if (gc_old(bits)) {
+                    assert(bits == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += sz;
+                }
             }
             ma = ma->next;
         }
     }
-
     jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",
-                   (long long)((nbytes + nbytes_old) / 1024),
-                   (long long)(nbytes + nbytes_old ? (nbytes_old * 100) / (nbytes + nbytes_old) : 0),
-                   (long long)(nused + nused_old),
-                   (long long)(nused + nused_old ? (nused_old * 100) / (nused + nused_old) : 0));
+                   (long long)((stat.nbytes_used + stat.nbytes_used_old) / 1024),
+                   (long long)(stat.nbytes_used + stat.nbytes_used_old ? (stat.nbytes_used_old * 100) / (stat.nbytes_used + stat.nbytes_used_old) : 0),
+                   (long long)(stat.nused + stat.nused_old),
+                   (long long)(stat.nused + stat.nused_old ? (stat.nused_old * 100) / (stat.nused + stat.nused_old) : 0));
 }
 #endif //MEMPROFILE
 
@@ -1146,7 +1081,7 @@ static void gc_count_pool_pagetable(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             if (gc_alloc_map_is_set(pg->data)) {
                 gc_count_pool_page(pg);
diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp
index 5007f88939701..fcda11dad4f8a 100644
--- a/src/gc-heap-snapshot.cpp
+++ b/src/gc-heap-snapshot.cpp
@@ -2,9 +2,9 @@
 
 #include "gc-heap-snapshot.h"
 
+#include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
-#include "gc.h"
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -182,8 +182,10 @@ struct HeapSnapshot {
 // global heap snapshot, mutated by garbage collector
 // when snapshotting is on.
 int gc_heap_snapshot_enabled = 0;
+int gc_heap_snapshot_redact_data = 0;
 HeapSnapshot *g_snapshot = nullptr;
-extern jl_mutex_t heapsnapshot_lock;
+// mutex for gc-heap-snapshot.
+jl_mutex_t heapsnapshot_lock;
 
 void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one);
 void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one);
@@ -194,7 +196,7 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
 
 
 JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
-    ios_t *strings, ios_t *json, char all_one)
+    ios_t *strings, ios_t *json, char all_one, char redact_data)
 {
     HeapSnapshot snapshot;
     snapshot.nodes = nodes;
@@ -206,6 +208,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
 
     // Enable snapshotting
     g_snapshot = &snapshot;
+    gc_heap_snapshot_redact_data = redact_data;
     gc_heap_snapshot_enabled = true;
 
     _add_synthetic_root_entries(&snapshot);
@@ -215,6 +218,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
 
     // Disable snapshotting
     gc_heap_snapshot_enabled = false;
+    gc_heap_snapshot_redact_data = 0;
     g_snapshot = nullptr;
 
     jl_mutex_unlock(&heapsnapshot_lock);
@@ -327,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
 
     if (jl_is_string(a)) {
         node_type = "String";
-        name = jl_string_data(a);
+        name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
         self_size = jl_string_len(a);
     }
     else if (jl_is_symbol(a)) {
@@ -557,6 +561,13 @@ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *
                     g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<internal>"));
 }
 
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    _record_gc_edge("binding", from, to,
+                    g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<binding>"));
+}
+
+
 void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
     // valid alloc_type values are 0, 1, 2
@@ -606,7 +617,9 @@ void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx,
 void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one)
 {
     // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567
+    // also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51
     ios_printf(json, "{\"snapshot\":{");
+
     ios_printf(json, "\"meta\":{");
     ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],");
     ios_printf(json, "\"node_types\":[");
@@ -617,10 +630,26 @@ void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &sn
     ios_printf(json, "\"edge_types\":[");
     snapshot.edge_types.print_json_array(json, false);
     ios_printf(json, ",");
-    ios_printf(json, "\"string_or_number\",\"from_node\"]");
+    ios_printf(json, "\"string_or_number\",\"from_node\"],");
+    // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "\"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],");
+    ios_printf(json, "\"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],");
+    ios_printf(json, "\"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],");
+    ios_printf(json, "\"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]");
+    // end not used
     ios_printf(json, "},\n"); // end "meta"
+
     ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes);
-    ios_printf(json, "\"edge_count\":%zu", snapshot.num_edges);
-    ios_printf(json, "}\n"); // end "snapshot"
+    ios_printf(json, "\"edge_count\":%zu,", snapshot.num_edges);
+    ios_printf(json, "\"trace_function_count\":0"); // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "},\n"); // end "snapshot"
+
+    // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "\"trace_function_infos\":[],");
+    ios_printf(json, "\"trace_tree\":[],");
+    ios_printf(json, "\"samples\":[],");
+    ios_printf(json, "\"locations\":[]");
+    // end not used
+
     ios_printf(json, "}");
 }
diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h
index 70884f5f62d6a..dc5b22bb72eb1 100644
--- a/src/gc-heap-snapshot.h
+++ b/src/gc-heap-snapshot.h
@@ -32,9 +32,12 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt
 void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT;
 // Used for objects that are reachable from the finalizer list
 void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT;
+// Used for objects reachable from the binding partition pointer union
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT;
 
 extern int gc_heap_snapshot_enabled;
 extern int prev_sweep_full;
+extern jl_mutex_t heapsnapshot_lock;
 
 int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
 int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
@@ -96,6 +99,13 @@ static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from,
     }
 }
 
+static inline void gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
+        _gc_heap_snapshot_record_binding_partition_edge(from, to);
+    }
+}
+
 static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
     if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
@@ -121,7 +131,7 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
 // Functions to call from Julia to take heap snapshot
 // ---------------------------------------------------------------------
 JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
-    ios_t *strings, ios_t *json, char all_one);
+    ios_t *strings, ios_t *json, char all_one, char redact_data);
 
 
 #ifdef __cplusplus
diff --git a/src/gc-interface.h b/src/gc-interface.h
new file mode 100644
index 0000000000000..e543b4b5879f1
--- /dev/null
+++ b/src/gc-interface.h
@@ -0,0 +1,256 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  Garbage Collection interface that must be implemented by third-party GCs
+*/
+
+#ifndef JL_GC_INTERFACE_H
+#define JL_GC_INTERFACE_H
+
+#include "dtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _jl_tls_states_t;
+struct _jl_value_t;
+struct _jl_weakref_t;
+struct _jl_datatype_t;
+
+// ========================================================================= //
+// GC Metrics
+// ========================================================================= //
+
+// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
+typedef struct {
+    int64_t allocd;
+    int64_t deferred_alloc;
+    int64_t freed;
+    uint64_t malloc;
+    uint64_t realloc;
+    uint64_t poolalloc;
+    uint64_t bigalloc;
+    uint64_t freecall;
+    uint64_t total_time;
+    uint64_t total_allocd;
+    size_t interval;
+    int pause;
+    int full_sweep;
+    uint64_t max_pause;
+    uint64_t max_memory;
+    uint64_t time_to_safepoint;
+    uint64_t max_time_to_safepoint;
+    uint64_t total_time_to_safepoint;
+    uint64_t sweep_time;
+    uint64_t mark_time;
+    uint64_t total_sweep_time;
+    uint64_t total_mark_time;
+    uint64_t last_full_sweep;
+    uint64_t last_incremental_sweep;
+} jl_gc_num_t;
+
+// ========================================================================= //
+// System-wide Initialization
+// ========================================================================= //
+
+// System-wide initialization function. Responsible for initializing global locks as well as
+// global memory parameters (e.g. target heap size) used by the collector.
+void jl_gc_init(void);
+// Spawns GC threads.
+void jl_start_gc_threads(void);
+
+// ========================================================================= //
+// Per-thread Initialization
+// ========================================================================= //
+
+// Initializes thread-local data structures such as thread-local object pools,
+// thread-local remembered sets and thread-local allocation counters.
+// Should be called exactly once per Julia thread.
+void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT;
+// Deallocates any memory previously used for thread-local GC data structures.
+// Mostly used to ensure that we perform this memory cleanup for foreign threads that are
+// about to leave Julia.
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls);
+
+// ========================================================================= //
+// Controls
+// ========================================================================= //
+
+typedef enum {
+    JL_GC_AUTO = 0, // use heuristics to determine the collection type
+    JL_GC_FULL = 1, // force a full collection
+    JL_GC_INCREMENTAL = 2, // force an incremental collection
+} jl_gc_collection_t;
+// Enables or disables (depending on the value of the argument) the collector. Returns
+// whether GC was previously enabled.
+JL_DLLEXPORT int jl_gc_enable(int on);
+// Returns whether the collector is enabled.
+JL_DLLEXPORT int jl_gc_is_enabled(void);
+// Sets a soft limit to Julia's heap.
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
+// Runs a GC cycle. This function's parameter determines whether we're running an
+// incremental, full, or automatic (i.e. heuristic driven) collection. Returns whether we
+// should run a collection cycle again (e.g. a full mark right after a full sweep to ensure
+// we do a full heap traversal).
+JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection);
+
+// ========================================================================= //
+// Metrics
+// ========================================================================= //
+
+// Retrieves Julia's `GC_Num` (structure that stores GC statistics).
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void);
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then),
+// compared to the value at the last time this function was called.
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then)
+// compared to the value at the last time this function was called. The offset parameter
+// is subtracted from this value in order to obtain the return value.
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
+// Returns the number of pool allocated bytes. This could always return 0 for GC
+// implementations that do not use pools.
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void);
+// Returns the number of live bytes at the end of the last collection cycle
+// (doesn't include the number of allocated bytes since then).
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void);
+// Stores the number of live bytes at the end of the last collection cycle plus the number
+// of bytes we allocated since then into the 64-bit integer pointer passed as an argument.
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT;
+// Retrieves the value of Julia's soft heap limit.
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
+// High-resolution (nano-seconds) value of total time spent in GC.
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
+
+// ========================================================================= //
+// Allocation
+// ========================================================================= //
+
+// Allocates small objects and increments Julia allocation counterst. Size of the object
+// header must be included in the object size. The (possibly unused in some implementations)
+// offset to the arena in which we're allocating is passed in the second parameter, and the
+// object size in the third parameter. If thread-local allocators are used, then this
+// function should allocate in the thread-local allocator of the thread referenced by the
+// jl_ptls_t argument. An additional (last) parameter containing information about the type
+// of the object being allocated may be used to record an allocation of that type in the
+// allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_small_alloc(struct _jl_tls_states_t *ptls,
+                                                   int offset, int osize,
+                                                   struct _jl_value_t *type);
+// Description: Allocates large objects and increments Julia allocation counters. Size of
+// the object header must be included in the object size. If thread-local allocators are
+// used, then this function should allocate in the thread-local allocator of the thread
+// referenced by the jl_ptls_t argument. An additional (last) parameter containing
+// information about the type of the object being allocated may be used to record an
+// allocation of that type in the allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_big_alloc(struct _jl_tls_states_t *ptls, size_t sz,
+                                                 struct _jl_value_t *type);
+// Wrapper around Libc malloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
+// Wrapper around Libc calloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz);
+// Wrapper around Libc free that updates Julia allocation counters.
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz);
+// Wrapper around Libc realloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz);
+// Wrapper around Libc malloc that allocates a memory region with a few additional machine
+// words before the actual payload that are used to record the size of the requested
+// allocation. Also updates Julia allocation counters. The function returns a pointer to the
+// payload as a result of the allocation.
+JL_DLLEXPORT void *jl_malloc(size_t sz);
+// Wrapper around Libc calloc that allocates a memory region with a few additional machine
+// words before the actual payload that are used to record the size of the requested
+// allocation. Also updates Julia allocation counters. The function returns a pointer to the
+// payload as a result of the allocation.
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz);
+// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated
+// with jl_malloc or jl_calloc, and uses the size information stored in the first machine
+// words of the memory buffer update Julia allocation counters, and then frees the
+// corresponding memory buffer.
+JL_DLLEXPORT void jl_free(void *p);
+// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or
+// jl_calloc, and uses the size information stored in the first machine words of the memory
+// buffer to update Julia allocation counters, reallocating the corresponding memory buffer
+// in the end.
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz);
+// Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and
+// Strings. It increments Julia allocation counters and should check whether we're close to
+// the Julia heap target, and therefore, whether we should run a collection. Note that this
+// doesn't record the size of the allocation request in a side metadata (i.e. a few words in
+// front of the memory payload): this function is used for Julia object allocations, and we
+// assume that there is already a field in the Julia object being allocated that we may use
+// to store the size of the memory buffer.
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
+// Allocates a new weak-reference, assigns its value and increments Julia allocation
+// counters. If thread-local allocators are used, then this function should allocate in the
+// thread-local allocator of the thread referenced by the first jl_ptls_t argument.
+JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls,
+                                                        struct _jl_value_t *value);
+// Allocates a new weak-reference, assigns its value and increments Julia allocation
+// counters. If thread-local allocators are used, then this function should allocate in the
+// thread-local allocator of the current thread.
+JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value);
+// Allocates an object whose size is specified by the function argument and increments Julia
+// allocation counters. If thread-local allocators are used, then this function should
+// allocate in the thread-local allocator of the current thread.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz);
+// Permanently allocates a memory slot of the size specified by the first parameter. This
+// block of memory is allocated in an immortal region that is never swept. The second
+// parameter specifies whether the memory should be filled with zeros. The third and fourth
+// parameters specify the alignment and an offset in bytes, respectively. Specifically, the
+// pointer obtained by advancing the result of this function by the number of bytes
+// specified in the fourth parameter will be aligned according to the value given by the
+// third parameter in bytes.
+JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
+                                    unsigned offset) JL_NOTSAFEPOINT;
+// Permanently allocates an object of the size specified by the first parameter. Size of the
+// object header must be included in the object size. This object is allocated in an
+// immortal region that is never swept. The second parameter specifies the type of the
+// object being allocated and will be used to set the object header.
+struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
+
+// ========================================================================= //
+// Runtime Write-Barriers
+// ========================================================================= //
+
+// Write barrier slow-path. If a generational collector is used,
+// it may enqueue an old object into the remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+// In a generational collector is used, this function walks over the fields of the
+// object specified by the second parameter (as defined by the data type in the third
+// parameter). If a field points to a young object, the first parameter is enqueued into the
+// remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
+// If a generational collector is used, checks whether the function argument points to an
+// old object, and if so, calls the write barrier slow path above. In most cases, this
+// function is used when its caller has verified that there is a young reference in the
+// object that's being passed as an argument to this function.
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT;
+// Write barrier function that must be used after pointer writes to heap-allocated objects –
+// the value of the field being written must also point to a heap-allocated object.
+// If a generational collector is used, it may check whether the two function arguments are
+// in different GC generations (i.e. if the first argument points to an old object and the
+// second argument points to a young object), and if so, call the write barrier slow-path.
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT;
+// Freshly allocated objects are known to be in the young generation until the next safepoint,
+// so write barriers can be omitted until the next allocation. This function is a no-op that
+// can be used to annotate that a write barrier would be required were it not for this property
+// (as opposed to somebody just having forgotten to think about write barriers).
+STATIC_INLINE void jl_gc_wb_fresh(const void *parent, const void *ptr) JL_NOTSAFEPOINT {}
+// Used to annotate that a write barrier would be required, but may be omitted because `ptr`
+// is known to be an old object.
+STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOTSAFEPOINT {}
+// Write-barrier function that must be used after copying multiple fields of an object into
+// another. It should be semantically equivalent to triggering multiple write barriers – one
+// per field of the object being copied, but may be special-cased for performance reasons.
+STATIC_INLINE void jl_gc_multi_wb(const void *parent,
+                                  const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c
index 5af1c3d014770..2625fa812781a 100644
--- a/src/gc-page-profiler.c
+++ b/src/gc-page-profiler.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "gc-page-profiler.h"
+#include "julia.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -20,6 +21,8 @@ gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT
     gc_page_profiler_serializer_t serializer;
     if (__unlikely(page_profile_enabled)) {
         arraylist_new(&serializer.typestrs, GC_PAGE_SZ);
+        serializer.buffers = (char *)malloc_s(GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY);
+        serializer.cursor = 0;
     }
     else {
         serializer.typestrs.len = 0;
@@ -34,6 +37,8 @@ void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer,
         serializer->typestrs.len = 0;
         serializer->data = (char *)pg->data;
         serializer->osize = pg->osize;
+        serializer->cursor = 0;
+        serializer->capacity = GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY;
     }
 }
 
@@ -41,6 +46,7 @@ void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NO
 {
     if (__unlikely(page_profile_enabled)) {
         arraylist_free(&serializer->typestrs);
+        free(serializer->buffers);
     }
 }
 
@@ -71,8 +77,9 @@ void gc_page_profile_write_preamble(gc_page_profiler_serializer_t *serializer)
     JL_NOTSAFEPOINT
 {
     if (__unlikely(page_profile_enabled)) {
-        char str[GC_TYPE_STR_MAXLEN];
-        snprintf(str, GC_TYPE_STR_MAXLEN,
+        const size_t large_enough_str_size = 4096;
+        char str[large_enough_str_size];
+        snprintf(str, large_enough_str_size,
                  "{\"address\": \"%p\",\"object_size\": %d,\"objects\": [",
                  serializer->data, serializer->osize);
         ios_write(page_profile_stream, str, strlen(str));
@@ -102,22 +109,27 @@ void gc_page_profile_write_comma(gc_page_profiler_serializer_t *serializer) JL_N
 void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer)
     JL_NOTSAFEPOINT
 {
+    size_t large_enough_str_size = 4096;
     if (__unlikely(page_profile_enabled)) {
         // write to file
         uv_mutex_lock(&page_profile_lock);
         gc_page_profile_write_comma(serializer);
         gc_page_profile_write_preamble(serializer);
-        char str[GC_TYPE_STR_MAXLEN];
+        char *str = (char *)malloc_s(large_enough_str_size);
         for (size_t i = 0; i < serializer->typestrs.len; i++) {
             const char *name = (const char *)serializer->typestrs.items[i];
             if (name == GC_SERIALIZER_EMPTY) {
-                snprintf(str, GC_TYPE_STR_MAXLEN, "\"empty\",");
+                snprintf(str, large_enough_str_size, "\"empty\",");
             }
             else if (name == GC_SERIALIZER_GARBAGE) {
-                snprintf(str, GC_TYPE_STR_MAXLEN, "\"garbage\",");
+                snprintf(str, large_enough_str_size, "\"garbage\",");
             }
             else {
-                snprintf(str, GC_TYPE_STR_MAXLEN, "\"%s\",", name);
+                while ((strlen(name) + 1) > large_enough_str_size) {
+                    large_enough_str_size *= 2;
+                    str = (char *)realloc_s(str, large_enough_str_size);
+                }
+                snprintf(str, large_enough_str_size, "\"%s\",", name);
             }
             // remove trailing comma for last element
             if (i == serializer->typestrs.len - 1) {
@@ -125,6 +137,7 @@ void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer)
             }
             ios_write(page_profile_stream, str, strlen(str));
         }
+        free(str);
         gc_page_profile_write_epilogue(serializer);
         page_profile_pages_written++;
         uv_mutex_unlock(&page_profile_lock);
diff --git a/src/gc-page-profiler.h b/src/gc-page-profiler.h
index b103e23905ba5..0dd72ad072fa9 100644
--- a/src/gc-page-profiler.h
+++ b/src/gc-page-profiler.h
@@ -3,28 +3,35 @@
 #ifndef GC_PAGE_PROFILER_H
 #define GC_PAGE_PROFILER_H
 
-#include "gc.h"
+#include "gc-stock.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#define GC_TYPE_STR_MAXLEN (512)
+#define GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY (4096)
 
 typedef struct {
     arraylist_t typestrs;
     char *data;
     int osize;
+    char *buffers;
+    size_t cursor;
+    size_t capacity;
 } gc_page_profiler_serializer_t;
 
 // mutex for page profile
 extern uv_mutex_t page_profile_lock;
+// whether page profiling is enabled
+extern int page_profile_enabled;
 
 // Serializer functions
 gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT;
-void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT;
+void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer,
+                             jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT;
 void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT;
-void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer, const char *str) JL_NOTSAFEPOINT;
+void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer,
+                              const char *str) JL_NOTSAFEPOINT;
 // Page profile functions
 #define GC_SERIALIZER_EMPTY ((const char *)0x1)
 #define GC_SERIALIZER_GARBAGE ((const char *)0x2)
@@ -42,13 +49,89 @@ STATIC_INLINE void gc_page_profile_write_garbage(gc_page_profiler_serializer_t *
         gc_page_serializer_write(serializer, GC_SERIALIZER_GARBAGE);
     }
 }
+STATIC_INLINE char *gc_page_profile_request_buffer(gc_page_profiler_serializer_t *serializer, size_t size) JL_NOTSAFEPOINT
+{
+    while (serializer->cursor + size >= serializer->capacity) {
+        serializer->capacity *= 2;
+        serializer->buffers = (char *)realloc_s(serializer->buffers, serializer->capacity);
+    }
+    char *p = &serializer->buffers[serializer->cursor];
+    memset(p, 0, size);
+    serializer->cursor += size;
+    return p;
+}
 STATIC_INLINE void gc_page_profile_write_live_obj(gc_page_profiler_serializer_t *serializer,
                                                   jl_taggedvalue_t *v,
                                                   int enabled) JL_NOTSAFEPOINT
 {
     if (__unlikely(enabled)) {
-        const char *name = jl_typeof_str(jl_valueof(v));
-        gc_page_serializer_write(serializer, name);
+        jl_value_t *a = jl_valueof(v);
+        jl_value_t *t = jl_typeof(a);
+        ios_t str_;
+        int ios_need_close = 0;
+        char *type_name = NULL;
+        char *type_name_in_serializer = NULL;
+        if (t == (jl_value_t *)jl_get_buff_tag()) {
+            type_name = "Buffer";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_string(a)) {
+            type_name = "String";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_symbol(a)) {
+            type_name = jl_symbol_name((jl_sym_t *)a);
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_simplevector(a)) {
+            type_name = "SimpleVector";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_module(a)) {
+            type_name = jl_symbol_name_(((jl_module_t *)a)->name);
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_task(a)) {
+            type_name = "Task";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_datatype(a)) {
+            ios_need_close = 1;
+            ios_mem(&str_, 0);
+            JL_STREAM *str = (JL_STREAM *)&str_;
+            jl_static_show(str, a);
+            type_name = str_.buf;
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, str_.size + 1);
+            memcpy(type_name_in_serializer, type_name, str_.size);
+        }
+        else {
+            ios_need_close = 1;
+            ios_mem(&str_, 0);
+            JL_STREAM *str = (JL_STREAM *)&str_;
+            jl_static_show(str, t);
+            type_name = str_.buf;
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, str_.size + 1);
+            memcpy(type_name_in_serializer, type_name, str_.size);
+        }
+        gc_page_serializer_write(serializer, type_name_in_serializer);
+        if (ios_need_close) {
+            ios_close(&str_);
+        }
+        jl_may_leak(type_name_in_serializer);
     }
 }
 void gc_enable_page_profile(void) JL_NOTSAFEPOINT;
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 696f0831762be..71d59de29166f 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -9,6 +10,13 @@
 extern "C" {
 #endif
 
+uv_mutex_t gc_pages_lock;
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return GC_PAGE_SZ;
+}
+
 // Try to allocate memory in chunks to permit faster allocation
 // and improve memory locality of the pools
 #ifdef _P64
@@ -62,7 +70,7 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT
 // more chunks (or other allocations). The final page count is recorded
 // and will be used as the starting count next time. If the page count is
 // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown.
-// Assumes `gc_perm_lock` is acquired, the lock is released before the
+// Assumes `gc_pages_lock` is acquired, the lock is released before the
 // exception is thrown.
 char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
 {
@@ -82,7 +90,7 @@ char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
             block_pg_cnt = pg_cnt = min_block_pg_alloc;
         }
         else {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&gc_pages_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -122,11 +130,11 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
         goto exit;
     }
 
-    uv_mutex_lock(&gc_perm_lock);
+    uv_mutex_lock(&gc_pages_lock);
     // another thread may have allocated a large block while we were waiting...
     meta = pop_lf_back(&global_page_pool_clean);
     if (meta != NULL) {
-        uv_mutex_unlock(&gc_perm_lock);
+        uv_mutex_unlock(&gc_pages_lock);
         gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         goto exit;
     }
@@ -144,7 +152,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
             push_lf_back(&global_page_pool_clean, pg);
         }
     }
-    uv_mutex_unlock(&gc_perm_lock);
+    uv_mutex_unlock(&gc_pages_lock);
 exit:
 #ifdef _OS_WINDOWS_
     VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 2d0fc011802c9..783129ea97693 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "threading.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -32,17 +33,20 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
     void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
     if (stk == NULL)
         return MAP_FAILED;
+
+    // set up a guard page to detect stack overflow
     DWORD dwOldProtect;
     if (!VirtualProtect(stk, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect)) {
         VirtualFree(stk, 0, MEM_RELEASE);
         return MAP_FAILED;
     }
+
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
 
 
-static void free_stack(void *stkbuf, size_t bufsz)
+static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
     VirtualFree(stkbuf, 0, MEM_RELEASE);
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
@@ -52,28 +56,39 @@ static void free_stack(void *stkbuf, size_t bufsz)
 
 static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
 {
+# ifdef _OS_OPENBSD_
+    // we don't set up a guard page to detect stack overflow: on OpenBSD, any
+    // mmap-ed region has guard page managed by the kernel, so there is no
+    // need for it. Additionally, a memory region used as stack (memory
+    // allocated with MAP_STACK option) has strict permission, and you can't
+    // "create" a guard page on such memory by using `mprotect` on it
+    void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+    if (stk == MAP_FAILED)
+        return MAP_FAILED;
+# else
     void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (stk == MAP_FAILED)
         return MAP_FAILED;
-#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
-    // setup a guard page to detect stack overflow
+
+    // set up a guard page to detect stack overflow
     if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) {
         munmap(stk, bufsz);
         return MAP_FAILED;
     }
-#endif
+# endif
+
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
 
-static void free_stack(void *stkbuf, size_t bufsz)
+static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
     munmap(stkbuf, bufsz);
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
 }
 #endif
 
-JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void)
+JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void) JL_NOTSAFEPOINT
 {
     return jl_atomic_load_relaxed(&num_stack_mappings);
 }
@@ -108,7 +123,7 @@ static unsigned select_pool(size_t nb) JL_NOTSAFEPOINT
 }
 
 
-static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
 #ifdef _COMPILER_ASAN_ENABLED_
     __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
@@ -116,7 +131,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
             return;
         }
     }
@@ -134,18 +149,18 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 {
     // avoid adding an original thread stack to the free list
-    if (task == ptls->root_task && !task->copy_stack)
+    if (task == ptls->root_task && !task->ctx.copy_stack)
         return;
-    void *stkbuf = task->stkbuf;
-    size_t bufsz = task->bufsz;
+    void *stkbuf = task->ctx.stkbuf;
+    size_t bufsz = task->ctx.bufsz;
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            task->stkbuf = NULL;
+            task->ctx.stkbuf = NULL;
 #ifdef _COMPILER_ASAN_ENABLED_
             __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
 #endif
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
         }
     }
 }
@@ -160,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(ssize);
         ssize = pool_sizes[pool_id];
-        small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
+        small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
         if (pool->len > 0) {
             stk = small_arraylist_pop(pool);
         }
@@ -181,13 +196,13 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     }
     *bufsz = ssize;
     if (owner) {
-        small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
         mtarraylist_push(live_tasks, owner);
     }
     return stk;
 }
 
-void sweep_stack_pools(void)
+void sweep_stack_pools(void) JL_NOTSAFEPOINT
 {
     // Stack sweeping algorithm:
     //    // deallocate stacks if we have too many sitting around unused
@@ -208,7 +223,7 @@ void sweep_stack_pools(void)
 
         // free half of stacks that remain unused since last sweep
         for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-            small_arraylist_t *al = &ptls2->heap.free_stacks[p];
+            small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
             size_t n_to_free;
             if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
                 n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
@@ -230,10 +245,10 @@ void sweep_stack_pools(void)
             }
         }
         if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
-            small_arraylist_free(ptls2->heap.free_stacks);
+            small_arraylist_free(ptls2->gc_tls.heap.free_stacks);
         }
 
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = 0;
         size_t ndel = 0;
         size_t l = live_tasks->len;
@@ -244,17 +259,17 @@ void sweep_stack_pools(void)
             jl_task_t *t = (jl_task_t*)lst[n];
             assert(jl_is_task(t));
             if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
-                if (t->stkbuf == NULL)
+                if (t->ctx.stkbuf == NULL)
                     ndel++; // jl_release_task_stack called
                 else
                     n++;
             }
             else {
                 ndel++;
-                void *stkbuf = t->stkbuf;
-                size_t bufsz = t->bufsz;
+                void *stkbuf = t->ctx.stkbuf;
+                size_t bufsz = t->ctx.bufsz;
                 if (stkbuf) {
-                    t->stkbuf = NULL;
+                    t->ctx.stkbuf = NULL;
                     _jl_free_stack(ptls2, stkbuf, bufsz);
                 }
 #ifdef _COMPILER_TSAN_ENABLED_
@@ -281,16 +296,12 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
     size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list everything it fails
 restart:
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
-            continue;
-        }
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
-        l += n + (ptls2->root_task->stkbuf != NULL);
+        l += n + (ptls2->root_task->ctx.stkbuf != NULL);
     }
     l += l / 20; // add 5% for margin of estimation error
     jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
@@ -298,24 +309,20 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
     allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     size_t j = 0;
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
-            continue;
-        }
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
         jl_task_t *t = ptls2->root_task;
-        if (t->stkbuf != NULL) {
+        if (t->ctx.stkbuf != NULL) {
             if (j == l)
                 goto restart;
             jl_array_data(a,void*)[j++] = t;
         }
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         for (size_t i = 0; i < n; i++) {
             jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
-            if (t->stkbuf != NULL) {
+            if (t->ctx.stkbuf != NULL) {
                 if (j == l)
                     goto restart;
                 jl_array_data(a,void*)[j++] = t;
diff --git a/src/gc.c b/src/gc-stock.c
similarity index 74%
rename from src/gc.c
rename to src/gc-stock.c
index baa9c26a076a6..d25f8917f302d 100644
--- a/src/gc.c
+++ b/src/gc-stock.c
@@ -1,6 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
+#include "gc-alloc-profiler.h"
+#include "gc-heap-snapshot.h"
 #include "gc-page-profiler.h"
 #include "julia.h"
 #include "julia_atomics.h"
@@ -22,7 +25,7 @@ int jl_n_sweepthreads;
 _Atomic(int) gc_n_threads_marking;
 // Number of threads sweeping
 _Atomic(int) gc_n_threads_sweeping;
-// Temporary for the `ptls->page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing)
+// Temporary for the `ptls->gc_tls.page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing)
 _Atomic(jl_gc_padded_page_stack_t *) gc_allocd_scratch;
 // `tid` of mutator thread that triggered GC
 _Atomic(int) gc_master_tid;
@@ -35,126 +38,8 @@ uv_cond_t gc_threads_cond;
 uv_sem_t gc_sweep_assists_needed;
 // Mutex used to coordinate entry of GC threads in the mark loop
 uv_mutex_t gc_queue_observer_lock;
-
-// Linked list of callback functions
-
-typedef void (*jl_gc_cb_func_t)(void);
-
-typedef struct jl_gc_callback_list_t {
-    struct jl_gc_callback_list_t *next;
-    jl_gc_cb_func_t func;
-} jl_gc_callback_list_t;
-
-static jl_gc_callback_list_t *gc_cblist_root_scanner;
-static jl_gc_callback_list_t *gc_cblist_task_scanner;
-static jl_gc_callback_list_t *gc_cblist_pre_gc;
-static jl_gc_callback_list_t *gc_cblist_post_gc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_free;
-static jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
-
-#define gc_invoke_callbacks(ty, list, args) \
-    do { \
-        for (jl_gc_callback_list_t *cb = list; \
-                cb != NULL; \
-                cb = cb->next) \
-        { \
-            ((ty)(cb->func)) args; \
-        } \
-    } while (0)
-
-static void jl_gc_register_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func)
-            return;
-        list = &((*list)->next);
-    }
-    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
-    (*list)->next = NULL;
-    (*list)->func = func;
-}
-
-static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func) {
-            jl_gc_callback_list_t *tmp = *list;
-            (*list) = (*list)->next;
-            free(tmp);
-            return;
-        }
-        list = &((*list)->next);
-    }
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
-}
-
-// Protect all access to `finalizer_list_marked` and `to_finalize`.
-// For accessing `ptls->finalizers`, the lock is needed if a thread
-// is going to realloc the buffer (of its own list) or accessing the
-// list of another thread
-static jl_mutex_t finalizers_lock;
-static uv_mutex_t gc_cache_lock;
-
-// mutex for gc-heap-snapshot.
-jl_mutex_t heapsnapshot_lock;
+// Tag for sentinel nodes in bigval list
+uintptr_t gc_bigval_sentinel_tag;
 
 // Flag that tells us whether we need to support conservative marking
 // of objects.
@@ -192,435 +77,13 @@ static _Atomic(int) support_conservative_marking = 0;
  * finalizers in unmanaged (GC safe) mode.
  */
 
-jl_gc_num_t gc_num = {0};
-static size_t last_long_collect_interval;
-int gc_n_threads;
-jl_ptls_t* gc_all_tls_states;
 gc_heapstatus_t gc_heap_stats = {0};
-int next_sweep_full = 0;
-const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
-{
-    return jl_buff_tag;
-}
-
-// List of marked big objects.  Not per-thread.  Accessed only by master thread.
-bigval_t *big_objects_marked = NULL;
-
-// -- Finalization --
-// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
-// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
-// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
-//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
-// `to_finalize` should not have tagged pointers.
-arraylist_t finalizer_list_marked;
-arraylist_t to_finalize;
-JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
-
-
-NOINLINE uintptr_t gc_get_stack_ptr(void)
-{
-    return (uintptr_t)jl_get_frame_addr();
-}
-
-void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
-
-// malloc wrappers, aligned allocation
-
-#if defined(_OS_WINDOWS_)
-STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
-{
-    return _aligned_malloc(sz ? sz : 1, align);
-}
-STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-    (void)oldsz;
-    return _aligned_realloc(p, sz ? sz : 1, align);
-}
-STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    _aligned_free(p);
-}
-#else
-STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return malloc(sz);
-#endif
-    void *ptr;
-    if (posix_memalign(&ptr, align, sz))
-        return NULL;
-    return ptr;
-}
-STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return realloc(d, sz);
-#endif
-    void *b = jl_malloc_aligned(sz, align);
-    if (b != NULL) {
-        memcpy(b, d, oldsz > sz ? sz : oldsz);
-        free(d);
-    }
-    return b;
-}
-STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    free(p);
-}
-#endif
-#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
-#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
-
-static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
-{
-    arraylist_push(&to_finalize, o);
-    arraylist_push(&to_finalize, f);
-    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
-    // release our lock, and that will have a release barrier by then
-    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
-}
-
-static void run_finalizer(jl_task_t *ct, void *o, void *ff)
-{
-    int ptr_finalizer = gc_ptr_tag(o, 1);
-    o = gc_ptr_clear_tag(o, 3);
-    if (ptr_finalizer) {
-        ((void (*)(void*))ff)((void*)o);
-        return;
-    }
-    JL_TRY {
-        size_t last_age = ct->world_age;
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1);
-        ct->world_age = last_age;
-    }
-    JL_CATCH {
-        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
-        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-        jlbacktrace(); // written to STDERR_FILENO
-    }
-}
-
-// if `need_sync` is true, the `list` is the `finalizers` list of another
-// thread and we need additional synchronizations
-static void finalize_object(arraylist_t *list, jl_value_t *o,
-                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
-{
-    // The acquire load makes sure that the first `len` objects are valid.
-    // If `need_sync` is true, all mutations of the content should be limited
-    // to the first `oldlen` elements and no mutation is allowed after the
-    // new length is published with the `cmpxchg` at the end of the function.
-    // This way, the mutation should not conflict with the owning thread,
-    // which only writes to locations later than `len`
-    // and will not resize the buffer without acquiring the lock.
-    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
-    size_t oldlen = len;
-    void **items = list->items;
-    size_t j = 0;
-    for (size_t i = 0; i < len; i += 2) {
-        void *v = items[i];
-        int move = 0;
-        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
-            void *f = items[i + 1];
-            move = 1;
-            arraylist_push(copied_list, v);
-            arraylist_push(copied_list, f);
-        }
-        if (move || __unlikely(!v)) {
-            // remove item
-        }
-        else {
-            if (j < i) {
-                items[j] = items[i];
-                items[j+1] = items[i+1];
-            }
-            j += 2;
-        }
-    }
-    len = j;
-    if (oldlen == len)
-        return;
-    if (need_sync) {
-        // The memset needs to be unconditional since the thread might have
-        // already read the length.
-        // The `memset` (like any other content mutation) has to be done
-        // **before** the `cmpxchg` which publishes the length.
-        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
-    }
-    else {
-        list->len = len;
-    }
-}
-
-// The first two entries are assumed to be empty and the rest are assumed to
-// be pointers to `jl_value_t` objects
-static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
-{
-    void **items = list->items;
-    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
-    items[1] = ct->gcstack;
-    ct->gcstack = (jl_gcframe_t*)items;
-}
-
-// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
-// to be hold for the current thread and will release the lock when the
-// function returns.
-static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
-{
-    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
-    // of `finalizer`) in a finalizer:
-    uint8_t sticky = ct->sticky;
-    // empty out the first two entries for the GC frame
-    arraylist_push(list, list->items[0]);
-    arraylist_push(list, list->items[1]);
-    jl_gc_push_arraylist(ct, list);
-    void **items = list->items;
-    size_t len = list->len;
-    JL_UNLOCK_NOGC(&finalizers_lock);
-    // run finalizers in reverse order they were added, so lower-level finalizers run last
-    for (size_t i = len-4; i >= 2; i -= 2)
-        run_finalizer(ct, items[i], items[i + 1]);
-    // first entries were moved last to make room for GC frame metadata
-    run_finalizer(ct, items[len-2], items[len-1]);
-    // matches the jl_gc_push_arraylist above
-    JL_GC_POP();
-    ct->sticky = sticky;
-}
-
-static uint64_t finalizer_rngState[JL_RNG_SIZE];
-
-void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
-
-JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
-{
-    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
-}
-
-static void run_finalizers(jl_task_t *ct, int finalizers_thread)
-{
-    // Racy fast path:
-    // The race here should be OK since the race can only happen if
-    // another thread is writing to it with the lock held. In such case,
-    // we don't need to run pending finalizers since the writer thread
-    // will flush it.
-    if (to_finalize.len == 0)
-        return;
-    JL_LOCK_NOGC(&finalizers_lock);
-    if (to_finalize.len == 0) {
-        JL_UNLOCK_NOGC(&finalizers_lock);
-        return;
-    }
-    arraylist_t copied_list;
-    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
-    if (to_finalize.items == to_finalize._space) {
-        copied_list.items = copied_list._space;
-    }
-    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
-    arraylist_new(&to_finalize, 0);
-
-    uint64_t save_rngState[JL_RNG_SIZE];
-    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
-    jl_rng_split(ct->rngState, finalizer_rngState);
 
-    // This releases the finalizers lock.
-    int8_t was_in_finalizer = ct->ptls->in_finalizer;
-    ct->ptls->in_finalizer = !finalizers_thread;
-    jl_gc_run_finalizers_in_list(ct, &copied_list);
-    ct->ptls->in_finalizer = was_in_finalizer;
-    arraylist_free(&copied_list);
-
-    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
-}
-
-JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
-{
-    if (ct == NULL)
-        ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
-        run_finalizers(ct, 0);
-    }
-}
-
-JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
-{
-    if (ptls == NULL)
-        ptls = jl_current_task->ptls;
-    return ptls->finalizers_inhibited;
-}
-
-JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    ptls->finalizers_inhibited++;
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
-{
-    jl_task_t *ct = jl_current_task;
-#ifdef NDEBUG
-    ct->ptls->finalizers_inhibited--;
-#else
-    jl_gc_enable_finalizers(ct, 1);
-#endif
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
-{
-    if (ct == NULL)
-        ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    int old_val = ptls->finalizers_inhibited;
-    int new_val = old_val + (on ? -1 : 1);
-    if (new_val < 0) {
-        JL_TRY {
-            jl_error(""); // get a backtrace
-        }
-        JL_CATCH {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
-            // Only print the backtrace once, to avoid spamming the logs
-            static int backtrace_printed = 0;
-            if (backtrace_printed == 0) {
-                backtrace_printed = 1;
-                jlbacktrace(); // written to STDERR_FILENO
-            }
-        }
-        return;
-    }
-    ptls->finalizers_inhibited = new_val;
-    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
-        jl_gc_run_pending_finalizers(ct);
-    }
-}
-
-JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
-{
-    return jl_current_task->ptls->in_finalizer;
-}
-
-static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
-{
-    void **items = flist->items;
-    size_t len = flist->len;
-    for(size_t i = 0; i < len; i+=2) {
-        void *v = items[i];
-        void *f = items[i + 1];
-        if (__unlikely(!v))
-            continue;
-        schedule_finalization(v, f);
-    }
-    flist->len = 0;
-}
-
-void jl_gc_run_all_finalizers(jl_task_t *ct)
-{
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    // this is called from `jl_atexit_hook`; threads could still be running
-    // so we have to guard the finalizers' lists
-    JL_LOCK_NOGC(&finalizers_lock);
-    schedule_all_finalizers(&finalizer_list_marked);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            schedule_all_finalizers(&ptls2->finalizers);
-    }
-    // unlock here because `run_finalizers` locks this
-    JL_UNLOCK_NOGC(&finalizers_lock);
-    run_finalizers(ct, 1);
-}
-
-void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
-{
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
-    arraylist_t *a = &ptls->finalizers;
-    // This acquire load and the release store at the end are used to
-    // synchronize with `finalize_object` on another thread. Apart from the GC,
-    // which is blocked by entering a unsafe region, there might be only
-    // one other thread accessing our list in `finalize_object`
-    // (only one thread since it needs to acquire the finalizer lock).
-    // Similar to `finalize_object`, all content mutation has to be done
-    // between the acquire and the release of the length.
-    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
-    if (__unlikely(oldlen + 2 > a->max)) {
-        JL_LOCK_NOGC(&finalizers_lock);
-        // `a->len` might have been modified.
-        // Another possibility is to always grow the array to `oldlen + 2` but
-        // it's simpler this way and uses slightly less memory =)
-        oldlen = a->len;
-        arraylist_grow(a, 2);
-        a->len = oldlen;
-        JL_UNLOCK_NOGC(&finalizers_lock);
-    }
-    void **items = a->items;
-    items[oldlen] = v;
-    items[oldlen + 1] = f;
-    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
-}
-
-JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
-{
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
-}
-
-// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
-JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
-{
-    assert(!gc_ptr_tag(v, 3));
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
-}
-
-JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
-{
-    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
-        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
-    }
-    else {
-        jl_gc_add_finalizer_(ptls, v, f);
-    }
-}
-
-JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
-{
-    JL_LOCK_NOGC(&finalizers_lock);
-    // Copy the finalizers into a temporary list so that code in the finalizer
-    // won't change the list as we loop through them.
-    // This list is also used as the GC frame when we are running the finalizers
-    arraylist_t copied_list;
-    arraylist_new(&copied_list, 0);
-    // No need to check the to_finalize list since the user is apparently
-    // still holding a reference to the object
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
-    }
-    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
-    if (copied_list.len > 0) {
-        // This releases the finalizers lock.
-        jl_gc_run_finalizers_in_list(ct, &copied_list);
-    }
-    else {
-        JL_UNLOCK_NOGC(&finalizers_lock);
-    }
-    arraylist_free(&copied_list);
-}
+// List of big objects in oldest generation (`GC_OLD_MARKED`).  Not per-thread.  Accessed only by master thread.
+bigval_t *oldest_generation_of_bigvals = NULL;
 
 // explicitly scheduled objects for the sweepfunc callback
-static void gc_sweep_foreign_objs_in_list(arraylist_t *objs)
+static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) JL_NOTSAFEPOINT
 {
     size_t p = 0;
     for (size_t i = 0; i < objs->len; i++) {
@@ -638,13 +101,13 @@ static void gc_sweep_foreign_objs_in_list(arraylist_t *objs)
     objs->len = p;
 }
 
-static void gc_sweep_foreign_objs(void)
+static void gc_sweep_foreign_objs(void) JL_NOTSAFEPOINT
 {
     assert(gc_n_threads);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL)
-            gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs);
+            gc_sweep_foreign_objs_in_list(&ptls2->gc_tls.sweep_objs);
     }
 }
 
@@ -731,72 +194,37 @@ static int64_t scanned_bytes; // young bytes scanned while marking
 static int64_t perm_scanned_bytes; // old bytes scanned while marking
 int prev_sweep_full = 1;
 int current_sweep_full = 0;
+int next_sweep_full = 0;
 int under_pressure = 0;
 
 // Full collection heuristics
 static int64_t live_bytes = 0;
 static int64_t promoted_bytes = 0;
 static int64_t last_live_bytes = 0; // live_bytes at last collection
-static int64_t t_start = 0; // Time GC starts;
 #ifdef __GLIBC__
 // maxrss at last malloc_trim
 static int64_t last_trim_maxrss = 0;
 #endif
 
-static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
+static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
 {
-    const int nbig = gc_cache->nbig_obj;
-    for (int i = 0; i < nbig; i++) {
-        void *ptr = gc_cache->big_obj[i];
-        bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1);
-        gc_big_object_unlink(hdr);
-        if (gc_ptr_tag(ptr, 1)) {
-            gc_big_object_link(hdr, &ptls->heap.big_objects);
-        }
-        else {
-            // Move hdr from `big_objects` list to `big_objects_marked list`
-            gc_big_object_link(hdr, &big_objects_marked);
-        }
-    }
-    gc_cache->nbig_obj = 0;
     perm_scanned_bytes += gc_cache->perm_scanned_bytes;
     scanned_bytes += gc_cache->scanned_bytes;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
 }
 
-static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    uv_mutex_lock(&gc_cache_lock);
-    gc_sync_cache_nolock(ptls, &ptls->gc_cache);
-    uv_mutex_unlock(&gc_cache_lock);
-}
-
 // No other threads can be running marking at the same time
-static void gc_sync_all_caches_nolock(jl_ptls_t ptls)
+static void gc_sync_all_caches(jl_ptls_t ptls)
 {
     assert(gc_n_threads);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL)
-            gc_sync_cache_nolock(ptls, &ptls2->gc_cache);
+            gc_sync_cache(ptls, &ptls2->gc_tls.gc_cache);
     }
 }
 
-STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
-                                       int toyoung) JL_NOTSAFEPOINT
-{
-    const int nentry = sizeof(ptls->gc_cache.big_obj) / sizeof(void*);
-    size_t nobj = ptls->gc_cache.nbig_obj;
-    if (__unlikely(nobj >= nentry)) {
-        gc_sync_cache(ptls);
-        nobj = 0;
-    }
-    uintptr_t v = (uintptr_t)hdr;
-    ptls->gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v);
-    ptls->gc_cache.nbig_obj = nobj + 1;
-}
-
 // Atomically set the mark bit for object and return whether it was previously unmarked
 FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
 {
@@ -832,21 +260,17 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
     assert(!gc_alloc_map_is_set((char*)o));
     bigval_t *hdr = bigval_header(o);
     if (mark_mode == GC_OLD_MARKED) {
-        ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3;
-        gc_queue_big_marked(ptls, hdr, 0);
+        ptls->gc_tls.gc_cache.perm_scanned_bytes += hdr->sz;
     }
     else {
-        ptls->gc_cache.scanned_bytes += hdr->sz & ~3;
-        // We can't easily tell if the object is old or being promoted
-        // from the gc bits but if the `age` is `0` then the object
-        // must be already on a young list.
+        ptls->gc_tls.gc_cache.scanned_bytes += hdr->sz;
         if (mark_reset_age) {
+            assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking
             // Reset the object as if it was just allocated
-            gc_queue_big_marked(ptls, hdr, 1);
+            gc_big_object_unlink(hdr);
+            gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, hdr);
         }
     }
-    objprofile_count(jl_typeof(jl_valueof(o)),
-                     mark_mode == GC_OLD_MARKED, hdr->sz & ~3);
 }
 
 // This function should be called exactly once during marking for each pool
@@ -858,18 +282,16 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     gc_setmark_big(ptls, o, mark_mode);
 #else
     if (mark_mode == GC_OLD_MARKED) {
-        ptls->gc_cache.perm_scanned_bytes += page->osize;
+        ptls->gc_tls.gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
         jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
-        ptls->gc_cache.scanned_bytes += page->osize;
+        ptls->gc_tls.gc_cache.scanned_bytes += page->osize;
         if (mark_reset_age) {
             page->has_young = 1;
         }
     }
-    objprofile_count(jl_typeof(jl_valueof(o)),
-                     mark_mode == GC_OLD_MARKED, page->osize);
     page->has_marked = 1;
 #endif
 }
@@ -928,13 +350,12 @@ STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
 
 // weak references
 
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
-                                                jl_value_t *value)
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
 {
     jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*),
                                                   jl_weakref_type);
     wr->value = value;  // NOTE: wb not needed here
-    small_arraylist_push(&ptls->heap.weak_refs, wr);
+    small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr);
     return wr;
 }
 
@@ -944,8 +365,8 @@ static void clear_weak_refs(void)
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL) {
-            size_t n, l = ptls2->heap.weak_refs.len;
-            void **lst = ptls2->heap.weak_refs.items;
+            size_t n, l = ptls2->gc_tls.heap.weak_refs.len;
+            void **lst = ptls2->gc_tls.heap.weak_refs.items;
             for (n = 0; n < l; n++) {
                 jl_weakref_t *wr = (jl_weakref_t*)lst[n];
                 if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
@@ -963,8 +384,8 @@ static void sweep_weak_refs(void)
         if (ptls2 != NULL) {
             size_t n = 0;
             size_t ndel = 0;
-            size_t l = ptls2->heap.weak_refs.len;
-            void **lst = ptls2->heap.weak_refs.items;
+            size_t l = ptls2->gc_tls.heap.weak_refs.len;
+            void **lst = ptls2->gc_tls.heap.weak_refs.items;
             if (l == 0)
                 continue;
             while (1) {
@@ -979,7 +400,7 @@ static void sweep_weak_refs(void)
                 lst[n] = lst[n + ndel];
                 lst[n + ndel] = tmp;
             }
-            ptls2->heap.weak_refs.len -= ndel;
+            ptls2->gc_tls.heap.weak_refs.len -= ndel;
         }
     }
 }
@@ -987,18 +408,18 @@ static void sweep_weak_refs(void)
 
 STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
 {
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz;
+    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz;
     if (alloc_acc < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc);
     else {
         jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
     }
 }
 
 STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
 {
-    jl_atomic_store_relaxed(&ptls->gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz);
 }
 
 // big value list
@@ -1019,28 +440,22 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
         jl_throw(jl_memory_exception);
     gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
         gc_cblist_notify_external_alloc, (v, allocsz));
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1);
     jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
     v->sz = allocsz;
-    gc_big_object_link(v, &ptls->heap.big_objects);
+    gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, v);
     return jl_valueof(&v->header);
 }
 
-// Deprecated version, supported for legacy code.
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
-{
-    jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
-    maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag);
-    return val;
-}
+
 // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc_instrumented(jl_ptls_t ptls, size_t sz, jl_value_t *type)
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
 {
     jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
     maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
@@ -1054,62 +469,86 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
     return jl_gc_big_alloc_inner(ptls, sz);
 }
 
-// Sweep list rooted at *pv, removing and freeing any unmarked objects.
-// Return pointer to last `next` field in the culled list.
-static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
+FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT
 {
-    bigval_t *v = *pv;
+    gc_big_object_unlink(v);
+    gc_num.freed += v->sz;
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - v->sz);
+#ifdef MEMDEBUG
+    memset(v, 0xbb, v->sz);
+#endif
+    gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v));
+    jl_free_aligned(v);
+}
+
+static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT
+{
+    bigval_t *last_node = young;
+    bigval_t *v = young->next; // skip the sentinel
+    bigval_t *old = oldest_generation_of_bigvals;
+    int sweep_full = current_sweep_full; // don't load the global in the hot loop
     while (v != NULL) {
         bigval_t *nxt = v->next;
         int bits = v->bits.gc;
         int old_bits = bits;
         if (gc_marked(bits)) {
-            pv = &v->next;
             if (sweep_full || bits == GC_MARKED) {
                 bits = GC_OLD;
+                last_node = v;
+            }
+            else { // `bits == GC_OLD_MARKED`
+                assert(bits == GC_OLD_MARKED);
+                // reached oldest generation, move from young list to old list
+                gc_big_object_unlink(v);
+                gc_big_object_link(old, v);
             }
             v->bits.gc = bits;
         }
         else {
-            // Remove v from list and free it
-            *pv = nxt;
-            if (nxt)
-                nxt->prev = pv;
-            gc_num.freed += v->sz&~3;
-            jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-                jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3));
-#ifdef MEMDEBUG
-            memset(v, 0xbb, v->sz&~3);
-#endif
-            gc_invoke_callbacks(jl_gc_cb_notify_external_free_t,
-                gc_cblist_notify_external_free, (v));
-            jl_free_aligned(v);
+            sweep_unlink_and_free(v);
         }
         gc_time_count_big(old_bits, bits);
         v = nxt;
     }
-    return pv;
+    return last_node;
 }
 
-static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
+static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT
+{
+    bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel
+    while (v != NULL) {
+        bigval_t *nxt = v->next;
+        assert(v->bits.gc == GC_OLD_MARKED);
+        v->bits.gc = GC_OLD;
+        gc_time_count_big(GC_OLD_MARKED, GC_OLD);
+        v = nxt;
+    }
+}
+
+static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     gc_time_big_start();
     assert(gc_n_threads);
+    bigval_t *last_node_in_my_list = NULL;
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            sweep_big_list(sweep_full, &ptls2->heap.big_objects);
+        if (ptls2 != NULL) {
+            bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->gc_tls.heap.young_generation_of_bigvals);
+            if (ptls == ptls2) {
+                last_node_in_my_list = last_node;
+            }
+        }
     }
-    if (sweep_full) {
-        bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked);
-        // Move all survivors from big_objects_marked list to the big_objects list of this thread.
-        if (ptls->heap.big_objects)
-            ptls->heap.big_objects->prev = last_next;
-        *last_next = ptls->heap.big_objects;
-        ptls->heap.big_objects = big_objects_marked;
-        if (ptls->heap.big_objects)
-            ptls->heap.big_objects->prev = &ptls->heap.big_objects;
-        big_objects_marked = NULL;
+    if (current_sweep_full) {
+        sweep_list_of_oldest_bigvals(ptls->gc_tls.heap.young_generation_of_bigvals);
+        // move all nodes in `oldest_generation_of_bigvals` to my list of bigvals
+        assert(last_node_in_my_list != NULL);
+        assert(last_node_in_my_list->next == NULL);
+        last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel
+        if (oldest_generation_of_bigvals->next != NULL) {
+            oldest_generation_of_bigvals->next->prev = last_node_in_my_list;
+        }
+        oldest_generation_of_bigvals->next = NULL;
     }
     gc_time_big_end();
 }
@@ -1118,27 +557,33 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
     // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
+    mallocmemory_t *ma;
+    if (ptls->gc_tls.heap.mafreelist == NULL) {
+        ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
     }
     else {
-        ma = ptls->heap.mafreelist;
-        ptls->heap.mafreelist = ma->next;
+        ma = ptls->gc_tls.heap.mafreelist;
+        ptls->gc_tls.heap.mafreelist = ma->next;
     }
-    ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
-    ma->next = ptls->heap.mallocarrays;
-    ptls->heap.mallocarrays = ma;
+    ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
+    ma->next = ptls->gc_tls.heap.mallocarrays;
+    ptls->gc_tls.heap.mallocarrays = ma;
 }
 
 
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz);
     jl_batch_accum_heap_size(ptls, sz);
 }
+
+void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_batch_accum_free_size(jl_current_task->ptls, sz);
+}
+
 // Only safe to update the heap inside the GC
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
 {
@@ -1149,18 +594,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls = gc_all_tls_states[i];
         if (ptls) {
-            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval);
-            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc);
-            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc);
-            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc);
-            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
-            dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
+            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval);
+            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc);
+            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc);
+            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc);
+            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc);
+            dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc);
             if (update_heap) {
-                uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-                freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
+                uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc);
+                freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc);
                 jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
-                jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-                jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
+                jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
+                jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0);
             }
         }
     }
@@ -1176,13 +621,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
         jl_ptls_t ptls = gc_all_tls_states[i];
         if (ptls != NULL) {
             // don't reset `pool_live_bytes` here
-            jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
-            jl_atomic_store_relaxed(&ptls->gc_num.malloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_num.realloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0);
         }
     }
 }
@@ -1236,10 +681,10 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->heap.mallocarrays;
+            mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
+            mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays;
             while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
+                mallocmemory_t *nxt = ma->next;
                 jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
                 int bits = jl_astaggedvalue(a)->bits.gc;
                 if (gc_marked(bits)) {
@@ -1249,8 +694,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
                     *pma = nxt;
                     int isaligned = (uintptr_t)ma->a & 1;
                     jl_gc_free_memory(a, isaligned);
-                    ma->next = ptls2->heap.mafreelist;
-                    ptls2->heap.mafreelist = ma;
+                    ma->next = ptls2->gc_tls.heap.mafreelist;
+                    ptls2->gc_tls.heap.mafreelist = ma;
                 }
                 gc_time_count_mallocd_memory(bits);
                 ma = nxt;
@@ -1265,7 +710,7 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
 {
     assert(GC_PAGE_OFFSET >= sizeof(void*));
     pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
-    pg->pool_n = p - ptls2->heap.norm_pools;
+    pg->pool_n = p - ptls2->gc_tls.heap.norm_pools;
     jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     pg->has_young = 0;
     pg->has_marked = 0;
@@ -1285,19 +730,13 @@ pagetable_t alloc_map;
 static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
     // Do not pass in `ptls` as argument. This slows down the fast path
-    // in pool_alloc significantly
+    // in small_alloc significantly
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_gc_pagemeta_t *pg = pop_lf_back(&ptls->page_metadata_buffered);
-    if (pg != NULL) {
-        gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED);
-    }
-    else {
-        pg = jl_gc_alloc_page();
-    }
+    jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
     pg->thread_n = ptls->tid;
     set_page_metadata(pg);
-    push_lf_back(&ptls->page_metadata_allocd, pg);
+    push_lf_back(&ptls->gc_tls.page_metadata_allocd, pg);
     jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg);
     jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ);
     p->newpages = fl;
@@ -1305,24 +744,24 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 }
 
 // Size includes the tag and the tag is not cleared!!
-STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
+STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset,
                                           int osize)
 {
     // Use the pool offset instead of the pool address as the argument
     // to workaround a llvm bug.
     // Ref https://llvm.org/bugs/show_bug.cgi?id=27190
-    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset);
+    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + offset);
     assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
 #ifdef MEMDEBUG
-    return jl_gc_big_alloc(ptls, osize);
+    return jl_gc_big_alloc(ptls, osize, NULL);
 #endif
     maybe_collect(ptls);
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_num.pool_live_bytes,
-        jl_atomic_load_relaxed(&ptls->gc_num.pool_live_bytes) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_num.poolalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1);
     // first try to use the freelist
     jl_taggedvalue_t *v = p->freelist;
     if (v != NULL) {
@@ -1362,28 +801,19 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset
     return jl_valueof(v);
 }
 
-// Deprecated version, supported for legacy code.
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
-                                          int osize)
+// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type)
 {
-    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
-    maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
-    return val;
-}
-// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc_instrumented(jl_ptls_t ptls, int pool_offset,
-                                        int osize, jl_value_t* type)
-{
-    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+    jl_value_t *val = jl_gc_small_alloc_inner(ptls, offset, osize);
     maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
     return val;
 }
 
-// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
-// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner`
+// This wrapper exists only to prevent `jl_gc_small_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_small_alloc_inner`
 // into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) {
-    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) {
+    return jl_gc_small_alloc_inner(ptls, offset, osize);
 }
 
 int jl_gc_classify_pools(size_t sz, int *osize)
@@ -1393,7 +823,7 @@ int jl_gc_classify_pools(size_t sz, int *osize)
     size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     int klass = jl_gc_szclass(allocsz);
     *osize = jl_gc_sizeclasses[klass];
-    return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]);
+    return (int)(intptr_t)(&((jl_ptls_t)0)->gc_tls.heap.norm_pools[klass]);
 }
 
 // sweep phase
@@ -1424,11 +854,9 @@ STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
     }
 }
 
-int64_t buffered_pages = 0;
-
-// Returns pointer to terminal pointer of list rooted at *pfl.
-static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *buffered,
-                          jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
+// Walks over a page, reconstruting the free lists if the page contains at least one live object. If not,
+// queues up the page for later decommit (i.e. through `madvise` on Unix).
+static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
 {
     char *data = pg->data;
     jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
@@ -1444,22 +872,10 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
     gc_page_serializer_init(s, pg);
 
     int re_use_page = 1;
-    int keep_as_local_buffer = 0;
     int freedall = 1;
     int pg_skpd = 1;
     if (!pg->has_marked) {
         re_use_page = 0;
-    #ifdef _P64 // TODO: re-enable on `_P32`?
-        // lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
-        // eager version: (freedall) free page as soon as possible
-        // the eager one uses less memory.
-        // FIXME - need to do accounting on a per-thread basis
-        // on quick sweeps, keep a few pages empty but allocated for performance
-        if (!current_sweep_full && buffered_pages <= default_collect_interval / GC_PAGE_SZ) {
-            buffered_pages++;
-            keep_as_local_buffer = 1;
-        }
-    #endif
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
         gc_page_profile_write_empty_page(s, page_profile_enabled);
         goto done;
@@ -1545,40 +961,44 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
         push_lf_back(allocd, pg);
     }
     else {
-        gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
         jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
-        if (keep_as_local_buffer) {
-            push_lf_back(buffered, pg);
-        }
-        else {
-            push_lf_back(&global_page_pool_lazily_freed, pg);
-        }
+        gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
+        push_lf_back(&global_page_pool_lazily_freed, pg);
     }
     gc_page_profile_write_to_file(s);
     gc_update_page_fragmentation_data(pg);
     gc_time_count_page(freedall, pg_skpd);
-    jl_ptls_t ptls = gc_all_tls_states[pg->thread_n];
-    jl_atomic_fetch_add_relaxed(&ptls->gc_num.pool_live_bytes, GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); //TODO: Could we avoid doing a fetch_add here?
+    jl_ptls_t ptls = jl_current_task->ptls;
+    // Note that we aggregate the `pool_live_bytes` over all threads before returning this
+    // value to the user. It doesn't matter how the `pool_live_bytes` are partitioned among
+    // the threads as long as the sum is correct. Let's add the `pool_live_bytes` to the current thread
+    // instead of adding it to the thread that originally allocated the page, so we can avoid
+    // an atomic-fetch-add here.
+    size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta);
     jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
 }
 
 // the actual sweeping over all allocated pages in a memory pool
-STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed,
-                                      jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
 {
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
-    gc_sweep_page(s, p, allocd, lazily_freed, pg, osize);
+    gc_sweep_page(s, p, allocd, pg, osize);
 }
 
 // sweep over all memory that is being used and not in a pool
 static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 {
+    sweep_stack_pools();
+    gc_sweep_foreign_objs();
     sweep_malloced_memory();
-    sweep_big(ptls, sweep_full);
+    sweep_big(ptls);
+    jl_engine_sweep(gc_all_tls_states);
 }
 
 static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
@@ -1615,7 +1035,7 @@ int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_sc
         jl_gc_pagemeta_t *tail = NULL;
         memset(&tmp, 0, sizeof(tmp));
         while (1) {
-            jl_gc_pagemeta_t *pg = pop_lf_back_nosync(&ptls2->page_metadata_allocd);
+            jl_gc_pagemeta_t *pg = pop_lf_back_nosync(&ptls2->gc_tls.page_metadata_allocd);
             if (pg == NULL) {
                 break;
             }
@@ -1637,16 +1057,16 @@ int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_sc
                 push_lf_back_nosync(&tmp, pg);
             }
             else {
-                gc_sweep_pool_page(&serializer, dest, &ptls2->page_metadata_buffered, pg);
+                gc_sweep_pool_page(&serializer, dest, pg);
             }
             if (n_pages_to_scan >= n_pages_worth_parallel_sweep) {
                 break;
             }
         }
         if (tail != NULL) {
-            tail->next = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+            tail->next = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         }
-        ptls2->page_metadata_allocd = tmp;
+        ptls2->gc_tls.page_metadata_allocd = tmp;
         if (n_pages_to_scan >= n_pages_worth_parallel_sweep) {
             break;
         }
@@ -1659,18 +1079,36 @@ int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_sc
 void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch)
 {
     int parallel_sweep_worthwhile = gc_sweep_prescan(ptls, new_gc_allocd_scratch);
-    jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
-    if (!parallel_sweep_worthwhile) {
+    if (parallel_sweep_worthwhile && !page_profile_enabled) {
+        jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
+        uv_mutex_lock(&gc_threads_lock);
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_atomic_fetch_add(&ptls2->gc_tls.gc_sweeps_requested, 1);
+        }
+        uv_cond_broadcast(&gc_threads_cond);
+        uv_mutex_unlock(&gc_threads_lock);
         return;
     }
-    uv_mutex_lock(&gc_threads_lock);
-    for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        assert(ptls2 != NULL); // should be a GC thread
-        jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1);
+    if (page_profile_enabled) {
+        // we need to ensure that no threads are running sweeping when
+        // collecting a page profile.
+        // wait for all to leave in order to ensure that a straggler doesn't
+        // try to enter sweeping after we set `gc_allocd_scratch` below.
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            while (jl_atomic_load_acquire(&ptls2->gc_tls.gc_sweeps_requested) != 0) {
+                jl_cpu_pause();
+            }
+        }
     }
-    uv_cond_broadcast(&gc_threads_cond);
-    uv_mutex_unlock(&gc_threads_lock);
+    jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
 }
 
 // wait for all threads to finish sweeping
@@ -1699,12 +1137,12 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
                     continue;
                 }
                 jl_gc_page_stack_t *dest = &allocd_scratch[ptls2->tid].stack;
-                jl_gc_pagemeta_t *pg = try_pop_lf_back(&ptls2->page_metadata_allocd);
+                jl_gc_pagemeta_t *pg = try_pop_lf_back(&ptls2->gc_tls.page_metadata_allocd);
                 // failed steal attempt
                 if (pg == NULL) {
                     continue;
                 }
-                gc_sweep_pool_page(&serializer, dest, &ptls2->page_metadata_buffered, pg);
+                gc_sweep_pool_page(&serializer, dest, pg);
                 found_pg = 1;
             }
             if (!found_pg) {
@@ -1716,7 +1154,7 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
                     if (ptls2 == NULL) {
                         continue;
                     }
-                    jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+                    jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
                     if (pg != NULL) {
                         no_more_work = 0;
                         break;
@@ -1736,26 +1174,38 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
 // free all pages (i.e. through `madvise` on Linux) that were lazily freed
 void gc_free_pages(void)
 {
+    size_t n_pages_seen = 0;
+    jl_gc_page_stack_t tmp;
+    memset(&tmp, 0, sizeof(tmp));
     while (1) {
         jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed);
         if (pg == NULL) {
             break;
         }
+        n_pages_seen++;
+        // keep the last few pages around for a while
+        if (n_pages_seen * GC_PAGE_SZ <= default_collect_interval) {
+            push_lf_back(&tmp, pg);
+            continue;
+        }
         jl_gc_free_page(pg);
         push_lf_back(&global_page_pool_freed, pg);
     }
-}
-
-void gc_move_to_global_page_pool(jl_gc_page_stack_t *pgstack)
-{
-    while (1) {
-        jl_gc_pagemeta_t *pg = pop_lf_back(pgstack);
-        if (pg == NULL) {
-            break;
+    // If concurrent page sweeping is disabled, then `gc_free_pages` will be called in the stop-the-world
+    // phase. We can guarantee, therefore, that there won't be any concurrent modifications to
+    // `global_page_pool_lazily_freed`, so it's safe to assign `tmp` back to `global_page_pool_lazily_freed`.
+    // Otherwise, we need to use the thread-safe push_lf_back/pop_lf_back functions.
+    if (jl_n_sweepthreads == 0) {
+        global_page_pool_lazily_freed = tmp;
+    }
+    else {
+        while (1) {
+            jl_gc_pagemeta_t *pg = pop_lf_back(&tmp);
+            if (pg == NULL) {
+                break;
+            }
+            push_lf_back(&global_page_pool_lazily_freed, pg);
         }
-        jl_gc_free_page(pg);
-        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
-        push_lf_back(&global_page_pool_freed, pg);
     }
 }
 
@@ -1763,7 +1213,6 @@ void gc_move_to_global_page_pool(jl_gc_page_stack_t *pgstack)
 static void gc_sweep_pool(void)
 {
     gc_time_pool_start();
-    buffered_pages = 0;
 
     // For the benefit of the analyzer, which doesn't know that gc_n_threads
     // doesn't change over the course of this function
@@ -1783,9 +1232,9 @@ static void gc_sweep_pool(void)
             }
             continue;
         }
-        jl_atomic_store_relaxed(&ptls2->gc_num.pool_live_bytes, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0);
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
-            jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *last = p->freelist;
             if (last != NULL) {
                 jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last));
@@ -1804,17 +1253,10 @@ static void gc_sweep_pool(void)
                 pg->has_young = 1;
             }
         }
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_buffered.bottom);
-        while (pg != NULL) {
-            jl_gc_pagemeta_t *pg2 = pg->next;
-            buffered_pages++;
-            pg = pg2;
-        }
     }
 
     // the actual sweeping
-    jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) malloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
-    memset(new_gc_allocd_scratch, 0, n_threads * sizeof(jl_gc_padded_page_stack_t));
+    jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
     jl_ptls_t ptls = jl_current_task->ptls;
     gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
     gc_sweep_pool_parallel(ptls);
@@ -1824,9 +1266,9 @@ static void gc_sweep_pool(void)
     for (int t_i = 0; t_i < n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            ptls2->page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
+            ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
             for (int i = 0; i < JL_GC_N_POOLS; i++) {
-                jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+                jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
                 p->newpages = NULL;
             }
         }
@@ -1838,7 +1280,7 @@ static void gc_sweep_pool(void)
         if (ptls2 == NULL) {
             continue;
         }
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             jl_gc_pagemeta_t *pg2 = pg->next;
             if (pg->fl_begin_offset != UINT16_MAX) {
@@ -1898,12 +1340,10 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
     // We need to ensure that objects are in the remset at
     // most once, since the mark phase may update page metadata,
     // which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419
-    uintptr_t header = jl_atomic_load_relaxed((_Atomic(uintptr_t) *)&o->header);
-    header &= ~GC_OLD; // clear the age bit
-    header = jl_atomic_exchange_relaxed((_Atomic(uintptr_t) *)&o->header, header);
+    uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD);
     if (header & GC_OLD) { // write barrier has not been triggered in this object yet
-        arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
-        ptls->heap.remset_nptr++; // conservative
+        arraylist_push(&ptls->gc_tls.heap.remset, (jl_value_t*)ptr);
+        ptls->gc_tls.heap.remset_nptr++; // conservative
     }
 }
 
@@ -2005,8 +1445,8 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
                                        uintptr_t nptr) JL_NOTSAFEPOINT
 {
     if (__unlikely((nptr & 0x3) == 0x3)) {
-        ptls->heap.remset_nptr += nptr >> 2;
-        arraylist_t *remset = ptls->heap.remset;
+        ptls->gc_tls.heap.remset_nptr += nptr >> 2;
+        arraylist_t *remset = &ptls->gc_tls.heap.remset;
         size_t len = remset->len;
         if (__unlikely(len >= remset->max)) {
             arraylist_push(remset, obj);
@@ -2074,7 +1514,7 @@ JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t
     if (jl_n_gcthreads == 0) {
         jl_safe_printf("\n");
         jl_value_t *new_obj;
-        jl_gc_markqueue_t *mq = &ptls->mark_queue;
+        jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
         jl_safe_printf("thread %d ptr queue:\n", ptls->tid);
         jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n");
         while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) {
@@ -2113,7 +1553,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_
                          uint8_t *obj8_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj8_begin < obj8_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj8_begin < obj8_end; obj8_begin++) {
@@ -2145,7 +1585,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint
                           uint16_t *obj16_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj16_begin < obj16_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj16_begin < obj16_end; obj16_begin++) {
@@ -2177,7 +1617,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint
                           uint32_t *obj32_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj32_begin < obj32_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj32_begin < obj32_end; obj32_begin++) {
@@ -2208,7 +1648,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint
 STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin,
                       jl_value_t **obj_end, uint32_t step, uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     // Decide whether need to chunk objary
     assert(step > 0);
@@ -2276,7 +1716,7 @@ STATIC_INLINE void gc_mark_memory8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_v
                     jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end, uintptr_t elsize,
                     uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     assert(elsize > 0);
     (void)jl_assume(elsize > 0);
@@ -2324,7 +1764,7 @@ STATIC_INLINE void gc_mark_memory8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_v
             pushed_chunk = 1;
         }
     }
-    for (; ary8_begin < ary8_end; ary8_begin += elsize) {
+    for (; ary8_begin < scan_end; ary8_begin += elsize) {
         for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
             jl_value_t **slot = &ary8_begin[*pindex];
             new_obj = *slot;
@@ -2353,7 +1793,7 @@ STATIC_INLINE void gc_mark_memory16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl
                      jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end, size_t elsize,
                      uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     assert(elsize > 0);
     (void)jl_assume(elsize > 0);
@@ -2482,7 +1922,7 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch
 STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots, uintptr_t offset,
                    uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     uint32_t nr = nroots >> 2;
     while (1) {
@@ -2527,7 +1967,7 @@ STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroot
 // Mark exception stack
 STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, size_t itr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     while (itr > 0) {
         size_t bt_size = jl_excstack_bt_size(excstack, itr);
@@ -2558,7 +1998,7 @@ STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, siz
 STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, uintptr_t nptr,
                             uint8_t bits) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings);
     gc_assert_parent_validity((jl_value_t *)mb_parent, bindings);
     gc_try_claim_and_push(mq, bindings, &nptr);
@@ -2633,7 +2073,7 @@ JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
 {
     int may_claim = gc_try_setmark_tag(jl_astaggedvalue(obj), GC_MARKED);
     if (may_claim)
-        gc_ptr_queue_push(&ptls->mark_queue, obj);
+        gc_ptr_queue_push(&ptls->gc_tls.mark_queue, obj);
     return may_claim;
 }
 
@@ -2644,13 +2084,12 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
     gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr);
 }
 
-// Enqueue and mark all outgoing references from `new_obj` which have not been marked
-// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for
-// objects which have been enqueued into the `remset`
-FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
-                              int meta_updated)
+// Enqueue and mark all outgoing references from `new_obj` which have not been marked yet.
+// `_new_obj` has its lowest bit tagged if it's in the remset (in which case we shouldn't update page metadata)
+FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj)
 {
-    jl_value_t *new_obj = (jl_value_t *)_new_obj;
+    int meta_updated = (uintptr_t)_new_obj & GC_REMSET_PTR_TAG;
+    jl_value_t *new_obj = (jl_value_t *)((uintptr_t)_new_obj & ~(uintptr_t)GC_REMSET_PTR_TAG);
     mark_obj: {
         jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
         uintptr_t vtag = o->header & ~(uintptr_t)0xf;
@@ -2680,8 +2119,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t);
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz);
                 jl_value_t *objary_parent = new_obj;
                 jl_value_t **objary_begin = data;
                 jl_value_t **objary_end = data + l;
@@ -2692,8 +2129,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             else if (vtag == jl_module_tag << 4) {
                 if (update_meta)
                     gc_setmark(ptls, o, bits, sizeof(jl_module_t));
-                else if (foreign_alloc)
-                    objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t));
                 jl_module_t *mb_parent = (jl_module_t *)new_obj;
                 uintptr_t nptr = ((mb_parent->usings.len + 1) << 2) | (bits & GC_OLD);
                 gc_mark_module_binding(ptls, mb_parent, nptr, bits);
@@ -2701,8 +2136,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             else if (vtag == jl_task_tag << 4) {
                 if (update_meta)
                     gc_setmark(ptls, o, bits, sizeof(jl_task_t));
-                else if (foreign_alloc)
-                    objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t));
                 jl_task_t *ta = (jl_task_t *)new_obj;
                 gc_scrub_record_task(ta);
                 if (gc_cblist_task_scanner) {
@@ -2711,9 +2144,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                                         (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
                 }
         #ifdef COPY_STACKS
-                void *stkbuf = ta->stkbuf;
-                if (stkbuf && ta->copy_stack) {
-                    gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
+                void *stkbuf = ta->ctx.stkbuf;
+                if (stkbuf && ta->ctx.copy_stack) {
+                    gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz);
                     // For gc_heap_snapshot_record:
                     // TODO: attribute size of stack
                     // TODO: edge to stack data
@@ -2726,12 +2159,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 uintptr_t lb = 0;
                 uintptr_t ub = (uintptr_t)-1;
         #ifdef COPY_STACKS
-                if (stkbuf && ta->copy_stack && !ta->ptls) {
+                if (stkbuf && ta->ctx.copy_stack && !ta->ptls) {
                     int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                     assert(tid >= 0);
                     jl_ptls_t ptls2 = gc_all_tls_states[tid];
                     ub = (uintptr_t)ptls2->stackbase;
-                    lb = ub - ta->copy_stack;
+                    lb = ub - ta->ctx.copy_stack;
                     offset = (uintptr_t)stkbuf - lb;
                 }
         #endif
@@ -2771,16 +2204,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1;
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz);
             }
             else {
                 jl_datatype_t *vt = ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)];
                 size_t dtsz = jl_datatype_size(vt);
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
             }
             return;
         }
@@ -2799,24 +2228,19 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 else
                     gc_setmark_big(ptls, o, bits);
             }
-            else if (foreign_alloc) {
-                objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_genericmemory_t));
-            }
             int how = jl_genericmemory_how(m);
             if (how == 0 || how == 2) {
                 gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, jl_genericmemory_nbytes(m), how == 0 ? 2 : 0);
             }
             else if (how == 1) {
                 if (update_meta || foreign_alloc) {
-                    objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED,
-                                     jl_genericmemory_nbytes(m));
                     size_t nb = jl_genericmemory_nbytes(m);
                     gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, nb, 0);
                     if (bits == GC_OLD_MARKED) {
-                        ptls->gc_cache.perm_scanned_bytes += nb;
+                        ptls->gc_tls.gc_cache.perm_scanned_bytes += nb;
                     }
                     else {
-                        ptls->gc_cache.scanned_bytes += nb;
+                        ptls->gc_tls.gc_cache.scanned_bytes += nb;
                     }
                 }
             }
@@ -2876,8 +2300,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
         size_t dtsz = jl_datatype_size(vt);
         if (update_meta)
             gc_setmark(ptls, o, bits, dtsz);
-        else if (foreign_alloc)
-            objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
         if (vt == jl_weakref_type)
             return;
         const jl_datatype_layout_t *layout = vt->layout;
@@ -2885,6 +2307,16 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
         if (npointers == 0)
             return;
         uintptr_t nptr = (npointers << 2 | (bits & GC_OLD));
+        if (vt == jl_binding_partition_type) {
+            // BindingPartition has a special union of jl_value_t and flag bits
+            // but is otherwise regular.
+            jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_valueof(o);
+            jl_value_t *val = decode_restriction_value(
+                jl_atomic_load_relaxed(&bpart->restriction));
+            if (val)
+                gc_heap_snapshot_record_binding_partition_edge((jl_value_t*)bpart, val);
+            gc_try_claim_and_push(mq, val, &nptr);
+        }
         assert((layout->nfields > 0 || layout->flags.fielddesc_type == 3) &&
                "opaque types should have been handled specially");
         if (layout->flags.fielddesc_type == 0) {
@@ -2943,12 +2375,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
 void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
 {
     while (1) {
-        void *new_obj = (void *)gc_ptr_queue_pop(&ptls->mark_queue);
+        void *new_obj = (void *)gc_ptr_queue_pop(&ptls->gc_tls.mark_queue);
         // No more objects to mark
         if (__unlikely(new_obj == NULL)) {
             return;
         }
-        gc_mark_outrefs(ptls, mq, new_obj, 0);
+        gc_mark_outrefs(ptls, mq, new_obj);
     }
 }
 
@@ -2971,17 +2403,16 @@ void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
 // makes it easier to implement parallel marking via work-stealing
 JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
 {
-    gc_mark_loop_serial_(ptls, &ptls->mark_queue);
-    gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
+    gc_mark_loop_serial_(ptls, &ptls->gc_tls.mark_queue);
+    gc_drain_own_chunkqueue(ptls, &ptls->gc_tls.mark_queue);
 }
 
 void gc_mark_and_steal(jl_ptls_t ptls)
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
-    jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
     assert(master_tid != -1);
-    mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
+    jl_gc_markqueue_t *mq_master = &gc_all_tls_states[master_tid]->gc_tls.mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
@@ -2997,7 +2428,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         goto steal;
     }
     mark : {
-        gc_mark_outrefs(ptls, mq, new_obj, 0);
+        gc_mark_outrefs(ptls, mq, new_obj);
         goto pop;
     }
     // Note that for the stealing heuristics, we try to
@@ -3005,10 +2436,14 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     // since we know chunks will likely expand into a lot
     // of work for the mark loop
     steal : {
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
         // Try to steal chunk from random GC thread
         for (int i = 0; i < 4 * jl_n_markthreads; i++) {
-            uint32_t v = gc_first_tid + cong(jl_n_markthreads,  &ptls->rngseed);
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            int v = gc_random_parallel_collector_thread_id(ptls);
+            jl_ptls_t ptls2 = gc_all_tls_states[v];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
                 gc_mark_chunk(ptls, mq, &c);
@@ -3016,8 +2451,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Sequentially walk GC threads to try to steal chunk
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
                 gc_mark_chunk(ptls, mq, &c);
@@ -3025,34 +2462,34 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Try to steal chunk from master thread
-        if (mq_master != NULL) {
-            c = gc_chunkqueue_steal_from(mq_master);
-            if (c.cid != GC_empty_chunk) {
-                gc_mark_chunk(ptls, mq, &c);
-                goto pop;
-            }
+        c = gc_chunkqueue_steal_from(mq_master);
+        if (c.cid != GC_empty_chunk) {
+            gc_mark_chunk(ptls, mq, &c);
+            goto pop;
         }
         // Try to steal pointer from random GC thread
         for (int i = 0; i < 4 * jl_n_markthreads; i++) {
-            uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed);
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            int v = gc_random_parallel_collector_thread_id(ptls);
+            jl_ptls_t ptls2 = gc_all_tls_states[v];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
                 goto mark;
         }
         // Sequentially walk GC threads to try to steal pointer
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
                 goto mark;
         }
         // Try to steal pointer from master thread
-        if (mq_master != NULL) {
-            new_obj = gc_ptr_queue_steal_from(mq_master);
-            if (new_obj != NULL)
-                goto mark;
-        }
+        new_obj = gc_ptr_queue_steal_from(mq_master);
+        if (new_obj != NULL)
+            goto mark;
     }
 }
 
@@ -3061,10 +2498,10 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
     assert(ptls != NULL);
     // assume each chunk is worth 256 units of work and each pointer
     // is worth 1 unit of work
-    size_t work = 256 * (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) -
-        jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top));
-    work += (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) -
-        jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top));
+    size_t work = 256 * (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.bottom) -
+        jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.top));
+    work += (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.bottom) -
+        jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.top));
     return work;
 }
 
@@ -3103,12 +2540,13 @@ int gc_should_mark(void)
         }
         int tid = jl_atomic_load_relaxed(&gc_master_tid);
         assert(tid != -1);
+        assert(gc_all_tls_states != NULL);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
-        for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[tid];
-            if (ptls2 == NULL) {
-                continue;
-            }
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
             work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
@@ -3174,7 +2612,7 @@ void gc_mark_clean_reclaim_sets(void)
         if (ptls2 == NULL) {
             continue;
         }
-        arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set;
+        arraylist_t *reclaim_set2 = &ptls2->gc_tls.mark_queue.reclaim_set;
         ws_array_t *a = NULL;
         while ((a = (ws_array_t *)arraylist_pop(reclaim_set2)) != NULL) {
             free(a->buffer);
@@ -3187,28 +2625,10 @@ void gc_mark_clean_reclaim_sets(void)
         if (ptls2 == NULL) {
             continue;
         }
-        jl_atomic_store_relaxed(&ptls2->mark_queue.ptr_queue.bottom, 0);
-        jl_atomic_store_relaxed(&ptls2->mark_queue.ptr_queue.top, 0);
-        jl_atomic_store_relaxed(&ptls2->mark_queue.chunk_queue.bottom, 0);
-        jl_atomic_store_relaxed(&ptls2->mark_queue.chunk_queue.top, 0);
-    }
-}
-
-static void gc_premark(jl_ptls_t ptls2)
-{
-    arraylist_t *remset = ptls2->heap.remset;
-    ptls2->heap.remset = ptls2->heap.last_remset;
-    ptls2->heap.last_remset = remset;
-    ptls2->heap.remset->len = 0;
-    ptls2->heap.remset_nptr = 0;
-    // avoid counting remembered objects
-    // in `perm_scanned_bytes`
-    size_t len = remset->len;
-    void **items = remset->items;
-    for (size_t i = 0; i < len; i++) {
-        jl_value_t *item = (jl_value_t *)items[i];
-        objprofile_count(jl_typeof(item), 2, 0);
-        jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.bottom, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.top, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.bottom, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.top, 0);
     }
 }
 
@@ -3255,14 +2675,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
     }
 }
 
-static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
+static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
 {
-    size_t len = ptls2->heap.last_remset->len;
-    void **items = ptls2->heap.last_remset->items;
+    void **items = ptls2->gc_tls.heap.remset.items;
+    size_t len = ptls2->gc_tls.heap.remset.len;
     for (size_t i = 0; i < len; i++) {
-        // Objects in the `remset` are already marked,
-        // so a `gc_try_claim_and_push` wouldn't work here
-        gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1);
+        void *_v = items[i];
+        jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED;
+        jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG);
+        gc_ptr_queue_push(mq, v);
+    }
+    // Don't forget to clear the remset
+    ptls2->gc_tls.heap.remset.len = 0;
+    ptls2->gc_tls.heap.remset_nptr = 0;
+}
+
+static void gc_check_all_remsets_are_empty(void)
+{
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL) {
+            assert(ptls2->gc_tls.heap.remset.len == 0);
+            assert(ptls2->gc_tls.heap.remset_nptr == 0);
+        }
     }
 }
 
@@ -3407,13 +2842,6 @@ JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
     return num;
 }
 
-JL_DLLEXPORT void jl_gc_reset_stats(void)
-{
-    gc_num.max_pause = 0;
-    gc_num.max_memory = 0;
-    gc_num.max_time_to_safepoint = 0;
-}
-
 // TODO: these were supposed to be thread local
 JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
 {
@@ -3441,7 +2869,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
     for (int i = 0; i < n_threads; i++) {
         jl_ptls_t ptls2 = all_tls_states[i];
         if (ptls2 != NULL) {
-            pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_num.pool_live_bytes);
+            pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes);
         }
     }
     return pool_live_bytes;
@@ -3496,7 +2924,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // so that the sweep shows a downward trend in memory usage.
     jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd);
 
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
 
     uint64_t gc_start_time = jl_hrtime();
     uint64_t mutator_time = gc_end_time == 0 ? old_mut_time : gc_start_time - gc_end_time;
@@ -3506,15 +2934,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     JL_PROBE_GC_MARK_BEGIN();
     {
         JL_TIMING(GC, GC_Mark);
-
-        // 1. fix GC bits of objects in the remset.
-        assert(gc_n_threads);
-        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            if (ptls2 != NULL)
-                gc_premark(ptls2);
-        }
-
         assert(gc_n_threads);
         int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
         for (int t_i = 0; t_i < gc_n_threads; t_i++) {
@@ -3522,21 +2941,23 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
             jl_ptls_t ptls_dest = ptls;
             jl_gc_markqueue_t *mq_dest = mq;
             if (!single_threaded_mark) {
-                ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads];
-                mq_dest = &ptls_dest->mark_queue;
+                int dest_tid = gc_ith_parallel_collector_thread_id(t_i % jl_n_markthreads);
+                ptls_dest = gc_all_tls_states[dest_tid];
+                mq_dest = &ptls_dest->gc_tls.mark_queue;
             }
             if (ptls2 != NULL) {
-                // 2.1. mark every thread local root
+                // 1.1. mark every thread local root
                 gc_queue_thread_local(mq_dest, ptls2);
-                // 2.2. mark any managed objects in the backtrace buffer
+                // 1.2. mark any managed objects in the backtrace buffer
                 // TODO: treat these as roots for gc_heap_snapshot_record
                 gc_queue_bt_buf(mq_dest, ptls2);
-                // 2.3. mark every object in the `last_remsets` and `rem_binding`
-                gc_queue_remset(ptls_dest, ptls2);
+                // 1.3. mark every object in the remset
+                gc_queue_remset(mq_dest, ptls2);
             }
         }
+        gc_check_all_remsets_are_empty();
 
-        // 3. walk roots
+        // 2. walk roots
         gc_mark_roots(mq);
         if (gc_cblist_root_scanner) {
             gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
@@ -3546,7 +2967,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         gc_mark_loop_barrier();
         gc_mark_clean_reclaim_sets();
 
-        // 4. check for objects to finalize
+        // 3. check for objects to finalize
         clear_weak_refs();
         // Record the length of the marked list since we need to
         // mark the object moved to the marked list from the
@@ -3595,18 +3016,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // marking is over
 
     // Flush everything in mark cache
-    gc_sync_all_caches_nolock(ptls);
+    gc_sync_all_caches(ptls);
 
 
     gc_verify(ptls);
     gc_stats_all_pool();
     gc_stats_big_obj();
-    objprofile_printall();
-    objprofile_reset();
     gc_num.total_allocd += gc_num.allocd;
     if (!prev_sweep_full)
         promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
-    // 5. next collection decision
+    // 4. next collection decision
     int remset_nptr = 0;
     int sweep_full = next_sweep_full;
     int recollect = 0;
@@ -3614,7 +3033,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL)
-            remset_nptr += ptls2->heap.remset_nptr;
+            remset_nptr += ptls2->gc_tls.heap.remset_nptr;
     }
     (void)remset_nptr; //Use this information for something?
 
@@ -3636,7 +3055,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         promoted_bytes = 0;
     }
     scanned_bytes = 0;
-    // 6. start sweeping
+    // 5. start sweeping
     uint64_t start_sweep_time = jl_hrtime();
     JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
     {
@@ -3650,8 +3069,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 #endif
         current_sweep_full = sweep_full;
         sweep_weak_refs();
-        sweep_stack_pools();
-        gc_sweep_foreign_objs();
         gc_sweep_other(ptls, sweep_full);
         gc_scrub();
         gc_verify_tags();
@@ -3772,7 +3189,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     if (heap_size > user_max || thrashing)
         under_pressure = 1;
     // sweeping is over
-    // 7. if it is a quick sweep, put back the remembered objects in queued state
+    // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
     assert(gc_n_threads);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
@@ -3780,31 +3197,30 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         if (ptls2 == NULL)
             continue;
         if (!sweep_full) {
-            for (int i = 0; i < ptls2->heap.remset->len; i++) {
-                void *ptr = ptls2->heap.remset->items[i];
+            for (int i = 0; i < ptls2->gc_tls.heap.remset.len; i++) {
+                void *ptr = ptls2->gc_tls.heap.remset.items[i];
                 jl_astaggedvalue(ptr)->bits.gc = GC_MARKED;
             }
         }
         else {
-            ptls2->heap.remset->len = 0;
+            ptls2->gc_tls.heap.remset.len = 0;
         }
         // free empty GC state for threads that have exited
-        if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL &&
-            (ptls->tid < gc_first_tid || ptls2->tid >= gc_first_tid + jl_n_gcthreads)) {
-            jl_thread_heap_t *heap = &ptls2->heap;
+        if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+            // GC threads should never exit
+            assert(!gc_is_parallel_collector_thread(t_i));
+            assert(!gc_is_concurrent_collector_thread(t_i));
+            jl_thread_heap_t *heap = &ptls2->gc_tls.heap;
             if (heap->weak_refs.len == 0)
                 small_arraylist_free(&heap->weak_refs);
             if (heap->live_tasks.len == 0)
                 small_arraylist_free(&heap->live_tasks);
-            if (heap->remset->len == 0)
-                arraylist_free(heap->remset);
-            if (heap->last_remset->len == 0)
-                arraylist_free(heap->last_remset);
+            if (heap->remset.len == 0)
+                arraylist_free(&heap->remset);
             if (ptls2->finalizers.len == 0)
                 arraylist_free(&ptls2->finalizers);
-            if (ptls2->sweep_objs.len == 0)
-                arraylist_free(&ptls2->sweep_objs);
-            gc_move_to_global_page_pool(&ptls2->page_metadata_buffered);
+            if (ptls2->gc_tls.sweep_objs.len == 0)
+                arraylist_free(&ptls2->gc_tls.sweep_objs);
         }
     }
 
@@ -3833,7 +3249,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     live_bytes += -gc_num.freed + gc_num.allocd;
     jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);
 
-    gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed,
+    gc_time_summary(sweep_full, gc_start_time, gc_end_time, gc_num.freed,
                     live_bytes, gc_num.interval, pause,
                     gc_num.time_to_safepoint,
                     gc_num.mark_time, gc_num.sweep_time);
@@ -3867,8 +3283,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
     if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
-        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
         static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
         jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
@@ -3941,7 +3357,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // Only disable finalizers on current thread
     // Doing this on all threads is racy (it's impossible to check
     // or wait for finalizers on other threads without dead lock).
-    if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
+    if (!ptls->finalizers_inhibited && ptls->locks.len == 0 && ptls->engine_nqueued == 0) {
         JL_TIMING(GC, GC_Finalizers);
         run_finalizers(ct, 0);
     }
@@ -3980,7 +3396,7 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
-    jl_thread_heap_t *heap = &ptls->heap;
+    jl_thread_heap_t *heap = &ptls->gc_tls.heap;
     jl_gc_pool_t *p = heap->norm_pools;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
         p[i].osize = jl_gc_sizeclasses[i];
@@ -3993,21 +3409,19 @@ void jl_init_thread_heap(jl_ptls_t ptls)
         small_arraylist_new(&heap->free_stacks[i], 0);
     heap->mallocarrays = NULL;
     heap->mafreelist = NULL;
-    heap->big_objects = NULL;
-    heap->remset = &heap->_remset[0];
-    heap->last_remset = &heap->_remset[1];
-    arraylist_new(heap->remset, 0);
-    arraylist_new(heap->last_remset, 0);
+    heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
+    heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
+    arraylist_new(&heap->remset, 0);
     arraylist_new(&ptls->finalizers, 0);
-    arraylist_new(&ptls->sweep_objs, 0);
+    arraylist_new(&ptls->gc_tls.sweep_objs, 0);
 
-    jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
+    jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
-    gc_cache->nbig_obj = 0;
 
     // Initialize GC mark-queue
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq->chunk_queue;
     ws_array_t *wsa = create_ws_array(GC_CHUNK_QUEUE_INIT_SIZE, sizeof(jl_gc_chunk_t));
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -4020,13 +3434,13 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     jl_atomic_store_relaxed(&q->array, wsa2);
     arraylist_new(&mq->reclaim_set, 32);
 
-    memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+    memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num));
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
 }
 
 void jl_free_thread_gc_state(jl_ptls_t ptls)
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq->chunk_queue;
     free_ws_array(jl_atomic_load_relaxed(&cq->array));
     jl_atomic_store_relaxed(&cq->array, NULL);
@@ -4036,18 +3450,116 @@ void jl_free_thread_gc_state(jl_ptls_t ptls)
     arraylist_free(&mq->reclaim_set);
 }
 
+void jl_start_gc_threads(void)
+{
+    int nthreads = jl_atomic_load_relaxed(&jl_n_threads);
+    int ngcthreads = jl_n_gcthreads;
+    int nmutator_threads = nthreads - ngcthreads;
+    uv_thread_t uvtid;
+    for (int i = nmutator_threads; i < nthreads; ++i) {
+        jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
+        t->tid = i;
+        t->barrier = &thread_init_done;
+        if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
+            uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t);
+        }
+        else {
+            uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t);
+        }
+        uv_thread_detach(&uvtid);
+    }
+}
+
+STATIC_INLINE int may_mark(void) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&gc_n_threads_marking) > 0);
+}
+
+STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0);
+}
+
+// parallel gc thread function
+void jl_parallel_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_PARALLEL_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        uv_mutex_lock(&gc_threads_lock);
+        while (!may_mark() && !may_sweep(ptls)) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
+        uv_mutex_unlock(&gc_threads_lock);
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+        gc_mark_loop_parallel(ptls, 0);
+        if (may_sweep(ptls)) {
+            assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+            gc_sweep_pool_parallel(ptls);
+            jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1);
+        }
+    }
+}
+
+// concurrent gc thread function
+void jl_concurrent_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_CONCURRENT_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_CONCURRENT_COLLECTOR_THREAD);
+        uv_sem_wait(&gc_sweep_assists_needed);
+        gc_free_pages();
+    }
+}
+
 // System-wide initializations
 void jl_gc_init(void)
 {
     JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
     JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&page_profile_lock);
-    uv_mutex_init(&gc_cache_lock);
     uv_mutex_init(&gc_perm_lock);
+    uv_mutex_init(&gc_pages_lock);
     uv_mutex_init(&gc_threads_lock);
     uv_cond_init(&gc_threads_cond);
     uv_sem_init(&gc_sweep_assists_needed, 0);
     uv_mutex_init(&gc_queue_observer_lock);
+    void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag
+    uintptr_t addr = (uintptr_t)_addr;
+    gc_bigval_sentinel_tag = addr;
+    oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag;
 
     jl_gc_init_page();
     jl_gc_debug_init();
@@ -4056,7 +3568,6 @@ void jl_gc_init(void)
     arraylist_new(&to_finalize, 0);
     jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval);
     gc_num.interval = default_collect_interval;
-    last_long_collect_interval = default_collect_interval;
     gc_num.allocd = 0;
     gc_num.max_pause = 0;
     gc_num.max_memory = 0;
@@ -4077,20 +3588,14 @@ void jl_gc_init(void)
             hint = min_heap_size_hint;
         jl_gc_set_max_memory(hint - mem_reserve);
     }
-
-    t_start = jl_hrtime();
 }
 
 JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
 {
-    if (max_mem > 0
-        && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) {
-        #ifdef _P64
-        max_total_memory = max_mem;
-        #else
-        max_total_memory = max_mem < MAX32HEAP ? max_mem : MAX32HEAP;
-        #endif
-    }
+#ifdef _P32
+    max_mem = max_mem < MAX32HEAP ? max_mem : MAX32HEAP;
+#endif
+    max_total_memory = max_mem;
 }
 
 JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
@@ -4098,12 +3603,6 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
     return max_total_memory;
 }
 
-// callback for passing OOM errors from gmp
-JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
-{
-    jl_throw(jl_memory_exception);
-}
-
 // allocation wrappers that track allocation and let collection run
 
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
@@ -4114,10 +3613,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
     if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
         jl_batch_accum_heap_size(ptls, sz);
     }
     return data;
@@ -4131,10 +3630,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
     if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
         jl_batch_accum_heap_size(ptls, sz * nm);
     }
     return data;
@@ -4159,10 +3658,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (!(sz < old))
-            jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-                jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
-        jl_atomic_store_relaxed(&ptls->gc_num.realloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
+            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+                jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old));
+        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1);
 
         int64_t diff = sz - old;
         if (diff < 0) {
@@ -4250,10 +3749,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     if (b == NULL)
         jl_throw(jl_memory_exception);
 
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
     jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
@@ -4264,98 +3763,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     return b;
 }
 
-static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
-                                 int isaligned, jl_value_t *owner, int8_t can_collect)
-{
-    if (can_collect)
-        maybe_collect(ptls);
-    int is_old_marked = jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED;
-    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
-    if (allocsz < sz)  // overflow in adding offs, size was "negative"
-        jl_throw(jl_memory_exception);
-
-    int last_errno = errno;
-#ifdef _OS_WINDOWS_
-    DWORD last_error = GetLastError();
-#endif
-    void *b;
-    if (isaligned)
-        b = realloc_cache_align(d, allocsz, oldsz);
-    else
-        b = realloc(d, allocsz);
-    if (b == NULL)
-        jl_throw(jl_memory_exception);
-#ifdef _OS_WINDOWS_
-    SetLastError(last_error);
-#endif
-    errno = last_errno;
-    // gc_managed_realloc_ is currently used exclusively for resizing array buffers.
-    if (is_old_marked) {
-        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
-        inc_live_bytes(allocsz - oldsz);
-    }
-    else if (!(allocsz < oldsz))
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
-    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
-
-    int64_t diff = allocsz - oldsz;
-    if (diff < 0) {
-        jl_batch_accum_free_size(ptls, -diff);
-    }
-    else {
-        jl_batch_accum_heap_size(ptls, diff);
-    }
-    if (allocsz > oldsz) {
-        maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag);
-    }
-    return b;
-}
-
-JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
-                                         int isaligned, jl_value_t *owner)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1);
-}
-
-jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
-{
-    size_t len = jl_string_len(s);
-    if (sz <= len) return s;
-    jl_taggedvalue_t *v = jl_astaggedvalue(s);
-    size_t strsz = len + sizeof(size_t) + 1;
-    if (strsz <= GC_MAX_SZCLASS ||
-        // TODO: because of issue #17971 we can't resize old objects
-        gc_marked(v->bits.gc)) {
-        // pool allocated; can't be grown in place so allocate a new object.
-        jl_value_t *snew = jl_alloc_string(sz);
-        memcpy(jl_string_data(snew), jl_string_data(s), len);
-        return snew;
-    }
-    size_t newsz = sz + sizeof(size_t) + 1;
-    size_t offs = sizeof(bigval_t);
-    size_t oldsz = LLT_ALIGN(strsz + offs, JL_CACHE_BYTE_ALIGNMENT);
-    size_t allocsz = LLT_ALIGN(newsz + offs, JL_CACHE_BYTE_ALIGNMENT);
-    if (allocsz < sz)  // overflow in adding offs, size was "negative"
-        jl_throw(jl_memory_exception);
-    bigval_t *hdr = bigval_header(v);
-    jl_ptls_t ptls = jl_current_task->ptls;
-    maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc
-    gc_big_object_unlink(hdr);
-    // TODO: this is not safe since it frees the old pointer. ideally we'd like
-    // the old pointer to be left alone if we can't grow in place.
-    // for now it's up to the caller to make sure there are no references to the
-    // old pointer.
-    bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0);
-    newbig->sz = allocsz;
-    gc_big_object_link(newbig, &ptls->heap.big_objects);
-    jl_value_t *snew = jl_valueof(&newbig->header);
-    *(size_t*)snew = sz;
-    return snew;
-}
-
 // Perm gen allocator
 // 2M pool
 #define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
@@ -4400,7 +3807,7 @@ STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned o
 }
 
 // **NOT** a safepoint
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
+void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT
 {
     // The caller should have acquired `gc_perm_lock`
     assert(align < GC_PERM_POOL_LIMIT);
@@ -4445,15 +3852,16 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
     return p;
 }
 
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
+jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_gc_add_finalizer_th(ptls, v, f);
-}
-
-JL_DLLEXPORT void jl_finalize(jl_value_t *o)
-{
-    jl_finalize_th(jl_current_task, o);
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+                                                 sizeof(void*) * 2 : 16));
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
+                                                              sizeof(void*) % align);
+    uintptr_t tag = (uintptr_t)ty;
+    o->header = tag | GC_OLD_MARKED;
+    return jl_valueof(o);
 }
 
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
@@ -4468,30 +3876,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
     return jl_gc_alloc(ptls, sz, NULL);
 }
 
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, 0, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*), NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
-}
-
 JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
 {
     if (jl_is_initialized()) {
@@ -4546,7 +3930,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
             goto valid_object;
         }
         jl_gc_pool_t *pool =
-            gc_all_tls_states[meta->thread_n]->heap.norm_pools +
+            gc_all_tls_states[meta->thread_n]->gc_tls.heap.norm_pools +
             meta->pool_n;
         if (meta->fl_begin_offset == UINT16_MAX) {
             // case 2: this is a page on the newpages list
@@ -4626,7 +4010,7 @@ JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
 
 JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
 {
-    arraylist_push(&ptls->sweep_objs, obj);
+    arraylist_push(&ptls->gc_tls.sweep_objs, obj);
 }
 
 #ifdef __cplusplus
diff --git a/src/gc.h b/src/gc-stock.h
similarity index 86%
rename from src/gc.h
rename to src/gc-stock.h
index 4bb8828910e6c..45c93bf4289ae 100644
--- a/src/gc.h
+++ b/src/gc-stock.h
@@ -18,16 +18,8 @@
 #include "julia.h"
 #include "julia_threads.h"
 #include "julia_internal.h"
-#include "threading.h"
-#ifndef _OS_WINDOWS_
-#include <sys/mman.h>
-#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-#endif
 #include "julia_assert.h"
-#include "gc-heap-snapshot.h"
-#include "gc-alloc-profiler.h"
+#include "threading.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -41,9 +33,6 @@ extern "C" {
 #define GC_PAGE_SZ (1 << GC_PAGE_LG2)
 #define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
 
-#define jl_malloc_tag ((void*)0xdeadaa01)
-#define jl_singleton_tag ((void*)0xdeadaa02)
-
 // Used by GC_DEBUG_ENV
 typedef struct {
     uint64_t num;
@@ -62,34 +51,6 @@ typedef struct {
     jl_alloc_num_t print;
 } jl_gc_debug_env_t;
 
-// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
-typedef struct {
-    int64_t     allocd;
-    int64_t     deferred_alloc;
-    int64_t     freed;
-    uint64_t    malloc;
-    uint64_t    realloc;
-    uint64_t    poolalloc;
-    uint64_t    bigalloc;
-    uint64_t    freecall;
-    uint64_t    total_time;
-    uint64_t    total_allocd;
-    size_t      interval;
-    int         pause;
-    int         full_sweep;
-    uint64_t    max_pause;
-    uint64_t    max_memory;
-    uint64_t    time_to_safepoint;
-    uint64_t    max_time_to_safepoint;
-    uint64_t    total_time_to_safepoint;
-    uint64_t    sweep_time;
-    uint64_t    mark_time;
-    uint64_t    total_sweep_time;
-    uint64_t    total_mark_time;
-    uint64_t    last_full_sweep;
-    uint64_t    last_incremental_sweep;
-} jl_gc_num_t;
-
 // Array chunks (work items representing suffixes of
 // large arrays of pointers left to be marked)
 
@@ -118,11 +79,15 @@ typedef struct _jl_gc_chunk_t {
 #define GC_PTR_QUEUE_INIT_SIZE (1 << 18)    // initial size of queue of `jl_value_t *`
 #define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14)  // initial size of chunk-queue
 
+#define GC_REMSET_PTR_TAG (0x1)             // lowest bit of `jl_value_t *` is tagged if it's in the remset
+
 // layout for big (>2k) objects
 
+extern uintptr_t gc_bigval_sentinel_tag;
+
 JL_EXTENSION typedef struct _bigval_t {
     struct _bigval_t *next;
-    struct _bigval_t **prev; // pointer to the next field of the prev entry
+    struct _bigval_t *prev;
     size_t sz;
 #ifdef _P64 // Add padding so that the value is 64-byte aligned
     // (8 pointers of 8 bytes each) - (4 other pointers in struct)
@@ -141,17 +106,16 @@ JL_EXTENSION typedef struct _bigval_t {
     // must be 64-byte aligned here, in 32 & 64 bit modes
 } bigval_t;
 
-// data structure for tracking malloc'd arrays and genericmemory.
-
-typedef struct _mallocarray_t {
-    jl_value_t *a;
-    struct _mallocarray_t *next;
-} mallocarray_t;
+// data structure for tracking malloc'd genericmemory.
+typedef struct _mallocmemory_t {
+    jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
+    struct _mallocmemory_t *next;
+} mallocmemory_t;
 
 // pool page metadata
 typedef struct _jl_gc_pagemeta_t {
     // next metadata structure in per-thread list
-    // or in one of the `jl_gc_global_page_pool_t`
+    // or in one of the `jl_gc_page_stack_t`
     struct _jl_gc_pagemeta_t *next;
     // index of pool that owns this page
     uint8_t pool_n;
@@ -437,80 +401,118 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
 }
 #endif
 
-extern jl_gc_num_t gc_num;
-extern bigval_t *big_objects_marked;
-extern arraylist_t finalizer_list_marked;
-extern arraylist_t to_finalize;
+extern bigval_t *oldest_generation_of_bigvals;
 extern int64_t buffered_pages;
 extern int gc_first_tid;
-extern int gc_n_threads;
-extern jl_ptls_t* gc_all_tls_states;
 extern gc_heapstatus_t gc_heap_stats;
 
-STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_first_parallel_collector_thread_id(void) JL_NOTSAFEPOINT
 {
-    return container_of(o, bigval_t, header);
+    if (jl_n_markthreads == 0) {
+        return 0;
+    }
+    return gc_first_tid;
 }
 
-STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_last_parallel_collector_thread_id(void) JL_NOTSAFEPOINT
 {
-    return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset);
+    if (jl_n_markthreads == 0) {
+        return -1;
+    }
+    return gc_first_tid + jl_n_markthreads - 1;
 }
 
-STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT
 {
-    return (jl_taggedvalue_t*)(p->data + p->fl_end_offset);
+    assert(i >= 0 && i < jl_n_markthreads);
+    return gc_first_tid + i;
 }
 
-STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT
 {
-    return (bits & GC_MARKED) != 0;
+    return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id();
 }
 
-STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT
 {
-    return (bits & GC_OLD) != 0;
+    if (jl_n_sweepthreads == 0) {
+        return 0;
+    }
+    int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id();
+    int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1;
+    return tid == concurrent_collector_thread_id;
 }
 
-STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    return (tag & ~(uintptr_t)3) | bits;
+    assert(jl_n_markthreads > 0);
+    int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n)
+    assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id());
+    return v;
 }
 
-STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+STATIC_INLINE int gc_parallel_collector_threads_enabled(void) JL_NOTSAFEPOINT
 {
-    return ((uintptr_t)v) & mask;
+    return jl_n_markthreads > 0;
 }
 
-STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+STATIC_INLINE void gc_check_ptls_of_parallel_collector_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    return (void*)(((uintptr_t)v) & ~mask);
+    (void)ptls;
+    assert(gc_parallel_collector_threads_enabled());
+    assert(ptls != NULL);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
 }
 
-NOINLINE uintptr_t gc_get_stack_ptr(void);
+STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
+{
+    return container_of(o, bigval_t, header);
+}
+
+STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+{
+    return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset);
+}
 
-STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT
+STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+{
+    return (jl_taggedvalue_t*)(p->data + p->fl_end_offset);
+}
+
+FORCE_INLINE void gc_big_object_unlink(const bigval_t *node) JL_NOTSAFEPOINT
 {
-    *hdr->prev = hdr->next;
-    if (hdr->next) {
-        hdr->next->prev = hdr->prev;
+    assert(node != oldest_generation_of_bigvals);
+    assert(node->header != gc_bigval_sentinel_tag);
+    assert(node->prev != NULL);
+    if (node->next != NULL) {
+        node->next->prev = node->prev;
     }
+    node->prev->next = node->next;
 }
 
-STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFEPOINT
+FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL_NOTSAFEPOINT
 {
-    hdr->next = *list;
-    hdr->prev = list;
-    if (*list)
-        (*list)->prev = &hdr->next;
-    *list = hdr;
+    assert(sentinel_node != NULL);
+    assert(sentinel_node->header == gc_bigval_sentinel_tag);
+    assert(sentinel_node->prev == NULL);
+    assert(node->header != gc_bigval_sentinel_tag);
+    // a new node gets linked in at the head of the list
+    node->next = sentinel_node->next;
+    node->prev = sentinel_node;
+    if (sentinel_node->next != NULL) {
+        sentinel_node->next->prev = node;
+    }
+    sentinel_node->next = node;
 }
 
+extern uv_mutex_t gc_perm_lock;
 extern uv_mutex_t gc_threads_lock;
 extern uv_cond_t gc_threads_cond;
 extern uv_sem_t gc_sweep_assists_needed;
 extern _Atomic(int) gc_n_threads_marking;
 extern _Atomic(int) gc_n_threads_sweeping;
+extern _Atomic(int) n_threads_running;
+extern uv_barrier_t thread_init_done;
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
 void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
@@ -519,11 +521,12 @@ void gc_mark_loop_serial(jl_ptls_t ptls);
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
 void gc_sweep_pool_parallel(jl_ptls_t ptls);
 void gc_free_pages(void);
-void sweep_stack_pools(void);
+void sweep_stack_pools(void) JL_NOTSAFEPOINT;
 void jl_gc_debug_init(void);
 
 // GC pages
 
+extern uv_mutex_t gc_pages_lock;
 void jl_gc_init_page(void) JL_NOTSAFEPOINT;
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT;
 void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT;
@@ -694,24 +697,6 @@ static inline void gc_scrub(void)
 }
 #endif
 
-#ifdef OBJPROFILE
-void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT;
-void objprofile_printall(void);
-void objprofile_reset(void);
-#else
-static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT
-{
-}
-
-static inline void objprofile_printall(void)
-{
-}
-
-static inline void objprofile_reset(void)
-{
-}
-#endif
-
 #ifdef MEMPROFILE
 void gc_stats_all_pool(void);
 void gc_stats_big_obj(void);
@@ -723,8 +708,6 @@ void gc_stats_big_obj(void);
 // For debugging
 void gc_count_pool(void);
 
-size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
-
 JL_DLLEXPORT void jl_enable_gc_logging(int enable);
 JL_DLLEXPORT int jl_is_gc_logging_enabled(void);
 JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void);
diff --git a/src/gc-tls.h b/src/gc-tls.h
new file mode 100644
index 0000000000000..9e4b09404db84
--- /dev/null
+++ b/src/gc-tls.h
@@ -0,0 +1,92 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Meant to be included in "julia_threads.h"
+#ifndef JL_GC_TLS_H
+#define JL_GC_TLS_H
+
+#include "julia_atomics.h"
+#include "work-stealing-queue.h"
+// GC threading ------------------------------------------------------------------
+
+#include "arraylist.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    struct _jl_taggedvalue_t *freelist; // root of list of free objects
+    struct _jl_taggedvalue_t *newpages; // root of list of chunks of free objects
+    uint16_t osize; // size of objects in this pool
+} jl_gc_pool_t;
+
+typedef struct {
+    // variable for tracking weak references
+    small_arraylist_t weak_refs;
+    // live tasks started on this thread
+    // that are holding onto a stack from the pool
+    small_arraylist_t live_tasks;
+
+    // variables for tracking malloc'd arrays
+    struct _mallocmemory_t *mallocarrays;
+    struct _mallocmemory_t *mafreelist;
+
+    // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects
+    struct _bigval_t *young_generation_of_bigvals;
+
+    // lower bound of the number of pointers inside remembered values
+    int remset_nptr;
+    // remembered set
+    arraylist_t remset;
+
+    // variables for allocating objects from pools
+#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
+    jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];
+
+#define JL_N_STACK_POOLS 16
+    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
+} jl_thread_heap_t;
+
+typedef struct {
+    _Atomic(int64_t) allocd;
+    _Atomic(int64_t) pool_live_bytes;
+    _Atomic(uint64_t) malloc;
+    _Atomic(uint64_t) realloc;
+    _Atomic(uint64_t) poolalloc;
+    _Atomic(uint64_t) bigalloc;
+    _Atomic(int64_t) free_acc;
+    _Atomic(uint64_t) alloc_acc;
+} jl_thread_gc_num_t;
+
+typedef struct {
+    ws_queue_t chunk_queue;
+    ws_queue_t ptr_queue;
+    arraylist_t reclaim_set;
+} jl_gc_markqueue_t;
+
+typedef struct {
+    // thread local increment of `perm_scanned_bytes`
+    size_t perm_scanned_bytes;
+    // thread local increment of `scanned_bytes`
+    size_t scanned_bytes;
+} jl_gc_mark_cache_t;
+
+typedef struct {
+    _Atomic(struct _jl_gc_pagemeta_t *) bottom;
+} jl_gc_page_stack_t;
+
+typedef struct {
+    jl_thread_heap_t heap;
+    jl_gc_page_stack_t page_metadata_allocd;
+    jl_thread_gc_num_t gc_num;
+    jl_gc_markqueue_t mark_queue;
+    jl_gc_mark_cache_t gc_cache;
+    _Atomic(size_t) gc_sweeps_requested;
+    arraylist_t sweep_objs;
+} jl_gc_tls_states_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_GC_TLS_H
diff --git a/src/genericmemory.c b/src/genericmemory.c
index f0e7b695f1122..ea52fca66ba48 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -161,9 +161,9 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
     m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype);
     m->ptr = data;
     m->length = nel;
-    jl_genericmemory_data_owner_field(m) = NULL;
-    int isaligned = 0;  // TODO: allow passing memalign'd buffers
+    jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL;
     if (own_buffer) {
+        int isaligned = 0;  // TODO: allow passing memalign'd buffers
         jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
         jl_gc_count_allocd(nel*elsz);
     }
@@ -208,8 +208,11 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
         JL_GC_PUSH1(&o);
         jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
         JL_GC_POP();
+        if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon
+            jl_gc_count_freed(mlength);
         return str;
     }
+    // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object
     return jl_pchar_to_string((const char*)m->ptr, len);
 }
 
diff --git a/src/gf.c b/src/gf.c
index 88890b7e5f203..970cb62b8a862 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -95,7 +95,7 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee)
     JL_CATCH {
         ct->ptls->in_pure_callback = last_in;
         jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n");
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         jlbacktrace(); // written to STDERR_FILENO
     }
@@ -113,7 +113,7 @@ static int8_t jl_cachearg_offset(jl_methtable_t *mt)
 
 static uint_t speccache_hash(size_t idx, jl_value_t *data)
 {
-    jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
+    jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_value_t *sig = ml->specTypes;
     if (jl_is_unionall(sig))
         sig = jl_unwrap_unionall(sig);
@@ -122,6 +122,8 @@ static uint_t speccache_hash(size_t idx, jl_value_t *data)
 
 static int speccache_eq(size_t idx, const void *ty, jl_value_t *data, uint_t hv)
 {
+    if (idx >= jl_svec_len(data))
+        return 0; // We got a OOB access, probably due to a data race
     jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
     jl_value_t *sig = ml->specTypes;
     if (ty == sig)
@@ -320,7 +322,7 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
 
     jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
         (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing, jl_nothing,
-        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
+        0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL);
     jl_mi_cache_insert(mi, codeinst);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr1, fptr);
     jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_args);
@@ -338,7 +340,7 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
 // returns the inferred source, and may cache the result in mi
 // if successful, also updates the mi argument to describe the validity of this src
 // if inference doesn't occur (or can't finish), returns NULL instead
-jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force, uint8_t source_mode)
+jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, uint8_t source_mode)
 {
     if (jl_typeinf_func == NULL)
         return NULL;
@@ -354,7 +356,7 @@ jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int fo
 
     jl_code_instance_t *ci = NULL;
 #ifdef ENABLE_INFERENCE
-    if (mi->inInference && !force)
+    if (jl_engine_hasreserved(mi, jl_nothing)) // don't recur on a thread on the same MethodInstance--force it to interpret it until the inference has finished
         return NULL;
     JL_TIMING(INFERENCE, INFERENCE);
     jl_value_t **fargs;
@@ -380,7 +382,6 @@ jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int fo
     ct->ptls->in_pure_callback = 0;
     size_t last_age = ct->world_age;
     ct->world_age = jl_typeinf_world;
-    mi->inInference = 1;
     // first bit is for reentrant timing,
     // so adding 1 to the bit above performs
     // inference reentrancy counter addition.
@@ -393,7 +394,7 @@ jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int fo
         ci = (jl_code_instance_t*)jl_apply(fargs, 4);
     }
     JL_CATCH {
-        jl_value_t *e = jl_current_exception();
+        jl_value_t *e = jl_current_exception(ct);
         jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: during type inference of\n");
         jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes);
         jl_printf((JL_STREAM*)STDERR_FILENO, "\nEncountered ");
@@ -415,7 +416,6 @@ jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int fo
     ct->world_age = last_age;
     ct->reentrant_timing -= 0b10;
     ct->ptls->in_pure_callback = last_pure;
-    mi->inInference = 0;
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
@@ -430,6 +430,44 @@ jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int fo
     return ci;
 }
 
+// Attempt to run `Core.Compiler.code_typed` on the lambda "mi"
+JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_code_info_t *ci = NULL;
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    int last_pure = ct->ptls->in_pure_callback;
+    ct->ptls->in_pure_callback = 0;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
+    jl_value_t **fargs;
+    JL_GC_PUSHARGS(fargs, 4);
+    jl_module_t *CC = (jl_module_t*)jl_get_global(jl_core_module, jl_symbol("Compiler"));
+    if (CC != NULL && jl_is_module(CC)) {
+        fargs[0] = jl_get_global(CC, jl_symbol("NativeInterpreter"));;
+        fargs[1] = jl_box_ulong(world);
+        fargs[1] = jl_apply(fargs, 2);
+        fargs[0] = jl_get_global(CC, jl_symbol("typeinf_code"));
+        fargs[2] = (jl_value_t*)mi;
+        fargs[3] = jl_true;
+        ci = (jl_code_info_t*)jl_apply(fargs, 4);
+    }
+    ct->world_age = last_age;
+    ct->ptls->in_pure_callback = last_pure;
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    if (ci && !jl_is_code_info(ci)) {
+        ci = NULL;
+    }
+    JL_GC_POP();
+    return ci;
+}
+
 JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
 {
     jl_task_t *ct = jl_current_task;
@@ -442,7 +480,7 @@ JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
 
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
-        size_t min_world, size_t max_world)
+        size_t min_world, size_t max_world, jl_debuginfo_t *edges)
 {
     jl_value_t *owner = jl_nothing; // TODO: owner should be arg
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
@@ -451,25 +489,45 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
             jl_atomic_load_relaxed(&codeinst->max_world) == max_world &&
             jl_egal(codeinst->owner, owner) &&
             jl_egal(codeinst->rettype, rettype)) {
-            return codeinst;
+            if (edges == NULL)
+                return codeinst;
+            jl_debuginfo_t *debuginfo = jl_atomic_load_relaxed(&codeinst->debuginfo);
+            if (edges == debuginfo)
+                return codeinst;
+            if (debuginfo == NULL && jl_atomic_cmpswap_relaxed(&codeinst->debuginfo, &debuginfo, edges))
+                return codeinst;
+            if (debuginfo && jl_egal((jl_value_t*)debuginfo, (jl_value_t*)edges))
+                return codeinst;
         }
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     codeinst = jl_new_codeinst(
         mi, owner, rettype, (jl_value_t*)jl_any_type, NULL, NULL,
-        0, min_world, max_world, 0, 0, jl_nothing, 0);
+        0, min_world, max_world, 0, jl_nothing, 0, edges);
     jl_mi_cache_insert(mi, codeinst);
     return codeinst;
 }
 
+JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi,
+                                     jl_code_instance_t *ci)
+{
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
+    while (codeinst) {
+        if (codeinst == ci)
+            return 1;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
+    }
+    return 0;
+}
+
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *owner,
         jl_value_t *rettype, jl_value_t *exctype,
         jl_value_t *inferred_const, jl_value_t *inferred,
         int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *analysis_results,
-        uint8_t relocatability
-        /*, jl_array_t *edges, int absolute_max*/)
+        uint32_t effects, jl_value_t *analysis_results,
+        uint8_t relocatability,
+        jl_debuginfo_t *edges /* , int absolute_max*/)
 {
     jl_task_t *ct = jl_current_task;
     assert(min_world <= max_world && "attempting to set invalid world constraints");
@@ -482,10 +540,10 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     codeinst->rettype = rettype;
     codeinst->exctype = exctype;
     jl_atomic_store_release(&codeinst->inferred, inferred);
-    //codeinst->edges = NULL;
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
+    jl_atomic_store_relaxed(&codeinst->debuginfo, (jl_value_t*)edges == jl_nothing ? NULL : edges);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
     jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     if ((const_flags & 1) != 0) {
@@ -495,13 +553,74 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
     jl_atomic_store_relaxed(&codeinst->precompile, 0);
     jl_atomic_store_relaxed(&codeinst->next, NULL);
-    codeinst->ipo_purity_bits = ipo_effects;
-    jl_atomic_store_relaxed(&codeinst->purity_bits, effects);
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
     codeinst->analysis_results = analysis_results;
     codeinst->relocatability = relocatability;
     return codeinst;
 }
 
+JL_DLLEXPORT void jl_update_codeinst(
+        jl_code_instance_t *codeinst, jl_value_t *inferred,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t effects, jl_value_t *analysis_results,
+        uint8_t relocatability, jl_debuginfo_t *edges /* , int absolute_max*/)
+{
+    codeinst->relocatability = relocatability;
+    codeinst->analysis_results = analysis_results;
+    jl_gc_wb(codeinst, analysis_results);
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
+    jl_atomic_store_relaxed(&codeinst->debuginfo, edges);
+    jl_gc_wb(codeinst, edges);
+    if ((const_flags & 1) != 0) {
+        assert(codeinst->rettype_const);
+        jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return);
+    }
+    jl_atomic_store_release(&codeinst->inferred, inferred);
+    jl_gc_wb(codeinst, inferred);
+    jl_atomic_store_relaxed(&codeinst->min_world, min_world); // XXX: these should be unchanged?
+    jl_atomic_store_relaxed(&codeinst->max_world, max_world); // since the edges shouldn't change after jl_fill_codeinst
+}
+
+JL_DLLEXPORT void jl_fill_codeinst(
+        jl_code_instance_t *codeinst,
+        jl_value_t *rettype, jl_value_t *exctype,
+        jl_value_t *inferred_const,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t effects, jl_value_t *analysis_results,
+        jl_debuginfo_t *edges /* , int absolute_max*/)
+{
+    assert(min_world <= max_world && "attempting to set invalid world constraints");
+    codeinst->rettype = rettype;
+    jl_gc_wb(codeinst, rettype);
+    codeinst->exctype = exctype;
+    jl_gc_wb(codeinst, exctype);
+    if ((const_flags & 2) != 0) {
+        codeinst->rettype_const = inferred_const;
+        jl_gc_wb(codeinst, inferred_const);
+    }
+    jl_atomic_store_relaxed(&codeinst->debuginfo, (jl_value_t*)edges == jl_nothing ? NULL : edges);
+    jl_gc_wb(codeinst, edges);
+    if ((const_flags & 1) != 0) {
+        // TODO: may want to follow ordering restrictions here (see jitlayers.cpp)
+        assert(const_flags & 2);
+        jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return);
+    }
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
+    codeinst->analysis_results = analysis_results;
+    assert(jl_atomic_load_relaxed(&codeinst->min_world) == 1);
+    assert(jl_atomic_load_relaxed(&codeinst->max_world) == 0);
+    jl_atomic_store_release(&codeinst->inferred, jl_nothing);
+    jl_atomic_store_release(&codeinst->min_world, min_world);
+    jl_atomic_store_release(&codeinst->max_world, max_world);
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner)
+{
+    jl_code_instance_t *codeinst = jl_new_codeinst(mi, owner, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, 0, NULL);
+    jl_atomic_store_relaxed(&codeinst->min_world, 1); // make temporarily invalid before returning, so that jl_fill_codeinst is valid later
+    return codeinst;
+}
+
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED)
 {
@@ -520,6 +639,29 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
     return;
 }
 
+JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
+                                   jl_code_instance_t *expected_ci,
+                                   jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED)
+{
+    JL_GC_PUSH1(&ci);
+    if (jl_is_method(mi->def.method))
+        JL_LOCK(&mi->def.method->writelock);
+    jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache);
+    int ret = 0;
+    if (oldci == expected_ci) {
+        jl_atomic_store_relaxed(&ci->next, oldci);
+        if (oldci)
+            jl_gc_wb(ci, oldci);
+        jl_atomic_store_release(&mi->cache, ci);
+        jl_gc_wb(mi, ci);
+        ret = 1;
+    }
+    if (jl_is_method(mi->def.method))
+        JL_UNLOCK(&mi->def.method->writelock);
+    JL_GC_POP();
+    return ret;
+}
+
 static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
 {
     size_t world = jl_atomic_load_acquire(&jl_world_counter);
@@ -558,40 +700,38 @@ int foreach_mtable_in_module(
         if ((void*)b == jl_nothing)
             break;
         jl_sym_t *name = b->globalref->name;
-        if (jl_atomic_load_relaxed(&b->owner) == b && b->constp) {
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-            if (v) {
-                jl_value_t *uw = jl_unwrap_unionall(v);
-                if (jl_is_datatype(uw)) {
-                    jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
-                    if (tn->module == m && tn->name == name && tn->wrapper == v) {
-                        // this is the original/primary binding for the type (name/wrapper)
-                        jl_methtable_t *mt = tn->mt;
-                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
-                            assert(mt->module == m);
-                            if (!visit(mt, env))
-                                return 0;
-                        }
-                    }
-                }
-                else if (jl_is_module(v)) {
-                    jl_module_t *child = (jl_module_t*)v;
-                    if (child != m && child->parent == m && child->name == name) {
-                        // this is the original/primary binding for the submodule
-                        if (!foreach_mtable_in_module(child, visit, env))
-                            return 0;
-                    }
-                }
-                else if (jl_is_mtable(v)) {
-                    jl_methtable_t *mt = (jl_methtable_t*)v;
-                    if (mt->module == m && mt->name == name) {
-                        // this is probably an external method table here, so let's
-                        // assume so as there is no way to precisely distinguish them
+        jl_value_t *v = jl_get_binding_value_if_const(b);
+        if (v) {
+            jl_value_t *uw = jl_unwrap_unionall(v);
+            if (jl_is_datatype(uw)) {
+                jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
+                if (tn->module == m && tn->name == name && tn->wrapper == v) {
+                    // this is the original/primary binding for the type (name/wrapper)
+                    jl_methtable_t *mt = tn->mt;
+                    if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
+                        assert(mt->module == m);
                         if (!visit(mt, env))
                             return 0;
                     }
                 }
             }
+            else if (jl_is_module(v)) {
+                jl_module_t *child = (jl_module_t*)v;
+                if (child != m && child->parent == m && child->name == name) {
+                    // this is the original/primary binding for the submodule
+                    if (!foreach_mtable_in_module(child, visit, env))
+                        return 0;
+                }
+            }
+            else if (jl_is_mtable(v)) {
+                jl_methtable_t *mt = (jl_methtable_t*)v;
+                if (mt->module == m && mt->name == name) {
+                    // this is probably an external method table here, so let's
+                    // assume so as there is no way to precisely distinguish them
+                    if (!visit(mt, env))
+                        return 0;
+                }
+            }
         }
         table = jl_atomic_load_relaxed(&m->bindings);
     }
@@ -660,7 +800,7 @@ JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
         for (i = 0, l = jl_array_nrows(unspec); i < l; i++) {
             jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i);
             if (jl_rettype_inferred_native(mi, world, world) == jl_nothing)
-                jl_type_infer(mi, world, 1, SOURCE_MODE_NOT_REQUIRED);
+                jl_type_infer(mi, world, SOURCE_MODE_NOT_REQUIRED);
         }
         JL_GC_POP();
     }
@@ -728,7 +868,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams)
                 vm = T_has_tv ? jl_type_unionall(v, T) : T;
                 if (N_has_tv)
                     N = NULL;
-                vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs
+                vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1, 0); // this cannot throw for these inputs
             }
             sp++;
             decl = ((jl_unionall_t*)decl)->body;
@@ -977,7 +1117,7 @@ static void jl_compilation_sig(
             // avoid Vararg{Type{Type{...}}}
             if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i)))
                 type_i = (jl_value_t*)jl_type_type;
-            type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs
+            type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1, 0); // this cannot throw for these inputs
         }
         else {
             type_i = inst_varargp_in_env(decl, sparams);
@@ -1239,7 +1379,9 @@ static jl_method_instance_t *cache_method(
                 return entry->func.linfo;
         }
         struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
+        jl_typemap_t *cacheentry = jl_atomic_load_relaxed(cache);
+        assert(cacheentry != NULL);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(cacheentry, &search, offs, /*subtype*/1);
         if (entry && entry->func.value)
             return entry->func.linfo;
     }
@@ -1538,14 +1680,6 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m)
     }
 }
 
-static int is_anonfn_typename(char *name)
-{
-    if (name[0] != '#' || name[1] == '#')
-        return 0;
-    char *other = strrchr(name, '#');
-    return other > &name[1] && other[1] > '0' && other[1] <= '9';
-}
-
 static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue)
 {
     // method overwritten
@@ -2201,7 +2335,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     JL_GC_POP();
 }
 
-static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, size_t world)
+static void JL_NORETURN jl_method_error_bare(jl_value_t *f, jl_value_t *args, size_t world)
 {
     if (jl_methoderror_type) {
         jl_value_t *e = jl_new_struct_uninit(jl_methoderror_type);
@@ -2226,7 +2360,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
     // not reached
 }
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world)
+void JL_NORETURN jl_method_error(jl_value_t *f, jl_value_t **args, size_t na, size_t world)
 {
     jl_value_t *argtup = jl_f_tuple(NULL, args, na - 1);
     JL_GC_PUSH1(&argtup);
@@ -2297,16 +2431,6 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *
     return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
 }
 
-jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT)
-{
-    jl_method_t *def = method->def.method;
-    jl_method_instance_t *mi = jl_get_unspecialized(def);
-    if (mi == NULL) {
-        return method;
-    }
-    return mi;
-}
-
 jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
 {
     // one unspecialized version of a function can be shared among all cached specializations
@@ -2390,7 +2514,7 @@ jl_code_instance_t *jl_method_inferred_with_abi(jl_method_instance_t *mi JL_PROP
 
 jl_mutex_t precomp_statement_out_lock;
 
-static void record_precompile_statement(jl_method_instance_t *mi)
+static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time)
 {
     static ios_t f_precompile;
     static JL_STREAM* s_precompile = NULL;
@@ -2399,6 +2523,8 @@ static void record_precompile_statement(jl_method_instance_t *mi)
         return;
     if (!jl_is_method(def))
         return;
+    if (def->is_for_opaque_closure)
+        return; // OpaqueClosure methods cannot be looked up by their types, so are incompatible with `precompile(...)`
 
     JL_LOCK(&precomp_statement_out_lock);
     if (s_precompile == NULL) {
@@ -2413,6 +2539,8 @@ static void record_precompile_statement(jl_method_instance_t *mi)
         }
     }
     if (!jl_has_free_typevars(mi->specTypes)) {
+        if (jl_options.trace_compile_timing)
+            jl_printf(s_precompile, "#= %6.1f ms =# ", compilation_time / 1e6);
         jl_printf(s_precompile, "precompile(");
         jl_static_show(s_precompile, mi->specTypes);
         jl_printf(s_precompile, ")\n");
@@ -2422,6 +2550,45 @@ static void record_precompile_statement(jl_method_instance_t *mi)
     JL_UNLOCK(&precomp_statement_out_lock);
 }
 
+// If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is
+// useful for the JIT itself, since it just doesn't cause redundant work or missed updates,
+// but merely causes it to look into the current JIT worklist.
+void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile)
+{
+    uint8_t flags = jl_atomic_load_acquire(&ci->specsigflags); // happens-before for subsequent read of fptr
+    while (1) {
+        jl_callptr_t initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr
+        while (initial_invoke == jl_fptr_wait_for_compiled_addr) {
+            if (!waitcompile) {
+                *invoke = NULL;
+                *specptr = NULL;
+                *specsigflags = 0b00;
+                return;
+            }
+            jl_cpu_pause();
+            initial_invoke = jl_atomic_load_acquire(&ci->invoke);
+        }
+        void *fptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+        if (initial_invoke == NULL || fptr == NULL) {
+            *invoke = initial_invoke;
+            *specptr = NULL;
+            *specsigflags = 0b00;
+            return;
+        }
+        while (!(flags & 0b10)) {
+            jl_cpu_pause();
+            flags = jl_atomic_load_acquire(&ci->specsigflags);
+        }
+        jl_callptr_t final_invoke = jl_atomic_load_relaxed(&ci->invoke);
+        if (final_invoke == initial_invoke) {
+            *invoke = final_invoke;
+            *specptr = fptr;
+            *specsigflags = flags;
+            return;
+        }
+    }
+}
+
 jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT);
 
 jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
@@ -2438,18 +2605,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_code_instance_t *codeinst2 = jl_compile_method_internal(mi2, world);
         jl_code_instance_t *codeinst = jl_get_method_inferred(
                 mi, codeinst2->rettype,
-                jl_atomic_load_relaxed(&codeinst2->min_world), jl_atomic_load_relaxed(&codeinst2->max_world));
+                jl_atomic_load_relaxed(&codeinst2->min_world), jl_atomic_load_relaxed(&codeinst2->max_world),
+                jl_atomic_load_relaxed(&codeinst2->debuginfo));
         if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
             codeinst->rettype_const = codeinst2->rettype_const;
-            uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
-            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
-            void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
+            jl_gc_wb(codeinst, codeinst->rettype_const);
+            uint8_t specsigflags;
+            jl_callptr_t invoke;
+            void *fptr;
+            jl_read_codeinst_invoke(codeinst2, &specsigflags, &invoke, &fptr, 1);
             if (fptr != NULL) {
-                while (!(specsigflags & 0b10)) {
-                    jl_cpu_pause();
-                    specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
-                }
-                invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
                 void *prev_fptr = NULL;
                 // see jitlayers.cpp for the ordering restrictions here
                 if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
@@ -2457,14 +2622,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                     jl_atomic_store_release(&codeinst->invoke, invoke);
                     // unspec is probably not specsig, but might be using specptr
                     jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag
-                } else {
+                }
+                else {
                     // someone else already compiled it
                     while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
                         jl_cpu_pause();
                     }
                     // codeinst is now set up fully, safe to return
                 }
-            } else {
+            }
+            else {
                 jl_callptr_t prev = NULL;
                 jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
             }
@@ -2486,7 +2653,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
     // if compilation is disabled or source is unavailable, try calling unspecialized version
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
         compile_option == JL_OPTIONS_COMPILE_MIN ||
-        def->source == jl_nothing) {
+        (jl_is_method(def) && def->source == jl_nothing)) {
         // copy fptr from the template method definition
         if (jl_is_method(def)) {
             jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
@@ -2496,20 +2663,18 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
                     jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
                         (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
-                        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-                    void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
-                    if (unspec_fptr) {
-                        // wait until invoke and specsigflags are properly set
-                        while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
-                            jl_cpu_pause();
-                        }
-                        unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
-                    }
-                    jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
+                        0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL);
                     codeinst->rettype_const = unspec->rettype_const;
-                    jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
+                    uint8_t specsigflags;
+                    jl_callptr_t invoke;
+                    void *fptr;
+                    jl_read_codeinst_invoke(unspec, &specsigflags, &invoke, &fptr, 1);
+                    jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
+                    jl_atomic_store_relaxed(&codeinst->invoke, invoke);
+                    // unspec is probably not specsig, but might be using specptr
+                    jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag
                     jl_mi_cache_insert(mi, codeinst);
-                    record_precompile_statement(mi);
+                    record_precompile_statement(mi, 0);
                     return codeinst;
                 }
             }
@@ -2523,10 +2688,10 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         if (!jl_code_requires_compiler(src, 0)) {
             jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
                 (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
-                0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
+                0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL);
             jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
-            record_precompile_statement(mi);
+            record_precompile_statement(mi, 0);
             return codeinst;
         }
         if (compile_option == JL_OPTIONS_COMPILE_OFF) {
@@ -2546,13 +2711,15 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
     int is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
 
     // This codeinst hasn't been previously inferred do that now
+    // jl_type_infer will internally do a cache lookup and jl_engine_reserve call
+    // to synchronize this across threads
     if (!codeinst) {
         // Don't bother inferring toplevel thunks or macros - the performance cost of inference is likely
         // to significantly exceed the actual runtime.
         int should_skip_inference = !jl_is_method(mi->def.method) || jl_symbol_name(mi->def.method->name)[0] == '@';
 
         if (!should_skip_inference) {
-            codeinst = jl_type_infer(mi, world, 0, SOURCE_MODE_ABI);
+            codeinst = jl_type_infer(mi, world, SOURCE_MODE_ABI);
         }
     }
 
@@ -2564,24 +2731,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         }
 
         JL_GC_PUSH1(&codeinst);
-        jl_compile_codeinst(codeinst);
+        double compile_time = jl_hrtime();
+        int did_compile = jl_compile_codeinst(codeinst);
+        compile_time = jl_hrtime() - compile_time;
 
         if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
             // Something went wrong. Bail to the fallback path.
             codeinst = NULL;
-        } else {
-            record_precompile_statement(mi);
+        }
+        else if (did_compile && codeinst->owner == jl_nothing) {
+            record_precompile_statement(mi, compile_time);
         }
         JL_GC_POP();
     }
     if (!codeinst) {
-        jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
-        jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
+        jl_method_instance_t *unspec = jl_get_unspecialized(def);
+        if (unspec == NULL)
+            unspec = mi;
+        jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0, NULL);
         // ask codegen to make the fptr for unspec
         jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
         if (ucache_invoke == NULL) {
-            if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
-                                              jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
+            if ((!jl_is_method(def) || def->source == jl_nothing) &&
+                !jl_cached_uninferred(jl_atomic_load_relaxed(&ucache->def->cache), world)) {
                 jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi));
             }
             jl_generate_fptr_for_unspecialized(ucache);
@@ -2596,20 +2768,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         }
         codeinst = jl_new_codeinst(mi, jl_nothing,
             (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
-            0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-        void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
-        if (unspec_fptr) {
-            // wait until invoke and specsigflags are properly set
-            while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
-                jl_cpu_pause();
-            }
-            ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
-        }
-        // unspec is always not specsig, but might use specptr
-        jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
-        jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
+            0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL);
         codeinst->rettype_const = ucache->rettype_const;
-        jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
+        uint8_t specsigflags;
+        jl_callptr_t invoke;
+        void *fptr;
+        jl_read_codeinst_invoke(ucache, &specsigflags, &invoke, &fptr, 1);
+        // unspec is always not specsig, but might use specptr
+        jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
+        jl_atomic_store_relaxed(&codeinst->invoke, invoke);
+        jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag
         jl_mi_cache_insert(mi, codeinst);
     }
     jl_atomic_store_relaxed(&codeinst->precompile, 1);
@@ -2638,6 +2806,23 @@ jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_
     return invoke(f, args, nargs, sparams);
 }
 
+jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+{
+    // This relies on the invariant that the JIT will set the invoke ptr immediately upon adding `m` to itself.
+    size_t nthreads = jl_atomic_load_relaxed(&jl_n_threads);
+    // This should only be possible if there's more than one thread. If not, either there's a bug somewhere
+    // that resulted in this not getting cleared, or we're about to deadlock. Either way, that's bad.
+    if (nthreads == 1) {
+        jl_error("Internal error: Reached jl_fptr_wait_for_compiled in single-threaded execution.");
+    }
+    jl_callptr_t invoke = jl_atomic_load_acquire(&m->invoke);
+    while (invoke == &jl_fptr_wait_for_compiled) {
+        jl_cpu_pause();
+        invoke = jl_atomic_load_acquire(&m->invoke);
+    }
+    return invoke(f, args, nargs, m);
+}
+
 JL_DLLEXPORT const jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
 
 JL_DLLEXPORT const jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
@@ -2646,6 +2831,8 @@ JL_DLLEXPORT const jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
 
 JL_DLLEXPORT const jl_callptr_t jl_f_opaque_closure_call_addr = (jl_callptr_t)&jl_f_opaque_closure_call;
 
+JL_DLLEXPORT const jl_callptr_t jl_fptr_wait_for_compiled_addr = &jl_fptr_wait_for_compiled;
+
 // Return the index of the invoke api, if known
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
 {
@@ -2720,6 +2907,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor
     jl_method_instance_t *mi = NULL;
     if (jl_is_datatype(ti)) {
         jl_methtable_t *mt = jl_method_get_table(m);
+        assert(mt != NULL);
         if ((jl_value_t*)mt != jl_nothing) {
             // get the specialization, possibly also caching it
             if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) {
@@ -2845,7 +3033,7 @@ static void _generate_from_hint(jl_method_instance_t *mi, size_t world)
 {
     jl_value_t *codeinst = jl_rettype_inferred_native(mi, world, world);
     if (codeinst == jl_nothing) {
-        (void)jl_type_infer(mi, world, 1, SOURCE_MODE_NOT_REQUIRED);
+        (void)jl_type_infer(mi, world, SOURCE_MODE_NOT_REQUIRED);
         codeinst = jl_rettype_inferred_native(mi, world, world);
     }
     if (codeinst != jl_nothing) {
@@ -2886,10 +3074,10 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet
             JL_GC_POP();
             jl_atomic_store_relaxed(&mi2->precompiled, 1);
             if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing)
-                (void)jl_type_infer(mi2, world, 1, SOURCE_MODE_NOT_REQUIRED);
+                (void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED);
             if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) {
                 if (jl_rettype_inferred_native(mi2, tworld, tworld) == jl_nothing)
-                    (void)jl_type_infer(mi2, tworld, 1, SOURCE_MODE_NOT_REQUIRED);
+                    (void)jl_type_infer(mi2, tworld, SOURCE_MODE_NOT_REQUIRED);
             }
         }
     }
@@ -3479,7 +3667,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array
         int msp2 = !msp && jl_method_morespecific(m2, m);
         if (!msp) {
             if (subt || !include_ambiguous || (lim != -1 && msp2)) {
-                if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
+                if (subt2 || ((lim != -1 || (!include_ambiguous && !msp2)) && jl_subtype((jl_value_t*)ti, m2->sig))) {
                     // this may be filtered out as fully intersected, if applicable later
                     mayexclude = 1;
                 }
@@ -4088,16 +4276,6 @@ JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile)
     }
 }
 
-JL_DLLEXPORT void jl_typeinf_lock_begin(void)
-{
-    JL_LOCK(&jl_codegen_lock);
-}
-
-JL_DLLEXPORT void jl_typeinf_lock_end(void)
-{
-    JL_UNLOCK(&jl_codegen_lock);
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/init.c b/src/init.c
index 801f12ec53930..86c0877b14289 100644
--- a/src/init.c
+++ b/src/init.c
@@ -64,39 +64,29 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
     // threads since it seems to return bogus values for master thread on Linux
     // and possibly OSX.
     if (!ismaster) {
-#  if defined(_OS_LINUX_)
+#  if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
         pthread_attr_t attr;
+#if defined(_OS_FREEBSD_)
+        pthread_attr_init(&attr);
+        pthread_attr_get_np(pthread_self(), &attr);
+#else
         pthread_getattr_np(pthread_self(), &attr);
+#endif
         void *stackaddr;
         size_t stacksize;
         pthread_attr_getstack(&attr, &stackaddr, &stacksize);
         pthread_attr_destroy(&attr);
-        *stack_lo = (void*)stackaddr;
-#pragma GCC diagnostic push
-#if defined(_COMPILER_GCC_) && __GNUC__ >= 12
-#pragma GCC diagnostic ignored "-Wdangling-pointer"
-#endif
-        *stack_hi = (void*)__builtin_frame_address(0);
-#pragma GCC diagnostic pop
+        *stack_lo = stackaddr;
+        *stack_hi = (char*)stackaddr + stacksize;
         return;
 #  elif defined(_OS_DARWIN_)
         extern void *pthread_get_stackaddr_np(pthread_t thread);
         extern size_t pthread_get_stacksize_np(pthread_t thread);
         pthread_t thread = pthread_self();
         void *stackaddr = pthread_get_stackaddr_np(thread);
-        *stack_lo = (void*)stackaddr;
-        *stack_hi = (void*)__builtin_frame_address(0);
-        return;
-#  elif defined(_OS_FREEBSD_)
-        pthread_attr_t attr;
-        pthread_attr_init(&attr);
-        pthread_attr_get_np(pthread_self(), &attr);
-        void *stackaddr;
-        size_t stacksize;
-        pthread_attr_getstack(&attr, &stackaddr, &stacksize);
-        pthread_attr_destroy(&attr);
-        *stack_lo = (void*)stackaddr;
-        *stack_hi = (void*)__builtin_frame_address(0);
+        size_t stacksize = pthread_get_stacksize_np(thread);
+        *stack_lo = (char*)stackaddr - stacksize;
+        *stack_hi = stackaddr;
         return;
 #  else
 #      warning "Getting precise stack size for thread is not supported."
@@ -273,7 +263,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
             }
             JL_CATCH {
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: ");
-                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
                 jlbacktrace(); // written to STDERR_FILENO
             }
@@ -317,7 +307,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
                     assert(item);
                     uv_unref(item->h);
                     jl_printf((JL_STREAM*)STDERR_FILENO, "error during exit cleanup: close: ");
-                    jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                    jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                     jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
                     jlbacktrace(); // written to STDERR_FILENO
                     item = next_shutdown_queue_item(item);
@@ -338,15 +328,15 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
                          // we would like to guarantee this, but cannot currently, so there is still a small race window
                          // that needs to be fixed in libuv
     }
-    if (ct)
-        (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe
     if (loop != NULL) {
         // TODO: consider uv_loop_close(loop) here, before shutdown?
         uv_library_shutdown();
         // no JL_UV_UNLOCK(), since it is now torn down
     }
-
-    // TODO: Destroy threads?
+    if (ct)
+        jl_safepoint_suspend_all_threads(ct); // Destroy other threads, so that they don't segfault
+    if (ct)
+        (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe
 
     jl_destroy_timing(); // cleans up the current timing_stack for noreturn
 #ifdef USE_TIMING_COUNTS
@@ -372,7 +362,7 @@ JL_DLLEXPORT void jl_postoutput_hook(void)
             }
             JL_CATCH {
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\npostoutput hook threw an error: ");
-                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
                 jlbacktrace(); // written to STDERR_FILENO
             }
@@ -737,7 +727,6 @@ static void init_global_mutexes(void) {
     JL_MUTEX_INIT(&precomp_statement_out_lock, "precomp_statement_out_lock");
     JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex");
     JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock");
-    JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock");
     JL_MUTEX_INIT(&typecache_lock, "typecache_lock");
     JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock");
 }
@@ -751,6 +740,9 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     // Make sure we finalize the tls callback before starting any threads.
     (void)jl_get_pgcstack();
 
+    // initialize symbol-table lock
+    uv_mutex_init(&symtab_lock);
+
     // initialize backtraces
     jl_init_profile_lock();
 #ifdef _OS_WINDOWS_
@@ -826,6 +818,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 
     arraylist_new(&jl_linkage_blobs, 0);
     arraylist_new(&jl_image_relocs, 0);
+    arraylist_new(&jl_top_mods, 0);
     arraylist_new(&eytzinger_image_tree, 0);
     arraylist_new(&eytzinger_idxs, 0);
     arraylist_push(&eytzinger_idxs, (void*)0);
@@ -883,12 +876,14 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
         jl_n_markthreads = 0;
         jl_n_sweepthreads = 0;
         jl_n_gcthreads = 0;
-        jl_n_threads_per_pool[0] = 1;
-        jl_n_threads_per_pool[1] = 0;
+        jl_n_threads_per_pool[0] = 0; // Interactive threadpool
+        jl_n_threads_per_pool[1] = 1; // Default threadpool
     } else {
         post_image_load_hooks();
     }
     jl_start_threads();
+    jl_start_gc_threads();
+    uv_barrier_wait(&thread_init_done);
 
     jl_gc_enable(1);
 
diff --git a/src/interpreter.c b/src/interpreter.c
index 5102d1417c939..f9d981687c631 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -17,6 +17,7 @@ extern "C" {
 typedef struct {
     jl_code_info_t *src; // contains the names and number of slots
     jl_method_instance_t *mi; // MethodInstance we're executing, or NULL if toplevel
+    jl_code_instance_t *ci; // CodeInstance we're executing (for generated functions)
     jl_module_t *module; // context for globals
     jl_value_t **locals; // slots for holding local slots and ssavalues
     jl_svec_t *sparam_vals; // method static parameters, if eval-ing a method body
@@ -93,9 +94,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
             jl_error("method: invalid declaration");
         }
         jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
-        _Atomic(jl_value_t*) *bp = &b->value;
-        jl_value_t *gf = jl_generic_function_def(fname, modu, bp, b);
-        return gf;
+        return jl_declare_const_gf(b, modu, fname);
     }
 
     jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL;
@@ -135,10 +134,10 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state
     JL_GC_PUSHARGS(argv, nargs - 1);
     size_t i;
     for (i = 1; i < nargs; i++)
-        argv[i] = eval_value(args[i], s);
+        argv[i-1] = eval_value(args[i], s);
     jl_method_instance_t *meth = (jl_method_instance_t*)args[0];
     assert(jl_is_method_instance(meth));
-    jl_value_t *result = jl_invoke(argv[1], &argv[2], nargs - 2, meth);
+    jl_value_t *result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, meth);
     JL_GC_POP();
     return result;
 }
@@ -231,6 +230,11 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == jl_isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
+        int allow_import = 1;
+        if (nargs == 2) {
+            assert(jl_is_bool(args[1]) && "malformed IR");
+            allow_import = args[1] == jl_true;
+        }
         if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
             ssize_t n = jl_slot_number(sym);
             if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL)
@@ -238,10 +242,10 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
             defined = s->locals[n - 1] != NULL;
         }
         else if (jl_is_globalref(sym)) {
-            defined = jl_boundp(jl_globalref_mod(sym), jl_globalref_name(sym));
+            defined = jl_boundp(jl_globalref_mod(sym), jl_globalref_name(sym), allow_import);
         }
         else if (jl_is_symbol(sym)) {
-            defined = jl_boundp(s->module, (jl_sym_t*)sym);
+            defined = jl_boundp(s->module, (jl_sym_t*)sym, allow_import);
         }
         else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == jl_static_parameter_sym) {
             ssize_t n = jl_unbox_long(jl_exprarg(sym, 0));
@@ -297,7 +301,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
             argv[i] = eval_value(args[i], s);
         JL_NARGSV(new_opaque_closure, 4);
         jl_value_t *ret = (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)argv[0], argv[1], argv[2],
-            argv[3], argv+4, nargs-4, 1);
+            argv[4], argv+5, nargs-5, 1);
         JL_GC_POP();
         return ret;
     }
@@ -317,7 +321,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         return jl_copy_ast(eval_value(args[0], s));
     }
     else if (head == jl_exc_sym) {
-        return jl_current_exception();
+        return jl_current_exception(jl_current_task);
     }
     else if (head == jl_boundscheck_sym) {
         return jl_true;
@@ -490,7 +494,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             s->locals[jl_source_nslots(s->src) + id] = val;
         }
         else if (jl_is_enternode(stmt)) {
-            jl_enter_handler(&__eh);
+            jl_enter_handler(ct, &__eh);
             // This is a bit tricky, but supports the implementation of PhiC nodes.
             // They are conceptually slots, but the slot to store to doesn't get explicitly
             // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more
@@ -521,13 +525,14 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 }
                 // store current top of exception stack for restore in pop_exception.
             }
-            s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state());
+            s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state(ct));
             if (jl_enternode_scope(stmt)) {
                 jl_value_t *old_scope = ct->scope;
                 JL_GC_PUSH1(&old_scope);
                 jl_value_t *new_scope = eval_value(jl_enternode_scope(stmt), s);
                 ct->scope = new_scope;
                 if (!jl_setjmp(__eh.eh_ctx, 1)) {
+                    ct->eh = &__eh;
                     eval_body(stmts, s, next_ip, toplevel);
                     jl_unreachable();
                 }
@@ -536,17 +541,20 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             }
             else {
                 if (!jl_setjmp(__eh.eh_ctx, 1)) {
+                    ct->eh = &__eh;
                     eval_body(stmts, s, next_ip, toplevel);
                     jl_unreachable();
                 }
             }
-            jl_eh_restore_state(&__eh);
+
             if (s->continue_at) { // means we reached a :leave expression
+                jl_eh_restore_state_noexcept(ct, &__eh);
                 ip = s->continue_at;
                 s->continue_at = 0;
                 continue;
             }
             else { // a real exception
+                jl_eh_restore_state(ct, &__eh);
                 ip = catch_ip;
                 assert(jl_enternode_catch_dest(stmt) != 0);
                 continue;
@@ -566,9 +574,13 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 else {
                     jl_module_t *modu;
                     jl_sym_t *sym;
+                    // Plain assignment is allowed to create bindings at
+                    // toplevel and only for the current module
+                    int alloc = toplevel;
                     if (jl_is_globalref(lhs)) {
                         modu = jl_globalref_mod(lhs);
                         sym = jl_globalref_name(lhs);
+                        alloc &= modu == s->module;
                     }
                     else {
                         assert(jl_is_symbol(lhs));
@@ -576,7 +588,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                         sym = (jl_sym_t*)lhs;
                     }
                     JL_GC_PUSH1(&rhs);
-                    jl_binding_t *b = jl_get_binding_wr(modu, sym);
+                    jl_binding_t *b = jl_get_binding_wr(modu, sym, alloc);
                     jl_checked_assignment(b, modu, sym, rhs);
                     JL_GC_POP();
                 }
@@ -609,7 +621,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             }
             else if (head == jl_pop_exception_sym) {
                 size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s));
-                jl_restore_excstack(prev_state);
+                jl_restore_excstack(ct, prev_state);
             }
             else if (toplevel) {
                 if (head == jl_method_sym && jl_expr_nargs(stmt) > 1) {
@@ -619,6 +631,18 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     jl_value_t *res = jl_toplevel_eval(s->module, stmt);
                     s->locals[jl_source_nslots(s->src) + s->ip] = res;
                 }
+                else if (head == jl_globaldecl_sym) {
+                    jl_value_t *val = eval_value(jl_exprarg(stmt, 1), s);
+                    s->locals[jl_source_nslots(s->src) + s->ip] = val; // temporarily root
+                    jl_declare_global(s->module, jl_exprarg(stmt, 0), val);
+                    s->locals[jl_source_nslots(s->src) + s->ip] = jl_nothing;
+                }
+                else if (head == jl_const_sym) {
+                    jl_value_t *val = jl_expr_nargs(stmt) == 1 ? NULL : eval_value(jl_exprarg(stmt, 1), s);
+                    s->locals[jl_source_nslots(s->src) + s->ip] = val; // temporarily root
+                    jl_eval_const_decl(s->module, jl_exprarg(stmt, 0), val);
+                    s->locals[jl_source_nslots(s->src) + s->ip] = jl_nothing;
+                }
                 else if (jl_is_toplevel_only_expr(stmt)) {
                     jl_toplevel_eval(s->module, stmt);
                 }
@@ -681,31 +705,55 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 
 // preparing method IR for interpreter
 
-jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
+jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *mi, size_t world)
 {
-    jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&mi->uninferred);
+    jl_value_t *ret = NULL;
+    jl_code_info_t *src = NULL;
     if (jl_is_method(mi->def.value)) {
-        if (!src || (jl_value_t*)src == jl_nothing) {
-            if (mi->def.method->source) {
-                src = (jl_code_info_t*)mi->def.method->source;
+        if (mi->def.method->source) {
+            jl_method_t *m = mi->def.method;
+            src = (jl_code_info_t*)m->source;
+            if (!jl_is_code_info(src)) {
+                src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
+                // Replace the method source by the uncompressed version,
+                // under the assumption that the interpreter may need to
+                // access it frequently. TODO: Have some sort of usage-based
+                // cache here.
+                m->source = (jl_value_t*)src;
+                jl_gc_wb(m, src);
             }
-            else {
+            ret = (jl_value_t*)src;
+        }
+        else {
+            jl_code_instance_t *cache = jl_atomic_load_relaxed(&mi->cache);
+            jl_code_instance_t *uninferred = jl_cached_uninferred(cache, world);
+            if (!uninferred) {
                 assert(mi->def.method->generator);
-                src = jl_code_for_staged(mi, world);
+                src = jl_code_for_staged(mi, world, &uninferred);
             }
+            ret = (jl_value_t*)uninferred;
+            src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
         }
-        if (src && (jl_value_t*)src != jl_nothing) {
-            JL_GC_PUSH1(&src);
-            src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
-            jl_atomic_store_release(&mi->uninferred, (jl_value_t*)src);
-            jl_gc_wb(mi, src);
-            JL_GC_POP();
+    }
+    else {
+        jl_code_instance_t *uninferred = jl_cached_uninferred(jl_atomic_load_relaxed(&mi->cache), world);
+        ret = (jl_value_t*)uninferred;
+        if (ret) {
+            src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
         }
     }
     if (!src || !jl_is_code_info(src)) {
         jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi));
     }
-    return src;
+    return ret;
+}
+
+jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
+{
+    jl_value_t *code_or_ci = jl_code_or_ci_for_interpreter(mi, world);
+    if (jl_is_code_instance(code_or_ci))
+        return (jl_code_info_t*)jl_atomic_load_relaxed(&((jl_code_instance_t*)code_or_ci)->inferred);
+    return (jl_code_info_t*)code_or_ci;
 }
 
 // interpreter entry points
@@ -716,7 +764,15 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     jl_method_instance_t *mi = codeinst->def;
     jl_task_t *ct = jl_current_task;
     size_t world = ct->world_age;
-    jl_code_info_t *src = jl_code_for_interpreter(mi, world);
+    jl_code_info_t *src = NULL;
+    jl_value_t *code = jl_code_or_ci_for_interpreter(mi, world);
+    jl_code_instance_t *ci = NULL;
+    if (jl_is_code_instance(code)) {
+        ci = (jl_code_instance_t*)code;
+        src = (jl_code_info_t*)jl_atomic_load_relaxed(&ci->inferred);
+    } else {
+        src = (jl_code_info_t*)code;
+    }
     jl_array_t *stmts = src->code;
     assert(jl_typetagis(stmts, jl_array_any_type));
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2;
@@ -731,8 +787,8 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     }
     else {
         s->module = mi->def.method->module;
-        size_t defargs = mi->def.method->nargs;
-        int isva = mi->def.method->isva ? 1 : 0;
+        size_t defargs = src->nargs;
+        int isva = src->isva;
         size_t i;
         s->locals[0] = f;
         assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs);
@@ -747,6 +803,7 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     s->preevaluation = 0;
     s->continue_at = 0;
     s->mi = mi;
+    s->ci = ci;
     JL_GC_ENABLEFRAME(s);
     jl_value_t *r = eval_body(stmts, s, 0, 0);
     JL_GC_POP();
@@ -758,7 +815,25 @@ JL_DLLEXPORT const jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
 {
     jl_method_t *source = oc->source;
-    jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source);
+    jl_code_info_t *code = NULL;
+    if (source->source) {
+        code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source);
+    }
+    else {
+        // OC constructed from optimized IR. It'll have a single specialization with optimized code
+        // in it that we'll try to interpret.
+        jl_svec_t *specializations = (jl_svec_t*)jl_atomic_load_relaxed(&source->specializations);
+        assert(jl_is_method_instance(specializations));
+        jl_method_instance_t *mi = (jl_method_instance_t *)specializations;
+        jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache);
+        jl_value_t *src = jl_atomic_load_relaxed(&ci->inferred);
+        if (!src) {
+            // This can happen if somebody did :new_opaque_closure with broken IR. This is definitely bad
+            // and UB, but let's try to be slightly nicer than segfaulting here for people debugging.
+            jl_error("Internal Error: Opaque closure with no source at all");
+        }
+        code = jl_uncompress_ir(source, ci, src);
+    }
     interpreter_state *s;
     unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2;
     jl_task_t *ct = jl_current_task;
@@ -778,6 +853,7 @@ jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **ar
     s->preevaluation = 0;
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     size_t defargs = source->nargs;
     int isva = source->isva;
     assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs);
@@ -809,6 +885,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t
     s->sparam_vals = jl_emptysvec;
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     JL_GC_ENABLEFRAME(s);
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
@@ -833,6 +910,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e
     s->preevaluation = (sparam_vals != NULL);
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     JL_GC_ENABLEFRAME(s);
     jl_value_t *v = eval_value(e, s);
     assert(v);
@@ -852,7 +930,8 @@ JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry,
     uintptr_t entry_tags = jl_bt_entry_descriptor(njlvalues, 0, JL_BT_INTERP_FRAME_TAG, s->ip);
     bt_entry[0].uintptr = JL_BT_NON_PTR_ENTRY;
     bt_entry[1].uintptr = entry_tags;
-    bt_entry[2].jlvalue = s->mi  ? (jl_value_t*)s->mi  :
+    bt_entry[2].jlvalue = s->ci  ? (jl_value_t*)s->ci  :
+                          s->mi  ? (jl_value_t*)s->mi  :
                           s->src ? (jl_value_t*)s->src : (jl_value_t*)jl_nothing;
     if (need_module) {
         // If we only have a CodeInfo (s->src), we are in a top level thunk and
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 3a98850ddca68..c747edfeffe5f 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -4,6 +4,10 @@ namespace JL_I {
 #include "intrinsics.h"
 }
 
+#include <array>
+#include <bitset>
+#include <string>
+
 #include "ccall.cpp"
 
 //Mark our stats as being from intrinsics irgen
@@ -17,6 +21,7 @@ STATISTIC(EmittedRuntimeCalls, "Number of runtime intrinsic calls emitted");
 STATISTIC(EmittedIntrinsics, "Number of intrinsic calls emitted");
 STATISTIC(Emitted_pointerref, "Number of pointerref calls emitted");
 STATISTIC(Emitted_pointerset, "Number of pointerset calls emitted");
+STATISTIC(Emitted_pointerarith, "Number of pointer arithmetic calls emitted");
 STATISTIC(Emitted_atomic_fence, "Number of atomic_fence calls emitted");
 STATISTIC(Emitted_atomic_pointerref, "Number of atomic_pointerref calls emitted");
 STATISTIC(Emitted_atomic_pointerop, "Number of atomic_pointerop calls emitted");
@@ -398,26 +403,24 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
         CreateTrap(ctx.builder);
         return UndefValue::get(to);
     }
-    if (frompointer && topointer) {
-        unboxed = emit_bitcast(ctx, unboxed, to);
-    }
     else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
         assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
-        AllocaInst *cast = ctx.builder.CreateAlloca(ty);
+        Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to));
+        AllocaInst *cast = emit_static_alloca(ctx, ty, align);
         setName(ctx.emission_context, cast, "coercion");
-        ctx.builder.CreateStore(unboxed, cast);
-        unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
+        ctx.builder.CreateAlignedStore(unboxed, cast, align);
+        unboxed = ctx.builder.CreateAlignedLoad(to, cast, align);
     }
     else if (frompointer) {
         Type *INTT_to = INTT(to, DL);
         unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
         setName(ctx.emission_context, unboxed, "coercion");
-        if (INTT_to != to)
+        if (INTT_to != to) //TODO when is this true?
             unboxed = ctx.builder.CreateBitCast(unboxed, to);
     }
     else if (topointer) {
         Type *INTT_to = INTT(to, DL);
-        if (to != INTT_to)
+        if (to != INTT_to) //TODO when is this true?
             unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
         unboxed = emit_inttoptr(ctx, unboxed, to);
         setName(ctx.emission_context, unboxed, "coercion");
@@ -455,7 +458,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
 
     if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) {
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
-        Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext()))));
+        Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), p));
         setName(ctx.emission_context, unbox_load, p->getName() + ".unbox");
         if (jt == (jl_value_t*)jl_bool_type)
             unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), {
@@ -489,7 +492,6 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
             return emit_unboxed_coercion(ctx, to, ai.decorateInst(load));
         }
     }
-    p = maybe_bitcast(ctx, p, ptype);
     Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
     setName(ctx.emission_context, load, p->getName() + ".unbox");
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
@@ -497,7 +499,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
 }
 
 // emit code to store a raw value into a destination
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile)
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, Align alignment, bool isVolatile)
 {
     if (x.isghost) {
         // this can happen when a branch yielding a different type ends
@@ -509,10 +511,7 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
     if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. f32 -> i32)
         assert(x.V);
         Value *unboxed = zext_struct(ctx, x.V);
-        Type *dest_ty = unboxed->getType()->getPointerTo();
-        if (dest->getType() != dest_ty)
-            dest = emit_bitcast(ctx, dest, dest_ty);
-        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment));
+        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, alignment);
         store->setVolatile(isVolatile);
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
         ai.decorateInst(store);
@@ -520,7 +519,7 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
     }
 
     Value *src = data_pointer(ctx, x);
-    emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, julia_alignment(x.typ), isVolatile);
+    emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), Align(alignment), Align(julia_alignment(x.typ)), isVolatile);
 }
 
 static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
@@ -612,8 +611,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, v.tbaa);
         vx = ai.decorateInst(ctx.builder.CreateLoad(
             storage_type,
-            emit_bitcast(ctx, data_pointer(ctx, v),
-                storage_type->getPointerTo())));
+            data_pointer(ctx, v)));
         setName(ctx.emission_context, vx, "bitcast");
     }
 
@@ -625,13 +623,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
             vx = ctx.builder.CreateZExt(vx, llvmt);
         } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) {
             vx = ctx.builder.CreatePtrToInt(vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // CreatePtrToInt may undo an IntToPtr
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) {
             vx = emit_inttoptr(ctx, vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // emit_inttoptr may undo an PtrToInt
+                setName(ctx.emission_context, vx, "bitcast_coercion");
+        } else if (vxt->isPointerTy() && llvmt->isPointerTy()) {
+            // emit_bitcast preserves the origin address space, which we can't have here
+            vx = ctx.builder.CreateAddrSpaceCast(vx, llvmt);
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // cast may have been folded
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         } else {
             vx = emit_bitcast(ctx, vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // emit_bitcast may undo another bitcast
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         }
     }
 
@@ -683,10 +693,11 @@ static jl_cgval_t generic_cast(
             // understood that everything is implicitly rounded to 23 bits,
             // but if we start looking at more bits we need to actually do the
             // rounding first instead of carrying around incorrect low bits.
-            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
+            Align align(julia_alignment((jl_value_t*)jlto));
+            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align);
             setName(ctx.emission_context, jlfloattemp_var, "rounding_slot");
-            ctx.builder.CreateStore(from, jlfloattemp_var);
-            from  = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
+            ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align);
+            from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true);
             setName(ctx.emission_context, from, "rounded");
         }
     }
@@ -741,7 +752,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 
     if (ety == (jl_value_t*)jl_any_type) {
         Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
-        setName(ctx.emission_context, thePtr, "unbox_any_ptr");
+        if (isa<Instruction>(thePtr) && !thePtr->hasName())
+            setName(ctx.emission_context, thePtr, "unbox_any_ptr");
         LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb));
         setName(ctx.emission_context, load, "any_unbox");
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
@@ -756,11 +768,11 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerref_offset");
-        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
-        thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1);
+        Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
+        thePtr = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, thePtr, "pointerref_src");
         MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
-        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, sizeof(jl_value_t*), align_nb);
+        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb));
         return mark_julia_type(ctx, strct, true, ety);
     }
     else {
@@ -789,7 +801,7 @@ static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t
 static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = argv[1];
+    jl_cgval_t x = argv[1];
     const jl_cgval_t &i = argv[2];
     const jl_cgval_t &align = argv[3];
 
@@ -812,6 +824,9 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         return jl_cgval_t();
     }
     emit_typecheck(ctx, x, ety, "pointerset");
+    x = update_julia_type(ctx, x, ety);
+    if (x.typ == jl_bottom_type)
+        return jl_cgval_t();
 
     Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
@@ -830,14 +845,14 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         ai.decorateInst(store);
     }
     else if (x.ispointer()) {
-        thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerset_offset");
-        auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
+        auto gep = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
-        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, julia_alignment(ety));
+        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety)));
     }
     else {
         bool isboxed;
@@ -853,6 +868,34 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
     return e;
 }
 
+// ptr + offset
+// ptr - offset
+static jl_cgval_t emit_pointerarith(jl_codectx_t &ctx, intrinsic f,
+                                    ArrayRef<jl_cgval_t> argv)
+{
+    jl_value_t *ptrtyp = argv[0].typ;
+    jl_value_t *offtyp = argv[1].typ;
+    if (!jl_is_cpointer_type(ptrtyp) || offtyp != (jl_value_t *)jl_ulong_type)
+        return emit_runtime_call(ctx, f, argv, argv.size());
+    assert(f == add_ptr || f == sub_ptr);
+
+    Value *ptr = emit_unbox(ctx, ctx.types().T_ptr, argv[0], ptrtyp);
+    Value *off = emit_unbox(ctx, ctx.types().T_size, argv[1], offtyp);
+    if (f == sub_ptr)
+        off = ctx.builder.CreateNeg(off);
+    Value *ans = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), ptr, off);
+
+    if (jl_is_concrete_type(ptrtyp)) {
+        return mark_julia_type(ctx, ans, false, ptrtyp);
+    }
+    else {
+        Value *box = emit_allocobj(ctx, (jl_datatype_t *)ptrtyp, true);
+        setName(ctx.emission_context, box, "ptr_box");
+        init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut);
+        return mark_julia_type(ctx, box, true, (jl_datatype_t *)ptrtyp);
+    }
+}
+
 static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &ord = argv[0];
@@ -911,16 +954,15 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t>
         assert(jl_is_datatype(ety));
         Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true);
         setName(ctx.emission_context, strct, "atomic_pointerref_box");
-        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8);
-        thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
         MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
         LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb));
         setName(ctx.emission_context, load, "atomic_pointerref");
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(load);
         load->setOrdering(llvm_order);
-        thePtr = emit_bitcast(ctx, strct, thePtr->getType());
+        thePtr = strct;
         StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety)));
         ai.decorateInst(store);
         return mark_julia_type(ctx, strct, true, ety);
@@ -955,7 +997,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef
     bool ismodifyfield = f == atomic_pointermodify;
     const jl_cgval_t undefval;
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    jl_cgval_t x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
     const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
     const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
     const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
@@ -997,8 +1039,12 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef
         emit_error(ctx, msg);
         return jl_cgval_t();
     }
-    if (!ismodifyfield)
+    if (!ismodifyfield) {
         emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+        x = update_julia_type(ctx, x, ety);
+        if (x.typ == jl_bottom_type)
+            return jl_cgval_t();
+    }
 
     size_t nb = jl_datatype_size(ety);
     if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
@@ -1010,7 +1056,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef
 
     if (!jl_isbits(ety)) {
         //if (!deserves_stack(ety))
-        //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        //Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         //uint64_t size = jl_datatype_size(ety);
         return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations
     }
@@ -1161,8 +1207,6 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
             else {
                 x_ptr = decay_derived(ctx, x_ptr);
                 y_ptr = decay_derived(ctx, y_ptr);
-                if (x_ptr->getType() != y_ptr->getType())
-                    y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType());
                 ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr);
                 setName(ctx.emission_context, ifelse_result, "ifelse_result");
                 ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa);
@@ -1270,6 +1314,13 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         ++Emitted_pointerset;
         assert(nargs == 4);
         return emit_pointerset(ctx, argv);
+
+    case add_ptr:
+    case sub_ptr:
+        ++Emitted_pointerarith;
+        assert(nargs == 2);
+        return emit_pointerarith(ctx, f, argv);
+
     case atomic_fence:
         ++Emitted_atomic_fence;
         assert(nargs == 1);
@@ -1437,26 +1488,6 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef<Va
     case srem_int: return ctx.builder.CreateSRem(x, y);
     case urem_int: return ctx.builder.CreateURem(x, y);
 
-    // LLVM will not fold ptrtoint+arithmetic+inttoptr to GEP. The reason for this
-    // has to do with alias analysis. When adding two integers, either one of them
-    // could be the pointer base. With getelementptr, it is clear which of the
-    // operands is the pointer base. We also have this information at the julia
-    // level. Thus, to not lose information, we need to have a separate intrinsic
-    // for pointer arithmetic which lowers to getelementptr.
-    case add_ptr: {
-        return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), y), t);
-
-    }
-
-    case sub_ptr: {
-        return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), ctx.builder.CreateNeg(y)), t);
-
-    }
-
     case neg_float: return math_builder(ctx)().CreateFNeg(x);
     case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x);
     case add_float: return math_builder(ctx)().CreateFAdd(x, y);
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 805aa914b5366..5b463e3bafe28 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -12,8 +12,6 @@
     ADD_I(udiv_int, 2) \
     ADD_I(srem_int, 2) \
     ADD_I(urem_int, 2) \
-    ADD_I(add_ptr, 2) \
-    ADD_I(sub_ptr, 2) \
     ADD_I(neg_float, 1) \
     ADD_I(add_float, 2) \
     ADD_I(sub_float, 2) \
@@ -86,6 +84,9 @@
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
     ADD_I(sqrt_llvm_fast, 1) \
+    /*  pointer arithmetic */ \
+    ADD_I(add_ptr, 2) \
+    ADD_I(sub_ptr, 2) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
     ADD_I(pointerset, 4) \
diff --git a/src/ircode.c b/src/ircode.c
index a89d0140e0d88..873d33d2d7523 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -367,11 +367,6 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         write_uint8(s->s, TAG_UINT8);
         write_int8(s->s, *(int8_t*)jl_data_ptr(v));
     }
-    else if (jl_typetagis(v, jl_lineinfonode_type)) {
-        write_uint8(s->s, TAG_LINEINFO);
-        for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++)
-            jl_encode_value(s, jl_get_nth_field(v, i));
-    }
     else if (((jl_datatype_t*)jl_typeof(v))->instance == v) {
         write_uint8(s->s, TAG_SINGLETON);
         jl_encode_value(s, jl_typeof(v));
@@ -463,14 +458,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
 }
 
 static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t has_fcall,
-                                            uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop)
+                                            uint8_t nospecializeinfer, uint8_t isva,
+                                            uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod)
 {
     jl_code_info_flags_t flags;
     flags.bits.propagate_inbounds = propagate_inbounds;
     flags.bits.has_fcall = has_fcall;
     flags.bits.nospecializeinfer = nospecializeinfer;
+    flags.bits.isva = isva;
     flags.bits.inlining = inlining;
     flags.bits.constprop = constprop;
+    flags.bits.nargsmatchesmethod = nargsmatchesmethod;
     return flags;
 }
 
@@ -666,7 +664,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
 {
     assert(!ios_eof(s->s));
     jl_value_t *v;
-    size_t i, n;
+    size_t n;
     uint64_t key;
     uint8_t tag = read_uint8(s->s);
     if (tag > LAST_TAG)
@@ -782,13 +780,6 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         v = jl_alloc_string(n);
         ios_readall(s->s, jl_string_data(v), n);
         return v;
-    case TAG_LINEINFO:
-        v = jl_new_struct_uninit(jl_lineinfonode_type);
-        for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) {
-            //size_t offs = jl_field_offset(jl_lineinfonode_type, i);
-            set_nth_field(jl_lineinfonode_type, v, i, jl_decode_value(s), 0);
-        }
-        return v;
     default:
         assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL);
         return jl_decode_value_any(s, tag);
@@ -799,7 +790,41 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
 
 typedef jl_value_t jl_string_t; // for local expressibility
 
-#define IR_DATASIZE_FLAGS         sizeof(uint8_t)
+static size_t codelocs_parseheader(jl_string_t *cl, int *line_offset, int *line_bytes, int *to_bytes) JL_NOTSAFEPOINT
+{
+    if (jl_string_len(cl) == 0) {
+        *line_offset = *line_bytes = *to_bytes = 0;
+        return 0;
+    }
+    int32_t header[3];
+    memcpy(&header, (char*)jl_string_data(cl), sizeof(header));
+    *line_offset = header[0];
+    if (header[1] < 255)
+        *line_bytes = 1;
+    else if (header[1] < 65535)
+        *line_bytes = 2;
+    else
+        *line_bytes = 4;
+    if (header[2] == 0)
+        *to_bytes = 0;
+    else if (header[2] < 255)
+        *to_bytes = 1;
+    else if (header[2] < 65535)
+        *to_bytes = 2;
+    else
+        *to_bytes = 4;
+    assert(jl_string_len(cl) >= sizeof(header) + *line_bytes);
+    return (jl_string_len(cl) - sizeof(header) - *line_bytes) / (*line_bytes + *to_bytes * 2); // compute nstmts
+}
+#ifndef NDEBUG
+static int codelocs_nstmts(jl_string_t *cl) JL_NOTSAFEPOINT
+{
+    int line_offset, line_bytes, to_bytes;
+    return codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+}
+#endif
+
+#define IR_DATASIZE_FLAGS         sizeof(uint16_t)
 #define IR_DATASIZE_PURITY        sizeof(uint16_t)
 #define IR_DATASIZE_INLINING_COST sizeof(uint16_t)
 #define IR_DATASIZE_NSLOTS        sizeof(int32_t)
@@ -811,12 +836,26 @@ typedef enum {
     ir_offset_slotflags     = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST + IR_DATASIZE_NSLOTS
 } ir_offset;
 
+// static_assert is technically a declaration, so shenanigans are required to
+// open an inline declaration context. `sizeof` is the traditional way to do this,
+// but this pattern is illegal in C++, which some compilers warn about, so use
+// `offsetof` instead.
+#define declaration_context(what) (void)offsetof(struct{what; int dummy_;}, dummy_)
+
+// Checks (at compile time) that sizeof(data) == macro_size
+#define checked_size(data, macro_size) \
+    (declaration_context(static_assert(sizeof(data) == macro_size, #macro_size " does not match written size")), data)
+
 JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
 {
     JL_TIMING(AST_COMPRESS, AST_COMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
+    int isdef = code == NULL;
+    if (isdef)
+        code = (jl_code_info_t*)m->source;
     assert(jl_is_method(m));
     assert(jl_is_code_info(code));
+    assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0);
     ios_t dest;
     ios_mem(&dest, 0);
     int en = jl_gc_enable(0); // Might GC
@@ -833,29 +872,33 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
         1
     };
 
+    uint8_t nargsmatchesmethod = code->nargs == m->nargs;
     jl_code_info_flags_t flags = code_info_flags(code->propagate_inbounds, code->has_fcall,
-                                                 code->nospecializeinfer, code->inlining, code->constprop);
-    write_uint8(s.s, flags.packed);
-    static_assert(sizeof(flags.packed) == IR_DATASIZE_FLAGS, "ir_datasize_flags is mismatched with the actual size");
-    write_uint16(s.s, code->purity.bits);
-    static_assert(sizeof(code->purity.bits) == IR_DATASIZE_PURITY, "ir_datasize_purity is mismatched with the actual size");
-    write_uint16(s.s, code->inlining_cost);
-    static_assert(sizeof(code->inlining_cost) == IR_DATASIZE_INLINING_COST, "ir_datasize_inlining_cost is mismatched with the actual size");
-
-    int32_t nslots = jl_array_nrows(code->slotflags);
+                                                 code->nospecializeinfer, code->isva,
+                                                 code->inlining, code->constprop,
+                                                 nargsmatchesmethod);
+    write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS));
+    write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY));
+    write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST));
+
+    size_t nslots = jl_array_nrows(code->slotflags);
     assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions
-    write_int32(s.s, nslots);
-    static_assert(sizeof(nslots) == IR_DATASIZE_NSLOTS, "ir_datasize_nslots is mismatched with the actual size");
+    write_int32(s.s, checked_size((int32_t)nslots, IR_DATASIZE_NSLOTS));
     ios_write(s.s, jl_array_data(code->slotflags, const char), nslots);
 
     // N.B.: The layout of everything before this point is explicitly referenced
     // by the various jl_ir_ accessors. Make sure to adjust those if you change
     // the data layout.
+    if (!nargsmatchesmethod) {
+        size_t nargs = code->nargs;
+        assert(nargs < INT32_MAX);
+        write_int32(s.s, (int32_t)nargs);
+    }
 
     for (i = 0; i < 5; i++) {
         int copy = 1;
-        if (i == 1) { // skip codelocs
-            assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, codelocs));
+        if (i == 1) { // skip debuginfo
+            assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, debuginfo));
             continue;
         }
         jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy);
@@ -866,28 +909,14 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
     if (m->is_for_opaque_closure)
         jl_encode_value_(&s, code->slottypes, 1);
 
+    // Slotnames. For regular methods, we require that m->slot_syms matches the
+    // CodeInfo's slotnames, so we do not need to save it here.
     if (m->generator)
         // can't optimize generated functions
         jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1);
     else
         jl_encode_value(&s, jl_nothing);
 
-    size_t nstmt = jl_array_nrows(code->code);
-    assert(nstmt == jl_array_nrows(code->codelocs));
-    if (jl_array_nrows(code->linetable) < 256) {
-        for (i = 0; i < nstmt; i++) {
-            write_uint8(s.s, jl_array_data(code->codelocs, int32_t)[i]);
-        }
-    }
-    else if (jl_array_nrows(code->linetable) < 65536) {
-        for (i = 0; i < nstmt; i++) {
-            write_uint16(s.s, jl_array_data(code->codelocs, int32_t)[i]);
-        }
-    }
-    else {
-        ios_write(s.s, (char*)jl_array_data(code->codelocs, int32_t), nstmt * sizeof(int32_t));
-    }
-
     write_uint8(s.s, s.relocatability);
 
     ios_flush(s.s);
@@ -927,21 +956,29 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
 
     jl_code_info_t *code = jl_new_code_info_uninit();
     jl_code_info_flags_t flags;
-    flags.packed = read_uint8(s.s);
+    flags.packed = read_uint16(s.s);
     code->inlining = flags.bits.inlining;
     code->constprop = flags.bits.constprop;
     code->propagate_inbounds = flags.bits.propagate_inbounds;
     code->has_fcall = flags.bits.has_fcall;
     code->nospecializeinfer = flags.bits.nospecializeinfer;
+    code->isva = flags.bits.isva;
     code->purity.bits = read_uint16(s.s);
     code->inlining_cost = read_uint16(s.s);
 
-    size_t nslots = read_int32(&src);
+
+    size_t nslots = read_int32(s.s);
     code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots);
     ios_readall(s.s, jl_array_data(code->slotflags, char), nslots);
 
+    if (flags.bits.nargsmatchesmethod) {
+        code->nargs = m->nargs;
+    } else {
+        code->nargs = read_int32(s.s);
+    }
+
     for (i = 0; i < 5; i++) {
-        if (i == 1)  // skip codelocs
+        if (i == 1)  // skip debuginfo
             continue;
         assert(jl_field_isptr(jl_code_info_type, i));
         jl_value_t **fld = (jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i));
@@ -955,25 +992,17 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
         slotnames = m->slot_syms;
     code->slotnames = jl_uncompress_argnames(slotnames);
 
-    size_t nstmt = jl_array_nrows(code->code);
-    code->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmt);
-    if (jl_array_nrows(code->linetable) < 256) {
-        for (i = 0; i < nstmt; i++) {
-            jl_array_data(code->codelocs, int32_t)[i] = read_uint8(s.s);
-        }
-    }
-    else if (jl_array_nrows(code->linetable) < 65536) {
-        for (i = 0; i < nstmt; i++) {
-            jl_array_data(code->codelocs, int32_t)[i] = read_uint16(s.s);
-        }
-    }
-    else {
-        ios_readall(s.s, (char*)jl_array_data(code->codelocs, int32_t), nstmt * sizeof(int32_t));
-    }
+    if (metadata)
+        code->debuginfo = jl_atomic_load_relaxed(&metadata->debuginfo);
+    else
+        code->debuginfo = m->debuginfo;
+    assert(code->debuginfo);
+    assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0);
 
     (void) read_uint8(s.s);   // relocatability
-
+    assert(!ios_eof(s.s));
     assert(ios_getc(s.s) == -1);
+
     ios_close(s.s);
     JL_GC_PUSH1(&code);
     jl_gc_enable(en);
@@ -981,6 +1010,11 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     JL_GC_POP();
     if (metadata) {
         code->parent = metadata->def;
+        jl_gc_wb(code, code->parent);
+        code->rettype = metadata->rettype;
+        jl_gc_wb(code, code->rettype);
+        code->min_world = jl_atomic_load_relaxed(&metadata->min_world);
+        code->max_world = jl_atomic_load_relaxed(&metadata->max_world);
     }
 
     return code;
@@ -1108,6 +1142,244 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i)
     return jl_nothing;
 }
 
+// codelocs are compressed as follows:
+// The input vector is a NTuple{3,UInt32} (struct jl_codeloc_t)
+// The vector is scanned for min and max of the values for each element
+// The output is then allocated to hold (min-line, max-line, max-at) first, then line - min (in the smallest space), then the remainder (in the smallest space)
+static inline struct jl_codeloc_t unpack_codeloc(jl_string_t *cl, size_t pc, int line_offset, int line_bytes, int to_bytes) JL_NOTSAFEPOINT
+{
+    const char *ptr = jl_string_data(cl) + sizeof(int32_t[3]);
+    if (pc == 0)
+        to_bytes = 0;
+    else
+        ptr += line_bytes + (pc - 1) * (line_bytes + to_bytes * 2);
+    uint8_t int8;
+    uint16_t int16;
+    uint32_t int32;
+    struct jl_codeloc_t codeloc;
+    switch (line_bytes) {
+    case 0:
+        codeloc.line = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.line = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.line = int16;
+        break;
+    case 4:
+        memcpy(&int32, ptr, 4);
+        codeloc.line = int32;
+        break;
+    }
+    if (codeloc.line > 0)
+        codeloc.line += line_offset - 1;
+    ptr += line_bytes;
+    switch (to_bytes) {
+    case 0:
+        codeloc.to = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.to = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.to = int16;
+        break;
+    case 4:
+        memcpy(&int32, ptr, 4);
+        codeloc.to = int32;
+        break;
+    }
+    ptr += to_bytes;
+    switch (to_bytes) {
+    case 0:
+        codeloc.pc = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.pc = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.pc = int16;
+        break;
+    case 3:
+        memcpy(&int32, ptr, 4);
+        codeloc.pc = int32;
+        break;
+    }
+    ptr += to_bytes;
+    return codeloc;
+}
+
+
+static const struct jl_codeloc_t badloc = {-1, 0, 0};
+
+JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_string_t *cl, size_t pc) JL_NOTSAFEPOINT
+{
+    assert(jl_is_string(cl));
+    int line_offset, line_bytes, to_bytes;
+    size_t nstmts = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+    if (pc > nstmts)
+        return badloc;
+    return unpack_codeloc(cl, pc, line_offset, line_bytes, to_bytes);
+}
+
+static int allzero(jl_value_t *codelocs) JL_NOTSAFEPOINT
+{
+    int32_t *p = jl_array_data(codelocs,int32_t);
+    int32_t *pend = p + jl_array_nrows(codelocs);
+    do {
+        if (*p)
+            return 0;
+    } while (++p < pend);
+    return 1;
+}
+
+JL_DLLEXPORT jl_string_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts) // firstline+Vector{Int32} => Memory{UInt8}
+{
+    assert(jl_typeis(codelocs, jl_array_int32_type));
+    if (jl_array_nrows(codelocs) == 0)
+        nstmts = 0;
+    assert(nstmts * 3 == jl_array_nrows(codelocs));
+    if (allzero(codelocs))
+        return jl_an_empty_string;
+    struct jl_codeloc_t codeloc, min, max;
+    size_t i;
+    min.line = min.to = min.pc = firstline <= 0 ? INT32_MAX : firstline;
+    max.line = max.to = max.pc = 0;
+    for (i = 0; i < nstmts; i++) {
+        memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc));
+#define SETMIN(x) if (codeloc.x < min.x) min.x = codeloc.x
+#define SETMAX(x) if (codeloc.x > max.x) max.x = codeloc.x
+        if (codeloc.line > 0)
+            SETMIN(line);
+        SETMAX(line);
+        SETMIN(to);
+        SETMAX(to);
+        SETMIN(pc);
+        SETMAX(pc);
+#undef SETMIN
+#undef SETMAX
+    }
+    int32_t header[3];
+    header[0] = min.line > max.line ? 0 : min.line;
+    header[1] = min.line > max.line ? 0 : max.line - min.line;
+    header[2] = max.to > max.pc ? max.to : max.pc;
+    size_t line_bytes;
+    if (header[1] < 255)
+        line_bytes = 1;
+    else if (header[1] < 65535)
+        line_bytes = 2;
+    else
+        line_bytes = 4;
+    size_t to_bytes;
+    if (header[2] == 0)
+        to_bytes = 0;
+    else if (header[2] < 255)
+        to_bytes = 1;
+    else if (header[2] < 65535)
+        to_bytes = 2;
+    else
+        to_bytes = 4;
+    jl_string_t *cl = jl_alloc_string(sizeof(header) + line_bytes + nstmts * (line_bytes + to_bytes * 2));
+    // store header structure
+    memcpy(jl_string_data(cl), &header, sizeof(header));
+    // pack bytes
+    char *ptr = jl_string_data(cl) + sizeof(header);
+    uint8_t int8;
+    uint16_t int16;
+    uint32_t int32;
+    { // store firstline value
+        int8 = int16 = int32 = firstline > 0 ? firstline - header[0] + 1 : 0;
+        switch (line_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += line_bytes;
+    }
+    for (i = 0; i < nstmts; i++) {
+        memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc));
+        int8 = int16 = int32 = codeloc.line > 0 ? codeloc.line - header[0] + 1 : 0;
+        switch (line_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += line_bytes;
+        int8 = int16 = int32 = codeloc.to;
+        switch (to_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += to_bytes;
+        int8 = int16 = int32 = codeloc.pc;
+        switch (to_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += to_bytes;
+    }
+    return cl;
+}
+
+JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_string_t *cl, size_t nstmts) // Memory{UInt8} => Vector{Int32}
+{
+    assert(jl_is_string(cl));
+    int line_offset, line_bytes, to_bytes;
+    size_t nlocs = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+    assert(nlocs == 0 || nlocs == nstmts);
+    jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmts * 3);
+    size_t i;
+    for (i = 0; i < nlocs; i++) {
+        struct jl_codeloc_t codeloc = unpack_codeloc(cl, i + 1, line_offset, line_bytes, to_bytes);;
+        memcpy(jl_array_data(codelocs,int32_t) + i * 3, &codeloc, sizeof(codeloc));
+    }
+    if (nlocs == 0) {
+        memset(jl_array_data(codelocs,int32_t), 0, nstmts * sizeof(struct jl_codeloc_t));
+    }
+    return codelocs;
+}
+
 void jl_init_serializer(void)
 {
     jl_task_t *ct = jl_current_task;
@@ -1186,7 +1458,6 @@ void jl_init_serializer(void)
     deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type;
     deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type;
     deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type;
-    deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type;
     deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type;
     deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type;
     deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type;
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index c3ccb5746e37a..442103c91be0f 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -3,7 +3,7 @@
 #include "llvm-version.h"
 #include "platform.h"
 #include <stdint.h>
-#include <sstream>
+#include <string>
 
 #include "llvm/IR/Mangler.h"
 #include <llvm/ADT/Statistic.h>
@@ -28,7 +28,11 @@
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
 #include <llvm/MC/TargetRegistry.h>
 #include <llvm/Target/TargetOptions.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Host.h>
+#else
 #include <llvm/Support/Host.h>
+#endif
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Object/SymbolSize.h>
 
@@ -38,7 +42,11 @@ using namespace llvm;
 #include "julia_assert.h"
 #include "processor.h"
 
+#if JL_LLVM_VERSION >= 180000
+# include <llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h>
+#else
 # include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
+#endif
 # include <llvm/ExecutionEngine/JITLink/EHFrameSupport.h>
 # include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
 # include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
@@ -113,6 +121,15 @@ static void *getTLSAddress(void *control)
 }
 #endif
 
+#ifdef _OS_OPENBSD_
+extern "C" {
+    __int128 __divti3(__int128, __int128);
+    __int128 __modti3(__int128, __int128);
+    unsigned __int128 __udivti3(unsigned __int128, unsigned __int128);
+    unsigned __int128 __umodti3(unsigned __int128, unsigned __int128);
+}
+#endif
+
 // Snooping on which functions are being compiled, and how long it takes
 extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_compiles_impl(void *s)
@@ -177,8 +194,6 @@ static orc::ThreadSafeModule jl_get_globals_module(orc::ThreadSafeContext &ctx,
     return GTSM;
 }
 
-extern jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m);
-
 // this generates llvm code for the lambda info
 // and adds the result to the jitlayers
 // (and the shadow module),
@@ -315,7 +330,7 @@ static jl_callptr_t _jl_compile_codeinst(
             jl_callptr_t prev_invoke = NULL;
             // Allow replacing addr if it is either NULL or our special waiting placeholder.
             if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
-                if (prev_invoke == &jl_fptr_wait_for_compiled && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
+                if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
                     addr = prev_invoke;
                     //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
                     //known lesser function)
@@ -372,7 +387,6 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
         backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx,  pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(), pparams ? pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple());
         into = &backing;
     }
-    JL_LOCK(&jl_codegen_lock);
     auto target_info = into->withModuleDo([&](Module &M) {
         return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
     });
@@ -385,6 +399,7 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
     const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams);
     bool success = true;
     if (!sysimg) {
+        JL_LOCK(&jl_ExecutionEngine->jitlock);
         if (jl_ExecutionEngine->getGlobalValueAddress(name)) {
             success = false;
         }
@@ -402,8 +417,8 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
             });
             jl_ExecutionEngine->addModule(std::move(*into));
         }
+        JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC
     }
-    JL_UNLOCK(&jl_codegen_lock);
     if (timed) {
         if (measure_compile_time_enabled) {
             auto end = jl_hrtime();
@@ -461,19 +476,22 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
 }
 
 extern "C" JL_DLLEXPORT_CODEGEN
-void jl_compile_codeinst_impl(jl_code_instance_t *ci)
+int jl_compile_codeinst_impl(jl_code_instance_t *ci)
 {
+    int newly_compiled = 0;
     if (jl_atomic_load_relaxed(&ci->invoke) != NULL) {
-        return;
+        return newly_compiled;
     }
-    JL_LOCK(&jl_codegen_lock);
+    JL_LOCK(&jl_ExecutionEngine->jitlock);
     if (jl_atomic_load_relaxed(&ci->invoke) == NULL) {
         ++SpecFPtrCount;
         uint64_t start = jl_typeinf_timing_begin();
         _jl_compile_codeinst(ci, NULL, *jl_ExecutionEngine->getContext());
         jl_typeinf_timing_end(start, 0);
+        newly_compiled = 1;
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
+    JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC
+    return newly_compiled;
 }
 
 extern "C" JL_DLLEXPORT_CODEGEN
@@ -490,7 +508,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
     uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
     if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
-    JL_LOCK(&jl_codegen_lock);
+    JL_LOCK(&jl_ExecutionEngine->jitlock);
     if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
         jl_code_info_t *src = NULL;
         JL_GC_PUSH1(&src);
@@ -501,12 +519,19 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
                 src = jl_uncompress_ir(def, NULL, (jl_value_t*)src);
         }
         else {
-            src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred);
+            jl_method_instance_t *mi = unspec->def;
+            jl_code_instance_t *uninferred = jl_cached_uninferred(
+                jl_atomic_load_relaxed(&mi->cache), 1);
+            assert(uninferred);
+            src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
             assert(src);
         }
         if (src) {
             assert(jl_is_code_info(src));
             ++UnspecFPtrCount;
+            jl_debuginfo_t *debuginfo = src->debuginfo;
+            jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. this assumes the field was previously NULL, which is not entirely true
+            jl_gc_wb(unspec, debuginfo);
             _jl_compile_codeinst(unspec, src, *jl_ExecutionEngine->getContext());
         }
         jl_callptr_t null = nullptr;
@@ -514,7 +539,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
         jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
         JL_GC_POP();
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
+    JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC
     if (timed) {
         if (measure_compile_time_enabled) {
             auto end = jl_hrtime();
@@ -534,66 +559,40 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
     jl_code_instance_t *codeinst = jl_compile_method_internal(mi, world);
     if (codeinst) {
         uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
-        if (getwrapper)
-            return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary);
         uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-        if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
-            // normally we prevent native code from being generated for these functions,
-            // (using sentinel value `1` instead)
-            // so create an exception here so we can print pretty our lies
-            auto ct = jl_current_task;
-            bool timed = (ct->reentrant_timing & 1) == 0;
-            if (timed)
-                ct->reentrant_timing |= 1;
-            uint64_t compiler_start_time = 0;
-            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-            if (measure_compile_time_enabled)
-                compiler_start_time = jl_hrtime();
-            JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
-            specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-            if (specfptr == 0) {
-                // Doesn't need SOURCE_MODE_FORCE_SOURCE_UNCACHED, because the codegen lock is held,
-                // so there's no concern that the ->inferred field will be deleted.
-                jl_code_instance_t *forced_ci = jl_type_infer(mi, world, 0, SOURCE_MODE_FORCE_SOURCE);
-                JL_GC_PUSH1(&forced_ci);
-                if (forced_ci) {
-                    // Force compile of this codeinst even though it already has an ->invoke
-                    _jl_compile_codeinst(forced_ci, NULL, *jl_ExecutionEngine->getContext());
-                    specfptr = (uintptr_t)jl_atomic_load_relaxed(&forced_ci->specptr.fptr);
-                }
-                JL_GC_POP();
-            }
-            JL_UNLOCK(&jl_codegen_lock);
-            if (timed) {
-                if (measure_compile_time_enabled) {
-                    auto end = jl_hrtime();
-                    jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
-                }
-                ct->reentrant_timing &= ~1ull;
-            }
-        }
+        if (getwrapper || specfptr == 0)
+            specfptr = fptr;
         if (specfptr != 0)
             return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary);
     }
-
-    // whatever, that didn't work - use the assembler output instead
-    jl_llvmf_dump_t llvmf_dump;
-    jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams);
-    if (!llvmf_dump.F)
-        return jl_an_empty_string;
-    return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false);
+    return jl_an_empty_string;
 }
 
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel)
+{
+#ifdef DISABLE_OPT
+    return CodeGenOptLevel::None;
+#else
+    return optlevel == 0 ? CodeGenOptLevel::None :
+        optlevel == 1 ? CodeGenOptLevel::Less :
+        optlevel == 2 ? CodeGenOptLevel::Default :
+        CodeGenOptLevel::Aggressive;
+#endif
+}
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
 {
 #ifdef DISABLE_OPT
     return CodeGenOpt::None;
 #else
-    return optlevel < 2 ? CodeGenOpt::None :
+    return optlevel == 0 ? CodeGenOpt::None :
+        optlevel == 1 ? CodeGenOpt::Less :
         optlevel == 2 ? CodeGenOpt::Default :
         CodeGenOpt::Aggressive;
 #endif
 }
+#endif
 
 static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT
 {
@@ -608,7 +607,7 @@ static Expected<orc::ThreadSafeModule> validateExternRelocations(orc::ThreadSafe
         auto F = dyn_cast<Function>(&GO);
         if (!F)
             return false;
-        return F->isIntrinsic() || F->getName().startswith("julia.");
+        return F->isIntrinsic() || F->getName().starts_with("julia.");
     };
     // validate the relocations for M (only for RuntimeDyld, JITLink performs its own symbol validation)
     auto Err = TSM.withModuleDo([isIntrinsicFunction](Module &M) JL_NOTSAFEPOINT {
@@ -703,22 +702,19 @@ struct JITObjectInfo {
 class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
     std::mutex PluginMutex;
     std::map<MaterializationResponsibility *, std::unique_ptr<JITObjectInfo>> PendingObjs;
-    // Resources from distinct `MaterializationResponsibility`s can get merged
-    // after emission, so we can have multiple debug objects per resource key.
-    std::map<ResourceKey, SmallVector<std::unique_ptr<JITObjectInfo>, 0>> RegisteredObjs;
 
 public:
     void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G,
                              jitlink::JITLinkContext &Ctx,
                              MemoryBufferRef InputObject) override
     {
-        // Keeping around a full copy of the input object file (and re-parsing it) is
-        // wasteful, but for now, this lets us reuse the existing debuginfo.cpp code.
-        // Should look into just directly pulling out all the information required in
-        // a JITLink pass and just keeping the required tables/DWARF sections around
-        // (perhaps using the LLVM DebuggerSupportPlugin as a reference).
         auto NewBuffer =
             MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName());
+        // Re-parsing the InputObject is wasteful, but for now, this lets us
+        // reuse the existing debuginfo.cpp code. Should look into just
+        // directly pulling out all the information required in a JITLink pass
+        // and just keeping the required tables/DWARF sections around (perhaps
+        // using the LLVM DebuggerSupportPlugin as a reference).
         auto NewObj =
             cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
 
@@ -752,13 +748,8 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
             };
 
             jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr);
-        }
-
-        cantFail(MR.withResourceKeyDo([&](ResourceKey K) {
-            std::lock_guard<std::mutex> lock(PluginMutex);
-            RegisteredObjs[K].push_back(std::move(PendingObjs[&MR]));
             PendingObjs.erase(&MR);
-        }));
+        }
 
         return Error::success();
     }
@@ -769,32 +760,23 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
         PendingObjs.erase(&MR);
         return Error::success();
     }
+
 #if JL_LLVM_VERSION >= 160000
     Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override
 #else
-    Error notifyRemovingResources(ResourceKey K) override
+    Error notifyRemovingResources(orc::ResourceKey K) override
 #endif
     {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        RegisteredObjs.erase(K);
-        // TODO: If we ever unload code, need to notify debuginfo registry.
         return Error::success();
     }
 
 #if JL_LLVM_VERSION >= 160000
-    void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) override
+    void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey,
+                                     orc::ResourceKey SrcKey) override {}
 #else
-    void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override
+    void notifyTransferringResources(orc::ResourceKey DstKey,
+                                     orc::ResourceKey SrcKey) override {}
 #endif
-    {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        auto SrcIt = RegisteredObjs.find(SrcKey);
-        if (SrcIt != RegisteredObjs.end()) {
-            for (std::unique_ptr<JITObjectInfo> &Info : SrcIt->second)
-                RegisteredObjs[DstKey].push_back(std::move(Info));
-            RegisteredObjs.erase(SrcIt);
-        }
-    }
 
     void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &,
                           jitlink::PassConfiguration &PassConfig) override
@@ -834,12 +816,12 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
 
 class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 private:
-    std::atomic<size_t> &total_size;
+    std::atomic<size_t> &jit_bytes_size;
 
 public:
 
-    JLMemoryUsagePlugin(std::atomic<size_t> &total_size)
-        : total_size(total_size) {}
+    JLMemoryUsagePlugin(std::atomic<size_t> &jit_bytes_size)
+        : jit_bytes_size(jit_bytes_size) {}
 
     Error notifyFailed(orc::MaterializationResponsibility &MR) override {
         return Error::success();
@@ -888,7 +870,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
             }
             (void) code_size;
             (void) data_size;
-            this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
+            this->jit_bytes_size.fetch_add(graph_size, std::memory_order_relaxed);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
@@ -1004,24 +986,7 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
                              const RuntimeDyld::LoadedObjectInfo &L,
                              const std::shared_ptr<RTDyldMemoryManager> &MemMgr)
 {
-    auto SavedObject = L.getObjectForDebug(Object).takeBinary();
-    // If the debug object is unavailable, save (a copy of) the original object
-    // for our backtraces.
-    // This copy seems unfortunate, but there doesn't seem to be a way to take
-    // ownership of the original buffer.
-    if (!SavedObject.first) {
-        auto NewBuffer =
-            MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName());
-        auto NewObj =
-            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
-        SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer));
-    }
-    const object::ObjectFile *DebugObj = SavedObject.first.release();
-    SavedObject.second.release();
-
     StringMap<object::SectionRef> loadedSections;
-    // Use the original Object, not the DebugObject, as this is used for the
-    // RuntimeDyld::LoadedObjectInfo lookup.
     for (const object::SectionRef &lSection : Object.sections()) {
         auto sName = lSection.getName();
         if (sName) {
@@ -1038,7 +1003,9 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
         return L.getSectionLoadAddress(search->second);
     };
 
-    jl_register_jit_object(*DebugObj, getLoadAddress,
+    auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op.
+    jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object,
+        getLoadAddress,
 #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
         [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); }
 #else
@@ -1208,7 +1175,7 @@ namespace {
                 {
                     if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) {
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
                             // Each function is printed as a YAML object with several attributes
@@ -1261,7 +1228,7 @@ namespace {
                         // Print LLVM function statistics _after_ optimization
                         ios_printf(stream, "  after: \n");
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
                             Stat(F).dump(stream);
@@ -1448,7 +1415,7 @@ struct JuliaOJIT::DLSymOptimizer {
     void operator()(Module &M) {
         for (auto &GV : M.globals()) {
             auto Name = GV.getName();
-            if (Name.startswith("jlplt") && Name.endswith("got")) {
+            if (Name.starts_with("jlplt") && Name.ends_with("got")) {
                 auto fname = GV.getAttribute("julia.fname").getValueAsString().str();
                 void *addr;
                 if (GV.hasAttribute("julia.libname")) {
@@ -1472,9 +1439,11 @@ struct JuliaOJIT::DLSymOptimizer {
                     if (named) {
                         auto T = GV.getValueType();
                         assert(T->isPointerTy());
+                        #if JL_LLVM_VERSION < 170000
                         if (!T->isOpaquePointerTy()) {
                             T = T->getNonOpaquePointerElementType();
                         }
+                        #endif
                         init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M);
                     }
                     GV.setInitializer(init);
@@ -1525,9 +1494,11 @@ struct JuliaOJIT::DLSymOptimizer {
                         if (named) {
                             auto T = CI->getType();
                             assert(T->isPointerTy());
+                            #if JL_LLVM_VERSION < 170000
                             if (!T->isOpaquePointerTy()) {
                                 T = T->getNonOpaquePointerElementType();
                             }
+                            #endif
                             init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M);
                         }
                         // DCE and SimplifyCFG will kill the branching structure around
@@ -1571,6 +1542,7 @@ void fixupTM(TargetMachine &TM) {
     }
 }
 
+#if JL_LLVM_VERSION < 170000
 extern int jl_opaque_ptrs_set;
 void SetOpaquePointer(LLVMContext &ctx) {
     if (jl_opaque_ptrs_set)
@@ -1581,6 +1553,7 @@ void SetOpaquePointer(LLVMContext &ctx) {
     ctx.setOpaquePointers(true);
 #endif
 }
+#endif
 
 llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
     // Mark our address spaces as non-integral
@@ -1603,7 +1576,9 @@ JuliaOJIT::JuliaOJIT()
     DLSymOpt(std::make_unique<DLSymOptimizer>(false)),
     ContextPool([](){
         auto ctx = std::make_unique<LLVMContext>();
+        #if JL_LLVM_VERSION < 170000
         SetOpaquePointer(*ctx);
+        #endif
         return orc::ThreadSafeContext(std::move(ctx));
     }),
 #ifdef JL_USE_JITLINK
@@ -1628,6 +1603,7 @@ JuliaOJIT::JuliaOJIT()
     ExternalCompileLayer(ES, LockLayer,
         std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM))
 {
+    JL_MUTEX_INIT(&this->jitlock, "JuliaOJIT");
 #ifdef JL_USE_JITLINK
 # if defined(LLVM_SHLIB)
     // When dynamically linking against LLVM, use our custom EH frame registration code
@@ -1640,7 +1616,7 @@ JuliaOJIT::JuliaOJIT()
         ES, std::move(ehRegistrar)));
 
     ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
-    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(total_size));
+    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(jit_bytes_size));
 #else
     ObjectLayer.setNotifyLoaded(
         [this](orc::MaterializationResponsibility &MR,
@@ -1693,7 +1669,7 @@ JuliaOJIT::JuliaOJIT()
                   DL.getGlobalPrefix(),
                   [&](const orc::SymbolStringPtr &S) {
                         const char *const atomic_prefix = "__atomic_";
-                        return (*S).startswith(atomic_prefix);
+                        return (*S).starts_with(atomic_prefix);
                   })));
         }
     }
@@ -1726,8 +1702,38 @@ JuliaOJIT::JuliaOJIT()
     };
     cantFail(GlobalJD.define(orc::symbolAliases(jl_crt)));
 
+#ifdef _OS_OPENBSD_
+    orc::SymbolMap i128_crt;
+
+    i128_crt[mangle("__divti3")] = JITEvaluatedSymbol::fromPointer(&__divti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__modti3")] = JITEvaluatedSymbol::fromPointer(&__modti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__udivti3")] = JITEvaluatedSymbol::fromPointer(&__udivti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__umodti3")] = JITEvaluatedSymbol::fromPointer(&__umodti3, JITSymbolFlags::Exported);
+
+    cantFail(GlobalJD.define(orc::absoluteSymbols(i128_crt)));
+#endif
+
 #ifdef MSAN_EMUTLS_WORKAROUND
     orc::SymbolMap msan_crt;
+    #if JL_LLVM_VERSION >= 170000
+    msan_crt[mangle("__emutls_get_address")] = {ExecutorAddr::fromPtr(msan_workaround::getTLSAddress), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_param_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_retval_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_overflow_size))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin))), JITSymbolFlags::Exported};
+    #else
     msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported);
     msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer(
         reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported);
@@ -1745,11 +1751,16 @@ JuliaOJIT::JuliaOJIT()
         reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported);
     msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer(
         reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
+    #endif
     cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
 #endif
 #ifdef _COMPILER_ASAN_ENABLED_
     orc::SymbolMap asan_crt;
+    #if JL_LLVM_VERSION >= 170000
+    asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&___asan_globals_registered), JITSymbolFlags::Exported};
+    #else
     asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
+    #endif
     cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
 #endif
 }
@@ -1764,7 +1775,11 @@ orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name)
 
 void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)
 {
+    #if JL_LLVM_VERSION >= 170000
+    cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}})));
+    #else
     cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}})));
+    #endif
 }
 
 void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
@@ -1802,7 +1817,7 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
     auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports);
     if (!Lookups) {
         ES.reportError(Lookups.takeError());
-        errs() << "Failed to lookup symbols in module!";
+        errs() << "Failed to lookup symbols in module!\n";
         if (CurrentlyCompiling) {
             CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; });
         } else {
@@ -1810,7 +1825,11 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
         }
     }
     for (auto &Sym : *Lookups) {
+        #if JL_LLVM_VERSION >= 170000
+        assert(Sym.second.getAddress());
+        #else
         assert(Sym.second);
+        #endif
         (void) Sym;
     }
 }
@@ -1839,6 +1858,48 @@ Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr<MemoryBuffer>
     return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
 }
 
+#if JL_LLVM_VERSION >= 170000
+Expected<ExecutorSymbolDef> JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
+{
+    orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD};
+    ArrayRef<orc::JITDylib*> SearchOrder = ArrayRef<orc::JITDylib*>(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1);
+    auto Sym = ES.lookup(SearchOrder, Name);
+    return Sym;
+}
+
+Expected<ExecutorSymbolDef> JuliaOJIT::findUnmangledSymbol(StringRef Name)
+{
+    return findSymbol(getMangledName(Name), true);
+}
+
+Expected<ExecutorSymbolDef> JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly)
+{
+    orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD};
+    ArrayRef<orc::JITDylib*> SearchOrder = ArrayRef<orc::JITDylib*>(&SearchOrders[0], ExternalJDOnly ? 1 : 3);
+    auto Sym = ES.lookup(SearchOrder, getMangledName(Name));
+    return Sym;
+}
+
+uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name)
+{
+    auto addr = findSymbol(getMangledName(Name), false);
+    if (!addr) {
+        consumeError(addr.takeError());
+        return 0;
+    }
+    return cantFail(std::move(addr)).getAddress().getValue();
+}
+
+uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
+{
+    auto addr = findSymbol(getMangledName(Name), false);
+    if (!addr) {
+        consumeError(addr.takeError());
+        return 0;
+    }
+    return cantFail(std::move(addr)).getAddress().getValue();
+}
+#else
 JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
 {
     orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD};
@@ -1854,7 +1915,7 @@ JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name)
     return findSymbol(getMangledName(Name), true);
 }
 
-Expected<JITEvaluatedSymbol> JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly)
+Expected<JL_JITSymbol> JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly)
 {
     orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD};
     ArrayRef<orc::JITDylib*> SearchOrder = ArrayRef<orc::JITDylib*>(&SearchOrders[0], ExternalJDOnly ? 1 : 3);
@@ -1869,7 +1930,7 @@ uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(addr.getAddress());
+    return cantFail(addr).getAddress();
 }
 
 uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
@@ -1879,19 +1940,19 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(addr.getAddress());
+    return cantFail(addr).getAddress();
 }
+#endif
 
-StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
+StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst)
 {
     std::lock_guard<std::mutex> lock(RLST_mutex);
-    assert(Addr != (uint64_t)&jl_fptr_wait_for_compiled);
+    assert(Addr != (uint64_t)jl_fptr_wait_for_compiled_addr);
     std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
     if (fname->empty()) {
         std::string string_fname;
         raw_string_ostream stream_fname(string_fname);
         // try to pick an appropriate name that describes it
-        jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
         if (Addr == (uintptr_t)invoke) {
             stream_fname << "jsysw_";
         }
@@ -1920,8 +1981,13 @@ llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     orc::SymbolMap GDBFunctions;
+    #if JL_LLVM_VERSION >= 170000
+    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
+    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
+    #else
     GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
     GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+    #endif
     cantFail(JD.define(orc::absoluteSymbols(GDBFunctions)));
     if (TM->getTargetTriple().isOSBinFormatMachO())
         ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple())));
@@ -1962,19 +2028,20 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV)
     return getMangledName(GV->getName());
 }
 
-#ifdef JL_USE_JITLINK
 size_t JuliaOJIT::getTotalBytes() const
 {
-    return total_size.load(std::memory_order_relaxed);
+    auto bytes = jit_bytes_size.load(std::memory_order_relaxed);
+#ifndef JL_USE_JITLINK
+    size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
+    bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+#endif
+    return bytes;
 }
-#else
-size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
 
-size_t JuliaOJIT::getTotalBytes() const
+void JuliaOJIT::addBytes(size_t bytes)
 {
-    return getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+    jit_bytes_size.fetch_add(bytes, std::memory_order_relaxed);
 }
-#endif
 
 void JuliaOJIT::printTimers()
 {
@@ -2026,8 +2093,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS
                     //        Init.push_back(cast_or_null<Constant>(Op));
                     //    for (auto &Op : sCA->operands())
                     //        Init.push_back(cast_or_null<Constant>(Op));
-                    //    Type *Int8PtrTy = Type::getInt8PtrTy(dest.getContext());
-                    //    ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
+                    //    ArrayType *ATy = ArrayType::get(PointerType::get(dest.getContext()), Init.size());
                     //    GlobalVariable *GV = new GlobalVariable(dest, ATy, dG->isConstant(),
                     //            GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "",
                     //            dG->getThreadLocalMode(), dG->getType()->getAddressSpace());
@@ -2047,7 +2113,11 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS
                 }
                 // Reparent the global variable:
                 SG.removeFromParent();
+                #if JL_LLVM_VERSION >= 170000
+                dest.insertGlobalVariable(&SG);
+                #else
                 dest.getGlobalList().push_back(&SG);
+                #endif
                 // Comdat is owned by the Module
                 SG.setComdat(nullptr);
             }
@@ -2094,7 +2164,11 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS
                     }
                 }
                 SG.removeFromParent();
+                #if JL_LLVM_VERSION >= 170000
+                dest.insertAlias(&SG);
+                #else
                 dest.getAliasList().push_back(&SG);
+                #endif
             }
 
             // metadata nodes need to be explicitly merged not just copied
@@ -2152,8 +2226,15 @@ static void jl_decorate_module(Module &M) {
         // Add special values used by debuginfo to build the UnwindData table registration for Win64
         // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text`
         // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here.
-        M.appendModuleInlineAsm("\
-    .section .text                  \n\
+        std::string inline_asm = "\
+    .section ";
+        inline_asm +=
+#if JL_LLVM_VERSION >= 180000
+    ".ltext,\"ax\",@progbits";
+#else
+    ".text";
+#endif
+        inline_asm +=              "\n\
     .type   __UnwindData,@object    \n\
     .p2align        2, 0x90         \n\
     __UnwindData:                   \n\
@@ -2164,7 +2245,9 @@ static void jl_decorate_module(Module &M) {
         .p2align        2, 0x90     \n\
     __catchjmp:                     \n\
         .zero   12                  \n\
-        .size   __catchjmp, 12");
+        .size   __catchjmp, 12";
+
+        M.appendModuleInlineAsm(inline_asm);
     }
 }
 
@@ -2252,3 +2335,9 @@ size_t jl_jit_total_bytes_impl(void)
 {
     return jl_ExecutionEngine->getTotalBytes();
 }
+
+// API for adding bytes to record being owned by the JIT
+void jl_jit_add_bytes(size_t bytes)
+{
+    jl_ExecutionEngine->addBytes(bytes);
+}
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 931f0070042ac..107782e354d4a 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -26,6 +26,7 @@
 #include "julia_internal.h"
 #include "platform.h"
 #include "llvm-codegen-shared.h"
+#include "llvm-version.h"
 #include <stack>
 #include <queue>
 
@@ -90,6 +91,7 @@ struct OptimizationOptions {
     bool enable_vector_pipeline;
     bool remove_ni;
     bool cleanup;
+    bool warn_missed_transformations;
 
     static constexpr OptimizationOptions defaults(
         bool lower_intrinsics=true,
@@ -103,12 +105,13 @@ struct OptimizationOptions {
         bool enable_loop_optimizations=true,
         bool enable_vector_pipeline=true,
         bool remove_ni=true,
-        bool cleanup=true) {
+        bool cleanup=true,
+        bool warn_missed_transformations=false) {
         return {lower_intrinsics, dump_native, external_use, llvm_only,
                 always_inline, enable_early_simplifications,
                 enable_early_optimizations, enable_scalar_optimizations,
                 enable_loop_optimizations, enable_vector_pipeline,
-                remove_ni, cleanup};
+                remove_ni, cleanup, warn_missed_transformations};
     }
 };
 
@@ -261,9 +264,7 @@ jl_llvm_functions_t jl_emit_code(
         orc::ThreadSafeModule &M,
         jl_method_instance_t *mi,
         jl_code_info_t *src,
-        jl_value_t *jlrettype,
-        jl_codegen_params_t &params,
-        size_t min_world, size_t max_world);
+        jl_codegen_params_t &params);
 
 jl_llvm_functions_t jl_emit_codeinst(
         orc::ThreadSafeModule &M,
@@ -342,11 +343,13 @@ class MaxAlignedAllocImpl
 };
 using MaxAlignedAlloc = MaxAlignedAllocImpl<>;
 
+#if JL_LLVM_VERSION < 170000
 typedef JITSymbol JL_JITSymbol;
 // The type that is similar to SymbolInfo on LLVM 4.0 is actually
 // `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol
 // is expected.
 typedef JITSymbol JL_SymbolInfo;
+#endif
 
 using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
 using OptimizerResultT = Expected<orc::ThreadSafeModule>;
@@ -520,16 +523,22 @@ class JuliaOJIT {
                             bool ShouldOptimize = false) JL_NOTSAFEPOINT;
     Error addObjectFile(orc::JITDylib &JD,
                         std::unique_ptr<MemoryBuffer> Obj) JL_NOTSAFEPOINT;
-    Expected<JITEvaluatedSymbol> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
     orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; };
     orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; }
     orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; }
 
-    JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
-    JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
+    #if JL_LLVM_VERSION >= 170000
+    Expected<llvm::orc::ExecutorSymbolDef> findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
+    Expected<llvm::orc::ExecutorSymbolDef> findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
+    Expected<llvm::orc::ExecutorSymbolDef> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
+    #else
+    JITEvaluatedSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
+    JITEvaluatedSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
+    Expected<JITEvaluatedSymbol> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
+    #endif
     uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT;
     uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT;
-    StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT;
+    StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT;
     auto getContext() JL_NOTSAFEPOINT {
         return *ContextPool;
     }
@@ -551,6 +560,7 @@ class JuliaOJIT {
     TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;
 
     size_t getTotalBytes() const JL_NOTSAFEPOINT;
+    void addBytes(size_t bytes) JL_NOTSAFEPOINT;
     void printTimers() JL_NOTSAFEPOINT;
 
     jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
@@ -568,6 +578,8 @@ class JuliaOJIT {
     // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls
     void optimizeDLSyms(Module &M);
 
+    jl_mutex_t jitlock;
+
 private:
 
     const std::unique_ptr<TargetMachine> TM;
@@ -595,10 +607,10 @@ class JuliaOJIT {
 
     ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
 
+    std::atomic<size_t> jit_bytes_size{0};
 #ifndef JL_USE_JITLINK
     const std::shared_ptr<RTDyldMemoryManager> MemMgr;
 #else
-    std::atomic<size_t> total_size{0};
     const std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
 #endif
     ObjLayerT ObjectLayer;
@@ -624,11 +636,18 @@ Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT {
     return *_shared_module;
 }
 void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT;
+
+#if JL_LLVM_VERSION < 170000
 void SetOpaquePointer(LLVMContext &ctx) JL_NOTSAFEPOINT;
+#endif
 
 void optimizeDLSyms(Module &M);
 
 // NewPM
 #include "passes.h"
 
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#endif
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index f40b252180a65..8711c14514145 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -35,6 +35,7 @@
     XX(jl_const_type) \
     XX(jl_core_module) \
     XX(jl_datatype_type) \
+    XX(jl_debuginfo_type) \
     XX(jl_densearray_type) \
     XX(jl_diverror_exception) \
     XX(jl_emptysvec) \
@@ -50,6 +51,7 @@
     XX(jl_floatingpoint_type) \
     XX(jl_function_type) \
     XX(jl_binding_type) \
+    XX(jl_binding_partition_type) \
     XX(jl_globalref_type) \
     XX(jl_gotoifnot_type) \
     XX(jl_enternode_type) \
@@ -137,6 +139,7 @@
     XX(jl_uint8_type) \
     XX(jl_undefref_exception) \
     XX(jl_undefvarerror_type) \
+    XX(jl_fielderror_type) \
     XX(jl_unionall_type) \
     XX(jl_uniontype_type) \
     XX(jl_upsilonnode_type) \
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 48bbec09688b7..7abf2b055bb8c 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -97,7 +97,6 @@
     XX(jl_cstr_to_string) \
     XX(jl_current_exception) \
     XX(jl_debug_method_invalidation) \
-    XX(jl_declare_constant) \
     XX(jl_defines_or_exports_p) \
     XX(jl_deprecate_binding) \
     XX(jl_dlclose) \
@@ -109,6 +108,7 @@
     XX(jl_egal__bits) \
     XX(jl_egal__bitstag) \
     XX(jl_eh_restore_state) \
+    XX(jl_eh_restore_state_noexcept) \
     XX(jl_enter_handler) \
     XX(jl_enter_threaded_region) \
     XX(jl_environ) \
@@ -139,13 +139,8 @@
     XX(jl_gc_add_ptr_finalizer) \
     XX(jl_gc_add_quiescent) \
     XX(jl_gc_allocobj) \
-    XX(jl_gc_alloc_0w) \
-    XX(jl_gc_alloc_1w) \
-    XX(jl_gc_alloc_2w) \
-    XX(jl_gc_alloc_3w) \
     XX(jl_gc_alloc_typed) \
     XX(jl_gc_big_alloc) \
-    XX(jl_gc_big_alloc_instrumented) \
     XX(jl_gc_collect) \
     XX(jl_gc_conservative_gc_support_enabled) \
     XX(jl_gc_counted_calloc) \
@@ -166,15 +161,13 @@
     XX(jl_gc_pool_live_bytes) \
     XX(jl_gc_live_bytes) \
     XX(jl_gc_managed_malloc) \
-    XX(jl_gc_managed_realloc) \
     XX(jl_gc_mark_queue_obj) \
     XX(jl_gc_mark_queue_objarray) \
     XX(jl_gc_max_internal_obj_size) \
     XX(jl_gc_new_weakref) \
     XX(jl_gc_new_weakref_th) \
     XX(jl_gc_num) \
-    XX(jl_gc_pool_alloc) \
-    XX(jl_gc_pool_alloc_instrumented) \
+    XX(jl_gc_small_alloc) \
     XX(jl_gc_queue_multiroot) \
     XX(jl_gc_queue_root) \
     XX(jl_gc_safepoint) \
@@ -191,8 +184,9 @@
     XX(jl_gc_total_hrtime) \
     XX(jl_gdblookup) \
     XX(jl_generating_output) \
-    XX(jl_generic_function_def) \
+    XX(jl_declare_const_gf) \
     XX(jl_gensym) \
+    XX(jl_getaffinity) \
     XX(jl_getallocationgranularity) \
     XX(jl_getnameinfo) \
     XX(jl_getpagesize) \
@@ -366,6 +360,7 @@
     XX(jl_pointerref) \
     XX(jl_pointerset) \
     XX(jl_pop_handler) \
+    XX(jl_pop_handler_noexcept) \
     XX(jl_preload_sysimg_so) \
     XX(jl_prepend_cwd) \
     XX(jl_printf) \
@@ -404,6 +399,7 @@
     XX(jl_safepoint_suspend_thread) \
     XX(jl_safepoint_resume_thread) \
     XX(jl_SC_CLK_TCK) \
+    XX(jl_setaffinity) \
     XX(jl_set_ARGS) \
     XX(jl_set_const) \
     XX(jl_set_errno) \
@@ -424,7 +420,6 @@
     XX(jl_set_zero_subnormals) \
     XX(jl_sigatomic_begin) \
     XX(jl_sigatomic_end) \
-    XX(jl_sig_throw) \
     XX(jl_spawn) \
     XX(jl_specializations_get_linfo) \
     XX(jl_specializations_lookup) \
@@ -457,6 +452,8 @@
     XX(jl_test_cpu_feature) \
     XX(jl_threadid) \
     XX(jl_threadpoolid) \
+    XX(jl_get_ptls_rng) \
+    XX(jl_set_ptls_rng) \
     XX(jl_throw) \
     XX(jl_throw_out_of_memory_error) \
     XX(jl_too_few_args) \
@@ -467,8 +464,6 @@
     XX(jl_try_substrtof) \
     XX(jl_tty_set_mode) \
     XX(jl_typeassert) \
-    XX(jl_typeinf_lock_begin) \
-    XX(jl_typeinf_lock_end) \
     XX(jl_typeinf_timing_begin) \
     XX(jl_typeinf_timing_end) \
     XX(jl_typename_str) \
@@ -547,7 +542,6 @@
     YY(jl_type_to_llvm) \
     YY(jl_getUnwindInfo) \
     YY(jl_get_libllvm) \
-    YY(jl_build_newpm_pipeline) \
     YY(jl_register_passbuilder_callbacks) \
     YY(LLVMExtraMPMAddCPUFeaturesPass) \
     YY(LLVMExtraMPMAddRemoveNIPass) \
diff --git a/src/jlapi.c b/src/jlapi.c
index 276a792b2cd66..a3621385a437e 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -158,7 +158,7 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         r = NULL;
     }
     return r;
@@ -170,9 +170,9 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
  * @return A pointer to `jl_value_t` representing the current exception.
  *         Returns `NULL` if no exception is currently thrown.
  */
-JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_excstack_t *s = jl_current_task->excstack;
+    jl_excstack_t *s = ct->excstack;
     return s && s->top != 0 ? jl_excstack_exception(s, s->top) : jl_nothing;
 }
 
@@ -300,7 +300,7 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t n
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -328,7 +328,7 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -360,7 +360,7 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -394,7 +394,7 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -431,7 +431,7 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -447,6 +447,7 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
 JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t *s = (jl_value_t*)jl_symbol(fld);
         int i = jl_field_index((jl_datatype_t*)jl_typeof(o), (jl_sym_t*)s, 1);
@@ -454,7 +455,7 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_current_task->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
@@ -852,6 +853,7 @@ JL_DLLEXPORT int jl_set_fenv_rounding(int i)
 
 static int exec_program(char *program)
 {
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_load(jl_main_module, program);
     }
@@ -860,7 +862,7 @@ static int exec_program(char *program)
         //       printing directly to STDERR_FILENO.
         int shown_err = 0;
         jl_printf(JL_STDERR, "error during bootstrap:\n");
-        jl_value_t *exc = jl_current_exception();
+        jl_value_t *exc = jl_current_exception(ct);
         jl_value_t *showf = jl_base_module ? jl_get_function(jl_base_module, "show") : NULL;
         if (showf) {
             jl_value_t *errs = jl_stderr_obj();
@@ -889,8 +891,8 @@ static NOINLINE int true_main(int argc, char *argv[])
     jl_function_t *start_client = jl_base_module ?
         (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL;
 
+    jl_task_t *ct = jl_current_task;
     if (start_client) {
-        jl_task_t *ct = jl_current_task;
         int ret = 1;
         JL_TRY {
             size_t last_age = ct->world_age;
@@ -902,7 +904,7 @@ static NOINLINE int true_main(int argc, char *argv[])
             ct->world_age = last_age;
         }
         JL_CATCH {
-            jl_no_exc_handler(jl_current_exception(), ct);
+            jl_no_exc_handler(jl_current_exception(ct), ct);
         }
         return ret;
     }
@@ -946,7 +948,7 @@ static NOINLINE int true_main(int argc, char *argv[])
                 line = NULL;
             }
             jl_printf((JL_STREAM*)STDERR_FILENO, "\nparser error:\n");
-            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
             jl_print_backtrace(); // written to STDERR_FILENO
         }
diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm
index b46663c560346..2c5f42eda5ce8 100644
--- a/src/jlfrontend.scm
+++ b/src/jlfrontend.scm
@@ -32,8 +32,6 @@
 ;; this is overwritten when we run in actual julia
 (define (defined-julia-global v) #f)
 (define (nothrow-julia-global v) #f)
-(define (julia-current-file) 'none)
-(define (julia-current-line) 0)
 
 ;; parser entry points
 
@@ -182,7 +180,10 @@
      ;; Abuse scm_to_julia here to convert arguments to warn. This is meant for
      ;; `Expr`s but should be good enough provided we're only passing simple
      ;; numbers, symbols and strings.
-     ((lowering-warning (lambda lst (set! warnings (cons (cons 'warn lst) warnings)))))
+     ((lowering-warning (lambda (level group warn_file warn_line . lst)
+        (let ((line (if (= warn_line 0) line warn_line))
+              (file (if (eq? warn_file 'none) file warn_file)))
+          (set! warnings (cons (list* 'warn level group (symbol (string file line)) file line lst) warnings))))))
      (let ((thunk (if stmt
                       (expand-to-thunk-stmt- expr file line)
                       (expand-to-thunk- expr file line))))
diff --git a/src/jloptions.c b/src/jloptions.c
index 2ba5174e5c730..4cdec2c7b367f 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -100,132 +100,169 @@ JL_DLLEXPORT void jl_init_options(void)
                         0, // strip-ir
                         0, // permalloc_pkgimg
                         0, // heap-size-hint
+                        0, // trace_compile_timing
     };
     jl_options_initialized = 1;
 }
 
 static const char usage[] = "\n    julia [switches] -- [programfile] [args...]\n\n";
 static const char opts[]  =
-    "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation):\n\n"
-    " -v, --version              Display version information\n"
-    " -h, --help                 Print this message (--help-hidden for more)\n"
-    " --help-hidden              Uncommon options not shown by `-h`\n\n"
+    "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package\n"
+    "precompilation):\n\n"
+    " Option                                        Description\n"
+    " ---------------------------------------------------------------------------------------------------\n"
+    " -v, --version                                 Display version information\n"
+    " -h, --help                                    Print command-line options (this message)\n"
+    " --help-hidden                                 Print uncommon options not shown by `-h`\n\n"
 
     // startup options
-    " --project[={<dir>|@.}]     Set <dir> as the active project/environment\n"
-    " -J, --sysimage <file>      Start up with the given system image file\n"
-    " -H, --home <dir>           Set location of `julia` executable\n"
-    " --startup-file={yes*|no}   Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`\n"
-    "                            environment variable is unset, load `~/.julia/config/startup.jl`\n"
-    " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n"
-    " --sysimage-native-code={yes*|no}\n"
-    "                            Use native code from system image if available\n"
-    " --compiled-modules={yes*|no|existing|strict}\n"
-    "                            Enable or disable incremental precompilation of modules\n"
-    " --pkgimages={yes*|no|existing}\n"
-    "                            Enable or disable usage of native code caching in the form of pkgimages ($)\n\n"
+    " --project[={<dir>|@temp|@.}]                  Set <dir> as the active project/environment.\n"
+    "                                               Or, create a temporary environment with `@temp`\n"
+    "                                               The default @. option will search through parent\n"
+    "                                               directories until a Project.toml or JuliaProject.toml\n"
+    "                                               file is found.\n"
+    " -J, --sysimage <file>                         Start up with the given system image file\n"
+    " -H, --home <dir>                              Set location of `julia` executable\n"
+    " --startup-file={yes*|no}                      Load `JULIA_DEPOT_PATH/config/startup.jl`; \n"
+    "                                               if `JULIA_DEPOT_PATH` environment variable is unset,\n"
+    "                                               load `~/.julia/config/startup.jl`\n"
+    " --handle-signals={yes*|no}                    Enable or disable Julia's default signal handlers\n"
+    " --sysimage-native-code={yes*|no}              Use native code from system image if available\n"
+    " --compiled-modules={yes*|no|existing|strict}  Enable or disable incremental precompilation of\n"
+    "                                               modules. The `existing` option allows use of existing\n"
+    "                                               compiled modules that were previously precompiled,\n"
+    "                                               but disallows creation of new precompile files.\n"
+    "                                               The `strict` option is similar, but will error if no\n"
+    "                                               precompile file is found.\n"
+    " --pkgimages={yes*|no|existing}                Enable or disable usage of native code caching in the\n"
+    "                                               form of pkgimages. The `existing` option allows use\n"
+    "                                               of existing pkgimages but disallows creation of new\n"
+    "                                               ones ($)\n\n"
 
     // actions
-    " -e, --eval <expr>          Evaluate <expr>\n"
-    " -E, --print <expr>         Evaluate <expr> and display the result\n"
-    " -m, --module <Package> [args]\n"
-    "                            Run entry point of `Package` (`@main` function) with `args'.\n"
-    " -L, --load <file>          Load <file> immediately on all processors\n\n"
+    " -e, --eval <expr>                             Evaluate <expr>\n"
+    " -E, --print <expr>                            Evaluate <expr> and display the result\n"
+    " -m, --module <Package> [args]                 Run entry point of `Package` (`@main` function) with\n"
+    "                                               `args'.\n"
+    " -L, --load <file>                             Load <file> immediately on all processors\n\n"
 
     // parallel options
-    " -t, --threads {auto|N[,auto|M]}\n"
-    "                           Enable N[+M] threads; N threads are assigned to the `default`\n"
-    "                           threadpool, and if M is specified, M threads are assigned to the\n"
-    "                           `interactive` threadpool; \"auto\" tries to infer a useful\n"
-    "                           default number of threads to use but the exact behavior might change\n"
-    "                           in the future. Currently sets N to the number of CPUs assigned to\n"
-    "                           this Julia process based on the OS-specific affinity assignment\n"
-    "                           interface if supported (Linux and Windows) or to the number of CPU\n"
-    "                           threads if not supported (MacOS) or if process affinity is not\n"
-    "                           configured, and sets M to 1.\n"
-    " --gcthreads=N[,M]         Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC.\n"
-    "                           N is set to half of the number of compute threads and M is set to 0 if unspecified.\n"
-    " -p, --procs {N|auto}      Integer value N launches N additional local worker processes\n"
-    "                           \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n"
-    " --machine-file <file>     Run processes on hosts listed in <file>\n\n"
+    " -t, --threads {auto|N[,auto|M]}               Enable N[+M] threads; N threads are assigned to the\n"
+    "                                               `default` threadpool, and if M is specified, M\n"
+    "                                               threads are assigned to the `interactive`\n"
+    "                                               threadpool; `auto` tries to infer a useful\n"
+    "                                               default number of threads to use but the exact\n"
+    "                                               behavior might change in the future. Currently sets\n"
+    "                                               N to the number of CPUs assigned to this Julia\n"
+    "                                               process based on the OS-specific affinity assignment\n"
+    "                                               interface if supported (Linux and Windows) or to the\n"
+    "                                               number of CPU threads if not supported (MacOS) or if\n"
+    "                                               process affinity is not configured, and sets M to 1.\n"
+    " --gcthreads=N[,M]                             Use N threads for the mark phase of GC and M (0 or 1)\n"
+    "                                               threads for the concurrent sweeping phase of GC.\n"
+    "                                               N is set to the number of compute threads and\n"
+    "                                               M is set to 0 if unspecified.\n"
+    " -p, --procs {N|auto}                          Integer value N launches N additional local worker\n"
+    "                                               processes `auto` launches as many workers as the\n"
+    "                                               number of local CPU threads (logical cores).\n"
+    " --machine-file <file>                         Run processes on hosts listed in <file>\n\n"
 
     // interactive options
-    " -i, --interactive          Interactive mode; REPL runs and `isinteractive()` is true\n"
-    " -q, --quiet                Quiet startup: no banner, suppress REPL warnings\n"
-    " --banner={yes|no|short|auto*}\n"
-    "                            Enable or disable startup banner\n"
-    " --color={yes|no|auto*}     Enable or disable color text\n"
-    " --history-file={yes*|no}   Load or save history\n\n"
+    " -i, --interactive                             Interactive mode; REPL runs and\n"
+    "                                               `isinteractive()` is true.\n"
+    " -q, --quiet                                   Quiet startup: no banner, suppress REPL warnings\n"
+    " --banner={yes|no|short|auto*}                 Enable or disable startup banner\n"
+    " --color={yes|no|auto*}                        Enable or disable color text\n"
+    " --history-file={yes*|no}                      Load or save history\n\n"
 
     // error and warning options
-    " --depwarn={yes|no*|error}  Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)\n"
-    " --warn-overwrite={yes|no*} Enable or disable method overwrite warnings\n"
-    " --warn-scope={yes*|no}     Enable or disable warning for ambiguous top-level scope\n\n"
+    " --depwarn={yes|no*|error}                     Enable or disable syntax and method deprecation\n"
+    "                                               warnings (`error` turns warnings into errors)\n"
+    " --warn-overwrite={yes|no*}                    Enable or disable method overwrite warnings\n"
+    " --warn-scope={yes*|no}                        Enable or disable warning for ambiguous top-level\n"
+    "                                               scope\n\n"
 
     // code generation options
-    " -C, --cpu-target <target>  Limit usage of CPU features up to <target>; set to `help` to see the available options\n"
-    " -O, --optimize={0,1,2*,3}  Set the optimization level (level 3 if `-O` is used without a level) ($)\n"
-    " --min-optlevel={0*,1,2,3}  Set a lower bound on the optimization level\n"
+    " -C, --cpu-target <target>                     Limit usage of CPU features up to <target>; set to\n"
+    "                                               `help` to see the available options\n"
+    " -O, --optimize={0|1|2*|3}                     Set the optimization level (level 3 if `-O` is used\n"
+    "                                               without a level) ($)\n"
+    " --min-optlevel={0*|1|2|3}                     Set a lower bound on the optimization level\n"
 #ifdef JL_DEBUG_BUILD
-        " -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build ($)\n"
+    " -g, --debug-info=[{0|1|2*}]                   Set the level of debug info generation in the\n"
+    "                                               julia-debug build ($)\n"
 #else
-        " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level) ($)\n"
+    " -g, --debug-info=[{0|1*|2}]                   Set the level of debug info generation (level 2 if\n"
+    "                                               `-g` is used without a level) ($)\n"
 #endif
-    " --inline={yes*|no}         Control whether inlining is permitted, including overriding @inline declarations\n"
-    " --check-bounds={yes|no|auto*}\n"
-    "                            Emit bounds checks always, never, or respect @inbounds declarations ($)\n"
+    " --inline={yes*|no}                            Control whether inlining is permitted, including\n"
+    "                                               overriding @inline declarations\n"
+    " --check-bounds={yes|no|auto*}                 Emit bounds checks always, never, or respect\n"
+    "                                               @inbounds declarations ($)\n"
+    " --math-mode={ieee|user*}                      Always follow `ieee` floating point semantics or\n"
+    "                                               respect `@fastmath` declarations\n\n"
 #ifdef USE_POLLY
-    " --polly={yes*|no}          Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n"
+    " --polly={yes*|no}                             Enable or disable the polyhedral optimizer Polly\n"
+    "                                               (overrides @polly declaration)\n"
 #endif
 
     // instrumentation options
-    " --code-coverage[={none*|user|all}]\n"
-    "                            Count executions of source lines (omitting setting is equivalent to `user`)\n"
-    " --code-coverage=@<path>\n"
-    "                            Count executions but only in files that fall under the given file path/directory.\n"
-    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
-    "                            current directory.\n"
+    " --code-coverage[={none*|user|all}]            Count executions of source lines (omitting setting is\n"
+    "                                               equivalent to `user`)\n"
+    " --code-coverage=@<path>                       Count executions but only in files that fall under\n"
+    "                                               the given file path/directory. The `@` prefix is\n"
+    "                                               required to select this option. A `@` with no path\n"
+    "                                               will track the current directory.\n"
 
-    " --code-coverage=tracefile.info\n"
-    "                            Append coverage information to the LCOV tracefile (filename supports format tokens)\n"
+    " --code-coverage=tracefile.info                Append coverage information to the LCOV tracefile\n"
+    "                                               (filename supports format tokens)\n"
 // TODO: These TOKENS are defined in `runtime_ccall.cpp`. A more verbose `--help` should include that list here.
-    " --track-allocation[={none*|user|all}]\n"
-    "                            Count bytes allocated by each source line (omitting setting is equivalent to `user`)\n"
-    " --track-allocation=@<path>\n"
-    "                            Count bytes but only in files that fall under the given file path/directory.\n"
-    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
-    "                            current directory.\n"
-    " --bug-report=KIND          Launch a bug report session. It can be used to start a REPL, run a script, or evaluate\n"
-    "                            expressions. It first tries to use BugReporting.jl installed in current environment and\n"
-    "                            fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n"
-    "                            --bug-report=help.\n\n"
-    " --heap-size-hint=<size>    Forces garbage collection if memory usage is higher than the given value.\n"
-    "                            The value may be specified as a number of bytes, optionally in units of\n"
-    "                            KB, MB, GB, or TB, or as a percentage of physical memory with %.\n\n"
+    " --track-allocation[={none*|user|all}]         Count bytes allocated by each source line (omitting\n"
+    "                                               setting is equivalent to `user`)\n"
+    " --track-allocation=@<path>                    Count bytes but only in files that fall under the\n"
+    "                                               given file path/directory. The `@` prefix is required\n"
+    "                                               to select this option. A `@` with no path will track\n"
+    "                                               the current directory.\n"
+    " --bug-report=KIND                             Launch a bug report session. It can be used to start\n"
+    "                                               a REPL, run a script, or evaluate expressions. It\n"
+    "                                               first tries to use BugReporting.jl installed in\n"
+    "                                               current environment and fallbacks to the latest\n"
+    "                                               compatible BugReporting.jl if not. For more\n"
+    "                                               information, see --bug-report=help.\n\n"
+    " --heap-size-hint=<size>                       Forces garbage collection if memory usage is higher\n"
+    "                                               than the given value. The value may be specified as a\n"
+    "                                               number of bytes, optionally in units of KB, MB, GB,\n"
+    "                                               or TB, or as a percentage of physical memory with %.\n\n"
 ;
 
 static const char opts_hidden[]  =
     "Switches (a '*' marks the default value, if applicable):\n\n"
+    " Option                                        Description\n"
+    " ---------------------------------------------------------------------------------------------------\n"
     // code generation options
-    " --compile={yes*|no|all|min}\n"
-    "                          Enable or disable JIT compiler, or request exhaustive or minimal compilation\n\n"
+    " --compile={yes*|no|all|min}                   Enable or disable JIT compiler, or request exhaustive\n"
+    "                                               or minimal compilation\n\n"
 
     // compiler output options
-    " --output-o <name>        Generate an object file (including system image data)\n"
-    " --output-ji <name>       Generate a system image data file (.ji)\n"
-    " --strip-metadata         Remove docstrings and source location info from system image\n"
-    " --strip-ir               Remove IR (intermediate representation) of compiled functions\n\n"
+    " --output-o <name>                             Generate an object file (including system image data)\n"
+    " --output-ji <name>                            Generate a system image data file (.ji)\n"
+    " --strip-metadata                              Remove docstrings and source location info from\n"
+    "                                               system image\n"
+    " --strip-ir                                    Remove IR (intermediate representation) of compiled\n"
+    "                                               functions\n\n"
 
     // compiler debugging (see the devdocs for tips on using these options)
-    " --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
-    " --output-bc <name>       Generate LLVM bitcode (.bc)\n"
-    " --output-asm <name>      Generate an assembly file (.s)\n"
-    " --output-incremental={yes|no*}\n"
-    "                          Generate an incremental output file (rather than complete)\n"
-    " --trace-compile={stderr,name}\n"
-    "                          Print precompile statements for methods compiled during execution or save to a path\n"
-    " --image-codegen          Force generate code in imaging mode\n"
-    " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
+    " --output-unopt-bc <name>                      Generate unoptimized LLVM bitcode (.bc)\n"
+    " --output-bc <name>                            Generate LLVM bitcode (.bc)\n"
+    " --output-asm <name>                           Generate an assembly file (.s)\n"
+    " --output-incremental={yes|no*}                Generate an incremental output file (rather than\n"
+    "                                               complete)\n"
+    " --trace-compile={stderr|name}                 Print precompile statements for methods compiled\n"
+    "                                               during execution or save to a path\n"
+    " --trace-compile-timing                        If --trace-compile is enabled show how long each took to\n"
+    "                                               compile in ms\n"
+    " --image-codegen                               Force generate code in imaging mode\n"
+    " --permalloc-pkgimg={yes|no*}                  Copy the data section of package images into memory\n"
 ;
 
 JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
@@ -246,6 +283,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_inline,
            opt_polly,
            opt_trace_compile,
+           opt_trace_compile_timing,
            opt_math_mode,
            opt_worker,
            opt_bind_to,
@@ -322,6 +360,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "inline",          required_argument, 0, opt_inline },
         { "polly",           required_argument, 0, opt_polly },
         { "trace-compile",   required_argument, 0, opt_trace_compile },
+        { "trace-compile-timing",  no_argument, 0, opt_trace_compile_timing },
         { "math-mode",       required_argument, 0, opt_math_mode },
         { "handle-signals",  required_argument, 0, opt_handle_signals },
         // hidden command line options
@@ -760,7 +799,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_errorf("julia: invalid argument to --inline (%s)", optarg);
             }
             break;
-       case opt_polly:
+        case opt_polly:
             if (!strcmp(optarg,"yes"))
                 jl_options.polly = JL_OPTIONS_POLLY_ON;
             else if (!strcmp(optarg,"no"))
@@ -769,11 +808,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_errorf("julia: invalid argument to --polly (%s)", optarg);
             }
             break;
-         case opt_trace_compile:
+        case opt_trace_compile:
             jl_options.trace_compile = strdup(optarg);
             if (!jl_options.trace_compile)
                 jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno));
             break;
+        case opt_trace_compile_timing:
+            jl_options.trace_compile_timing = 1;
+            break;
         case opt_math_mode:
             if (!strcmp(optarg,"ieee"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF;
@@ -782,7 +824,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             else if (!strcmp(optarg,"user"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_DEFAULT;
             else
-                jl_errorf("julia: invalid argument to --math-mode (%s)", optarg);
+                jl_errorf("julia: invalid argument to --math-mode={ieee|user} (%s)", optarg);
             break;
         case opt_worker:
             jl_options.worker = 1;
@@ -859,7 +901,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 if (isnan(sz) || sz < 0) {
                     jl_errorf("julia: invalid argument to --heap-size-hint (%s)", optarg);
                 }
-                jl_options.heap_size_hint = sz < UINT64_MAX ? (uint64_t)sz : UINT64_MAX;
+                const long double limit = ldexpl(1.0, 64); // UINT64_MAX + 1
+                jl_options.heap_size_hint = sz < limit ? (uint64_t)sz : UINT64_MAX;
             }
             if (jl_options.heap_size_hint == 0)
                 jl_errorf("julia: invalid memory size specified in --heap-size-hint");
diff --git a/src/jloptions.h b/src/jloptions.h
index 8649c405112d7..aac2a64a373a8 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -61,6 +61,7 @@ typedef struct {
     int8_t strip_ir;
     int8_t permalloc_pkgimg;
     uint64_t heap_size_hint;
+    int8_t trace_compile_timing;
 } jl_options_t;
 
 #endif
diff --git a/src/jltypes.c b/src/jltypes.c
index 896e8f7b676c8..fbc8e9f7f7f16 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -594,6 +594,52 @@ static int simple_subtype(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion
     return 0;
 }
 
+// merge Union{Tuple{}, Tuple{T}, Tuple{T, T, Vararg{T}}} into Tuple{Vararg{T}}
+// assumes temp is already sorted by number of type parameters
+STATIC_INLINE void merge_vararg_unions(jl_value_t **temp, size_t nt)
+{
+    for (size_t i = nt-1; i > 0; i--) {
+        // match types of form Tuple{T, ..., Vararg{T}}
+        jl_value_t *tt = temp[i];
+        if (!(tt && jl_is_tuple_type(tt))) continue;
+        size_t nfields = jl_nparams(tt);
+        if (nfields <= 1) continue;
+        jl_value_t *va = jl_tparam(tt, nfields-1);
+        if (jl_vararg_kind(va) != JL_VARARG_UNBOUND) continue;
+        jl_value_t *t = jl_unwrap_vararg(va);
+        for (size_t j = 0; j < nfields-1; j++)
+            if (!jl_egal(jl_tparam(tt, j), t)) goto outer_loop;
+
+        // look for Tuple{T, T, ...} then Tuple{T, ...}, etc
+        size_t min_elements = nfields-1;
+        for (long j = i-1; j >= 0; j--) {
+            jl_value_t *ttj = temp[j];
+            if (!jl_is_tuple_type(ttj)) break;
+            size_t nfieldsj = jl_nparams(ttj);
+            if (nfieldsj >= min_elements) continue;
+            if (nfieldsj != min_elements-1) break;
+            for (size_t k = 0; k < nfieldsj; k++)
+                if (!jl_egal(jl_tparam(ttj, k), t)) goto inner_loop;
+
+            temp[j] = NULL;
+            min_elements--;
+ inner_loop:
+            continue;
+        }
+
+        if (min_elements == nfields-1) continue;
+        jl_value_t** params;
+        JL_GC_PUSHARGS(params, min_elements+1);
+        for (size_t j = 0; j < min_elements; j++)
+            params[j] = t;
+        params[min_elements] = va;
+        temp[i] = jl_apply_type((jl_value_t*)jl_tuple_type, params, min_elements+1);
+        JL_GC_POP();
+ outer_loop:
+        continue;
+    }
+}
+
 JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
 {
     if (n == 0)
@@ -625,6 +671,7 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     jl_value_t **ptu = &temp[nt];
     *ptu = jl_bottom_type;
     int k;
@@ -730,6 +777,7 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b)
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     temp[nt] = jl_bottom_type;
     size_t k;
     for (k = nt; k-- > 0; ) {
@@ -876,14 +924,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
         if (T_has_tv) {
             jl_value_t *wrapped = jl_type_unionall(v, vm->T);
             JL_GC_PUSH1(&wrapped);
-            wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1);
+            wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1, 0);
             JL_GC_POP();
             return wrapped;
         }
         else {
             assert(N_has_tv);
             assert(vm->N == (jl_value_t*)v);
-            return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1, 0);
         }
     }
     if (!jl_is_type(body) && !jl_is_typevar(body))
@@ -1346,7 +1394,7 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env, int check);
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
@@ -1354,7 +1402,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **
                                      jl_typestack_t *stack, jl_typeenv_t *env, int c)
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1, 0);
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
@@ -1478,13 +1526,13 @@ JL_EXTENSION struct _jl_typestack_t {
     struct _jl_typestack_t *prev;
 };
 
-static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check);
+static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow);
 static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable);
 
 JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p)
 {
     jl_typeenv_t env = { u->var, p, NULL };
-    return inst_type_w_(u->body, &env, NULL, 1);
+    return inst_type_w_(u->body, &env, NULL, 1, 0);
 }
 
 jl_unionall_t *jl_rename_unionall(jl_unionall_t *u)
@@ -1493,18 +1541,27 @@ jl_unionall_t *jl_rename_unionall(jl_unionall_t *u)
     jl_value_t *t = NULL;
     JL_GC_PUSH2(&v, &t);
     jl_typeenv_t env = { u->var, (jl_value_t *)v, NULL };
-    t = inst_type_w_(u->body, &env, NULL, 0);
+    t = inst_type_w_(u->body, &env, NULL, 0, 0);
     t = jl_new_struct(jl_unionall_type, v, t);
     JL_GC_POP();
     return (jl_unionall_t*)t;
 }
 
+jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow)
+{
+    if (val == (jl_value_t*)var)
+        return t;
+    nothrow = jl_is_typevar(val) ? 0 : nothrow;
+    jl_typeenv_t env = { var, val, NULL };
+    return inst_type_w_(t, &env, NULL, 1, nothrow);
+}
+
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val)
 {
     if (val == (jl_value_t*)var)
         return t;
     jl_typeenv_t env = { var, val, NULL };
-    return inst_type_w_(t, &env, NULL, 1);
+    return inst_type_w_(t, &env, NULL, 1, 0);
 }
 
 jl_value_t *jl_unwrap_unionall(jl_value_t *v)
@@ -1550,6 +1607,118 @@ jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u)
     return t;
 }
 
+// Create a copy of type expression t where any occurrence of data type x is replaced by y.
+// If x does not occur in t, return t without any copy.
+// For example, jl_substitute_datatype(Foo{Bar}, Foo{T}, Qux{S}) is Qux{Bar}, with T and S
+// free type variables.
+// To substitute type variables, use jl_substitute_var instead.
+jl_value_t *jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y)
+{
+    if jl_is_datatype(t) {
+        jl_datatype_t *typ = (jl_datatype_t*)t;
+        // For datatypes call itself recursively on the parameters to form new parameters.
+        // Then, if typename(t) == typename(x), rewrap the wrapper of y around the new
+        // parameters. Otherwise, do the same around the wrapper of t.
+        // This ensures that the types and supertype are properly set.
+        // Start by check whether there is a parameter that needs replacing.
+        long i_firstnewparam = -1;
+        size_t nparams = jl_svec_len(typ->parameters);
+        jl_value_t *firstnewparam = NULL;
+        JL_GC_PUSH1(&firstnewparam);
+        for (size_t i = 0; i < nparams; i++) {
+            jl_value_t *param = NULL;
+            JL_GC_PUSH1(&param);
+            param = jl_svecref(typ->parameters, i);
+            firstnewparam = jl_substitute_datatype(param, x, y);
+            if (param != firstnewparam) {
+                i_firstnewparam = i;
+                JL_GC_POP();
+                break;
+            }
+            JL_GC_POP();
+        }
+        // If one of the parameters needs to be updated, or if the type name is that to
+        // substitute, create a new datataype
+        if (i_firstnewparam != -1 || typ->name == x->name) {
+            jl_datatype_t *uw = typ->name == x->name ? y : typ; // substitution occurs here
+            jl_value_t *wrapper = uw->name->wrapper;
+            jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(wrapper);
+            jl_svec_t *sv = jl_alloc_svec_uninit(jl_svec_len(uw->parameters));
+            JL_GC_PUSH1(&sv);
+            jl_value_t **vals = jl_svec_data(sv);
+            // no JL_GC_PUSHARGS(vals, ...) since GC is already aware of sv
+            for (long i = 0; i < i_firstnewparam; i++) { // copy the identical parameters
+                vals[i] = jl_svecref(typ->parameters, i); // value
+            }
+            if (i_firstnewparam != -1) { // insert the first non-identical parameter
+                vals[i_firstnewparam] = firstnewparam;
+            }
+            for (size_t i = i_firstnewparam+1; i < nparams; i++) { // insert the remaining parameters
+                vals[i] = jl_substitute_datatype(jl_svecref(typ->parameters, i), x, y);
+            }
+            if (jl_is_tuple_type(wrapper)) {
+                // special case for tuples, since the wrapper (Tuple) does not have as
+                // many parameters as t (it only has a Vararg instead).
+                t = jl_apply_tuple_type(sv, 0);
+            } else {
+                t = jl_instantiate_type_in_env((jl_value_t*)w, (jl_unionall_t*)wrapper, vals);
+            }
+            JL_GC_POP();
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_unionall(t) { // recursively call itself on body and var bounds
+        jl_unionall_t* ut = (jl_unionall_t*)t;
+        jl_value_t *lb = NULL;
+        jl_value_t *ub = NULL;
+        jl_value_t *body = NULL;
+        JL_GC_PUSH3(&lb, &ub, &body);
+        lb = jl_substitute_datatype(ut->var->lb, x, y);
+        ub = jl_substitute_datatype(ut->var->ub, x, y);
+        body = jl_substitute_datatype(ut->body, x, y);
+        if (lb != ut->var->lb || ub != ut->var->ub) {
+            jl_tvar_t *newtvar = jl_new_typevar(ut->var->name, lb, ub);
+            JL_GC_PUSH1(&newtvar);
+            body = jl_substitute_var(body, ut->var, (jl_value_t*)newtvar);
+            t = jl_new_struct(jl_unionall_type, newtvar, body);
+            JL_GC_POP();
+        }
+        else if (body != ut->body) {
+            t = jl_new_struct(jl_unionall_type, ut->var, body);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_uniontype(t) { // recursively call itself on a and b
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        jl_value_t *a = NULL;
+        jl_value_t *b = NULL;
+        JL_GC_PUSH2(&a, &b);
+        a = jl_substitute_datatype(u->a, x, y);
+        b = jl_substitute_datatype(u->b, x, y);
+        if (a != u->a || b != u->b) {
+            t = jl_new_struct(jl_uniontype_type, a, b);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_vararg(t) { // recursively call itself on T
+        jl_vararg_t *vt = (jl_vararg_t*)t;
+        if (vt->T) { // vt->T could be NULL
+            jl_value_t *rT = NULL;
+            JL_GC_PUSH1(&rT);
+            rT = jl_substitute_datatype(vt->T, x, y);
+            if (rT != vt->T) {
+                jl_task_t *ct = jl_current_task;
+                t = jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+                jl_set_typetagof((jl_vararg_t *)t, jl_vararg_tag, 0);
+                ((jl_vararg_t *)t)->T = rT;
+                ((jl_vararg_t *)t)->N = vt->N;
+            }
+            JL_GC_POP();
+        }
+    }
+    return t;
+}
+
 static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, size_t ntp,
                                      jl_value_t **iparams)
 {
@@ -1716,7 +1885,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
     dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable);
 }
 
-static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np)
+static int check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np, int nothrow)
 {
     jl_value_t *wrapper = tn->wrapper;
     jl_value_t **bounds;
@@ -1734,6 +1903,10 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
         assert(jl_is_unionall(wrapper));
         jl_tvar_t *tv = ((jl_unionall_t*)wrapper)->var;
         if (!within_typevar(params[i], bounds[2*i], bounds[2*i+1])) {
+            if (nothrow) {
+                JL_GC_POP();
+                return 1;
+            }
             if (tv->lb != bounds[2*i] || tv->ub != bounds[2*i+1])
                 // pass a new version of `tv` containing the instantiated bounds
                 tv = jl_new_typevar(tv->name, bounds[2*i], bounds[2*i+1]);
@@ -1743,12 +1916,26 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
         int j;
         for (j = 2*i + 2; j < 2*np; j++) {
             jl_value_t *bj = bounds[j];
-            if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type)
-                bounds[j] = jl_substitute_var(bj, tv, params[i]);
+            if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type) {
+                int isub = j & 1;
+                // use different nothrow level for lb and ub substitution.
+                // TODO: This assuming the top instantiation could only start with
+                // `nothrow == 2` or `nothrow == 0`. If `nothrow` is initially set to 1
+                // then we might miss some inner error, perhaps the normal path should
+                // also follow this rule？
+                jl_value_t *nb = jl_substitute_var_nothrow(bj, tv, params[i], nothrow ? (isub ? 2 : 1) : 0 );
+                if (nb == NULL) {
+                    assert(nothrow);
+                    JL_GC_POP();
+                    return 1;
+                }
+                bounds[j] = nb;
+            }
         }
         wrapper = ((jl_unionall_t*)wrapper)->body;
     }
     JL_GC_POP();
+    return 0;
 }
 
 static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED
@@ -1846,7 +2033,7 @@ static jl_value_t *normalize_unionalls(jl_value_t *t)
     else if (jl_is_unionall(t)) {
         jl_unionall_t *u = (jl_unionall_t*)t;
         jl_value_t *body = normalize_unionalls(u->body);
-        JL_GC_PUSH1(&body);
+        JL_GC_PUSH2(&body, &t);
         if (body != u->body) {
             t = jl_new_struct(jl_unionall_type, u->var, body);
             u = (jl_unionall_t*)t;
@@ -1868,20 +2055,25 @@ static jl_value_t *normalize_unionalls(jl_value_t *t)
 }
 
 // used to expand an NTuple to a flat representation
-static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check)
+static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check, int nothrow)
 {
     jl_value_t *p = NULL;
     JL_GC_PUSH1(&p);
     if (check) {
         // Since we are skipping making the Vararg and skipping checks later,
         // we inline the checks from jl_wrap_vararg here now
-        if (!jl_valid_type_param(t))
+        if (!jl_valid_type_param(t)) {
+            if (nothrow) {
+                JL_GC_POP();
+                return NULL;
+            }
             jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+        }
         // jl_wrap_vararg sometimes simplifies the type, so we only do this 1 time, instead of for each n later
         t = normalize_unionalls(t);
         p = t;
         jl_value_t *tw = extract_wrapper(t);
-        if (tw && t != tw && jl_types_equal(t, tw))
+        if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
             t = tw;
         p = t;
         check = 0; // remember that checks are already done now
@@ -1895,7 +2087,7 @@ static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check)
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env, int check)
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
@@ -1926,8 +2118,11 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                     break;
                 }
             }
-            if (pi == jl_bottom_type)
+            if (pi == jl_bottom_type) {
+                if (nothrow)
+                    return NULL;
                 jl_errorf("Tuple field type cannot be Union{}");
+            }
             if (cacheable && !jl_is_concrete_type(pi))
                 cacheable = 0;
         }
@@ -1962,7 +2157,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             // normalize types equal to wrappers (prepare for Typeofwrapper)
             jl_value_t *tw = extract_wrapper(pi);
             if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) &&
-                    jl_types_equal(pi, tw)) {
+                    !jl_has_free_typevars(pi) && jl_types_equal(pi, tw)) {
                 iparams[i] = tw;
                 if (p) jl_gc_wb(p, tw);
             }
@@ -1987,7 +2182,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     // for whether this is even valid
     if (check && !istuple) {
         assert(ntp > 0);
-        check_datatype_parameters(tn, iparams, ntp);
+        if (check_datatype_parameters(tn, iparams, ntp, nothrow))
+            return NULL;
     }
     else if (ntp == 0 && jl_emptytuple_type != NULL) {
         // empty tuple type case
@@ -2014,7 +2210,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             if (nt == 0 || !jl_has_free_typevars(va0)) {
                 if (ntp == 1) {
                     JL_GC_POP();
-                    return jl_tupletype_fill(nt, va0, 0);
+                    return jl_tupletype_fill(nt, va0, 0, 0);
                 }
                 size_t i, l;
                 p = jl_alloc_svec(ntp - 1 + nt);
@@ -2023,7 +2219,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                 l = ntp - 1 + nt;
                 for (; i < l; i++)
                     jl_svecset(p, i, va0);
-                jl_value_t *ndt = jl_apply_tuple_type(p, check);
+                size_t np = jl_svec_len(p);
+                jl_value_t **pp = jl_svec_data(p);
+                jl_value_t *ndt = inst_datatype_inner(jl_anytuple_type, p, pp, np, NULL, NULL, check, nothrow);
                 JL_GC_POP();
                 return ndt;
             }
@@ -2122,6 +2320,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     ndt->parameters = p;
     jl_gc_wb(ndt, ndt->parameters);
     ndt->types = NULL; // to be filled in below
+    int invalid = 0;
     if (istuple) {
         ndt->types = p; // TODO: this may need to filter out certain types
     }
@@ -2129,38 +2328,70 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         jl_value_t *names_tup = jl_svecref(p, 0);
         jl_value_t *values_tt = jl_svecref(p, 1);
         if (!jl_has_free_typevars(names_tup) && !jl_has_free_typevars(values_tt)) {
-            if (!jl_is_tuple(names_tup))
-                jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
+            if (!jl_is_tuple(names_tup)) {
+                if (!nothrow)
+                    jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
+                invalid = 1;
+            }
             size_t nf = jl_nfields(names_tup);
             for (size_t i = 0; i < nf; i++) {
                 jl_value_t *ni = jl_fieldref(names_tup, i);
-                if (!jl_is_symbol(ni))
-                    jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
+                if (!jl_is_symbol(ni)) {
+                    if (!nothrow)
+                        jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
+                    invalid = 1; break;
+                }
                 for (size_t j = 0; j < i; j++) {
-                    if (ni == jl_fieldref_noalloc(names_tup, j))
-                        jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
+                    if (ni == jl_fieldref_noalloc(names_tup, j)) {
+                        if (!nothrow)
+                            jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
+                        invalid = 1; break;
+                    }
+                }
+                if (invalid) break;
+            }
+            if (values_tt == jl_bottom_type && nf > 0) {
+                ndt->types = jl_svec_fill(nf, jl_bottom_type);
+            }
+            else {
+                if (!jl_is_datatype(values_tt)) {
+                    // should have been checked within `check_datatype_parameters`.
+                    jl_error("NamedTuple field type must be a tuple datatype");
+                }
+                if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf) {
+                    if (!nothrow)
+                        jl_error("NamedTuple names and field types must have matching lengths");
+                    invalid = 1;
                 }
+                ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             }
-            if (!jl_is_datatype(values_tt))
-                jl_error("NamedTuple field type must be a tuple type");
-            if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf)
-                jl_error("NamedTuple names and field types must have matching lengths");
-            ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             jl_gc_wb(ndt, ndt->types);
         }
         else {
-            ndt->types = jl_emptysvec; // XXX: this is essentially always false
+            ndt->types = jl_emptysvec; // XXX: this is essentially always incorrect
         }
     }
     else if (tn == jl_genericmemoryref_typename || tn == jl_genericmemory_typename) {
         jl_value_t *isatomic = jl_svecref(p, 0);
-        if (!jl_is_typevar(isatomic) && !jl_is_symbol(isatomic))
-            jl_type_error_rt("GenericMemory", "isatomic parameter", (jl_value_t*)jl_symbol_type, isatomic);
+        if (!jl_is_typevar(isatomic) && !jl_is_symbol(isatomic)) {
+            if (!nothrow)
+                jl_type_error_rt("GenericMemory", "isatomic parameter", (jl_value_t*)jl_symbol_type, isatomic);
+            invalid = 1;
+        }
         jl_value_t *addrspace = jl_svecref(p, 2);
-        if (!jl_is_typevar(addrspace) && !jl_is_addrspace(addrspace))
-            jl_type_error_rt("GenericMemory", "addrspace parameter", (jl_value_t*)jl_addrspace_type, addrspace);
+        if (!jl_is_typevar(addrspace) && !jl_is_addrspace(addrspace)) {
+            if (!nothrow)
+                jl_type_error_rt("GenericMemory", "addrspace parameter", (jl_value_t*)jl_addrspace_type, addrspace);
+            invalid = 1;
+        }
     }
 
+    if (nothrow && invalid) {
+        if (cacheable)
+            JL_UNLOCK(&typecache_lock);
+        JL_GC_POP();
+        return NULL;
+    }
     jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper));
     jl_precompute_memoized_dt(ndt, cacheable);
     if (primarydt->layout)
@@ -2170,7 +2401,14 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         ndt->super = jl_any_type;
     }
     else if (dt->super) {
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, check);
+        jl_value_t *super = inst_type_w_((jl_value_t*)dt->super, env, stack, check, nothrow);
+        if (nothrow && super == NULL) {
+            if (cacheable)
+                JL_UNLOCK(&typecache_lock);
+            JL_GC_POP();
+            return NULL;
+        }
+        ndt->super = (jl_datatype_t *)super;
         jl_gc_wb(ndt, ndt->super);
     }
     jl_svec_t *ftypes = dt->types;
@@ -2218,7 +2456,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
 
 static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params, int check)
 {
-    return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, check);
+    return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, check, 0);
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check)
@@ -2257,7 +2495,7 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1, 0);
         JL_GC_POP();
     }
     return tt;
@@ -2273,7 +2511,7 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s
     for (i = 0; i < lp; i++) {
         pi = jl_svecref(p, i);
         JL_TRY {
-            pi = inst_type_w_(pi, env, stack, 1);
+            pi = inst_type_w_(pi, env, stack, 1, 0);
             if (!jl_is_type(pi) && !jl_is_typevar(pi)) {
                 pi = jl_bottom_type;
             }
@@ -2288,7 +2526,7 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s
     return np;
 }
 
-static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check)
+static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow)
 {
     jl_datatype_t *tt = (jl_datatype_t*)t;
     jl_svec_t *tp = tt->parameters;
@@ -2315,9 +2553,11 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         if (T != NULL && N != NULL && jl_is_long(N)) { // TODO: && !jl_has_free_typevars(T) to match inst_datatype_inner, or even && jl_is_concrete_type(T)
             // Since this is skipping jl_wrap_vararg, we inline the checks from it here
             ssize_t nt = jl_unbox_long(N);
-            if (nt < 0)
-                jl_errorf("Vararg length is negative: %zd", nt);
-            return jl_tupletype_fill(nt, T, check);
+            if (nt >= 0)
+                return jl_tupletype_fill(nt, T, check, nothrow);
+            if (nothrow)
+                return NULL;
+            jl_errorf("Vararg length is negative: %zd", nt);
         }
     }
     jl_value_t **iparams;
@@ -2329,23 +2569,36 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams[0] = (jl_value_t*)ip_heap;
         iparams = jl_svec_data(ip_heap);
     }
-    int bound = 0;
-    int i;
+    int i, bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
-        jl_value_t *pi = inst_type_w_(elt, env, stack, check);
+        jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow);
+        if (pi == NULL) {
+            assert(nothrow);
+            if (nothrow == 1 || (i == ntp-1 && jl_is_vararg(elt))) {
+                t = NULL;
+                break;
+            }
+            else {
+                pi = jl_bottom_type;
+            }
+        }
         iparams[i] = pi;
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
     }
-    if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check);
+    if (t != NULL && bound)
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check, nothrow);
     JL_GC_POP();
     return t;
 }
 
-static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check)
+// `nothrow` means that when type checking fails, the type instantiation should
+// return `NULL` instead of immediately throwing an error. If `nothrow` == 2 then
+// we further assume that the imprecise instantiation for non invariant parameters
+// is acceptable, and inner error (`NULL`) would be ignored.
+static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow)
 {
     size_t i;
     if (jl_is_typevar(t)) {
@@ -2365,42 +2618,73 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         jl_value_t *var = NULL;
         jl_value_t *newbody = NULL;
         JL_GC_PUSH3(&lb, &var, &newbody);
-        lb = inst_type_w_(ua->var->lb, env, stack, check);
-        var = inst_type_w_(ua->var->ub, env, stack, check);
-        if (lb != ua->var->lb || var != ua->var->ub) {
-            var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var);
-        }
-        else {
-            var = (jl_value_t*)ua->var;
-        }
-        jl_typeenv_t newenv = { ua->var, var, env };
-        newbody = inst_type_w_(ua->body, &newenv, stack, check);
-        if (newbody == (jl_value_t*)jl_emptytuple_type) {
-            // NTuple{0} => Tuple{} can make a typevar disappear
-            t = (jl_value_t*)jl_emptytuple_type;
+        // set nothrow <= 1 to ensure lb's accuracy.
+        lb = inst_type_w_(ua->var->lb, env, stack, check, nothrow ? 1 : 0);
+        if (lb == NULL) {
+            assert(nothrow);
+            t = NULL;
+        }
+        if (t != NULL) {
+            var = inst_type_w_(ua->var->ub, env, stack, check, nothrow);
+            if (var == NULL) {
+                if (lb == jl_bottom_type)
+                    var = jl_bottom_type;
+                else
+                    t = NULL;
+            }
+            else if (lb != ua->var->lb || var != ua->var->ub) {
+                var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var);
+            }
+            else {
+                var = (jl_value_t*)ua->var;
+            }
         }
-        else if (newbody != ua->body || var != (jl_value_t*)ua->var) {
-            // if t's parameters are not bound in the environment, return it uncopied (#9378)
-            t = jl_new_struct(jl_unionall_type, var, newbody);
+        if (t != NULL) {
+            jl_typeenv_t newenv = { ua->var, var, env };
+            newbody = inst_type_w_(ua->body, &newenv, stack, check, nothrow);
+            if (newbody == NULL) {
+                t = NULL;
+            }
+            else if (!jl_has_typevar(newbody, (jl_tvar_t *)var)) {
+                // inner instantiation might make a typevar disappear, e.g.
+                // NTuple{0,T} => Tuple{}
+                t = newbody;
+            }
+            else if (newbody != ua->body || var != (jl_value_t*)ua->var) {
+                // if t's parameters are not bound in the environment, return it uncopied (#9378)
+                t = jl_new_struct(jl_unionall_type, var, newbody);
+            }
         }
         JL_GC_POP();
         return t;
     }
     if (jl_is_uniontype(t)) {
         jl_uniontype_t *u = (jl_uniontype_t*)t;
-        jl_value_t *a = inst_type_w_(u->a, env, stack, check);
+        jl_value_t *a = inst_type_w_(u->a, env, stack, check, nothrow);
         jl_value_t *b = NULL;
         JL_GC_PUSH2(&a, &b);
-        b = inst_type_w_(u->b, env, stack, check);
+        b = inst_type_w_(u->b, env, stack, check, nothrow);
+        if (nothrow) {
+            // ensure jl_type_union nothrow.
+            if (a && !(jl_is_typevar(a) || jl_is_type(a)))
+                a = NULL;
+            if (b && !(jl_is_typevar(b) || jl_is_type(b)))
+                b = NULL;
+        }
         if (a != u->a || b != u->b) {
-            if (check) {
-                jl_value_t *uargs[2] = {a, b};
-                t = jl_type_union(uargs, 2);
-            }
-            else {
+            if (!check) {
                 // fast path for `jl_rename_unionall`.
                 t = jl_new_struct(jl_uniontype_type, a, b);
             }
+            else if (a == NULL || b == NULL) {
+                assert(nothrow);
+                t = nothrow == 1 ? NULL : a == NULL ? b : a;
+            }
+            else {
+                assert(a != NULL && b != NULL);
+                jl_value_t *uargs[2] = {a, b};
+                t = jl_type_union(uargs, 2);
+            }
         }
         JL_GC_POP();
         return t;
@@ -2411,13 +2695,22 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         jl_value_t *N = NULL;
         JL_GC_PUSH2(&T, &N);
         if (v->T) {
-            T = inst_type_w_(v->T, env, stack, check);
-            if (v->N)
-                N = inst_type_w_(v->N, env, stack, check);
-        }
-        if (T != v->T || N != v->N) {
-            t = (jl_value_t*)jl_wrap_vararg(T, N, check);
+            T = inst_type_w_(v->T, env, stack, check, nothrow);
+            if (T == NULL) {
+                if (nothrow == 2)
+                    T = jl_bottom_type;
+                else
+                    t = NULL;
+            }
+            if (t && v->N) {
+                // set nothrow <= 1 to ensure invariant parameter's accuracy.
+                N = inst_type_w_(v->N, env, stack, check, nothrow ? 1 : 0);
+                if (N == NULL)
+                    t = NULL;
+            }
         }
+        if (t && (T != v->T || N != v->N))
+            t = (jl_value_t*)jl_wrap_vararg(T, N, check, nothrow);
         JL_GC_POP();
         return t;
     }
@@ -2429,20 +2722,26 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         return t;
     jl_typename_t *tn = tt->name;
     if (tn == jl_tuple_typename)
-        return inst_tuple_w_(t, env, stack, check);
+        return inst_tuple_w_(t, env, stack, check, nothrow);
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
     int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
-        jl_value_t *pi = inst_type_w_(elt, env, stack, check);
+        // set nothrow <= 1 to ensure invariant parameter's accuracy.
+        jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow ? 1 : 0);
+        if (pi == NULL) {
+            assert(nothrow);
+            t = NULL;
+            break;
+        }
         iparams[i] = pi;
         bound |= (pi != elt);
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
-    if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check);
+    if (t != NULL && bound)
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check, nothrow);
     JL_GC_POP();
     return t;
 }
@@ -2453,7 +2752,7 @@ static jl_value_t *instantiate_with(jl_value_t *t, jl_value_t **env, size_t n, j
         jl_typeenv_t en = { (jl_tvar_t*)env[0], env[1], te };
         return instantiate_with(t, &env[2], n-1, &en );
     }
-    return inst_type_w_(t, te, NULL, 1);
+    return inst_type_w_(t, te, NULL, 1, 0);
 }
 
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n)
@@ -2467,7 +2766,7 @@ static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *en
     if (jl_is_unionall(env->body))
         return _jl_instantiate_type_in_env(ty, (jl_unionall_t*)env->body, vals + 1, &en, stack);
     else
-        return inst_type_w_(ty, &en, stack, 1);
+        return inst_type_w_(ty, &en, stack, 1, 0);
 }
 
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals)
@@ -2489,8 +2788,10 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t)
     return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t);
 }
 
-jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check)
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow)
 {
+    int valid = 1;
+    jl_vararg_t *vm = NULL;
     jl_task_t *ct = jl_current_task;
     JL_GC_PUSH1(&t);
     if (check) {
@@ -2501,30 +2802,44 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check)
                 // values and not the bounds of variables.
                 /*
                 jl_tvar_t *N = (jl_tvar_t*)n;
-                if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type))
-                    jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
+                if (valid && !(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) {
+                    if (!nothrow)
+                        jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
+                    invalid = 1;
+                }
                 */
             }
-            else if (!jl_is_long(n)) {
-                jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n);
+            else if (valid && !jl_is_long(n)) {
+                if (!nothrow)
+                    jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n);
+                valid = 0;
             }
-            else if (jl_unbox_long(n) < 0) {
-                jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n));
+            else if (valid && jl_unbox_long(n) < 0) {
+                if (!nothrow)
+                    jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n));
+                valid = 0;
             }
         }
         if (t) {
-            if (!jl_valid_type_param(t))
-                jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
-            t = normalize_unionalls(t);
-            jl_value_t *tw = extract_wrapper(t);
-            if (tw && t != tw && jl_types_equal(t, tw))
-                t = tw;
-        }
-    }
-    jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
-    jl_set_typetagof(vm, jl_vararg_tag, 0);
-    vm->T = t;
-    vm->N = n;
+            if (valid && !jl_valid_type_param(t)) {
+                if (!nothrow)
+                    jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+                valid = 0;
+            }
+            if (valid) {
+                t = normalize_unionalls(t);
+                jl_value_t *tw = extract_wrapper(t);
+                if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
+                    t = tw;
+            }
+        }
+    }
+    if (valid) {
+        vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+        jl_set_typetagof(vm, jl_vararg_tag, 0);
+        vm->T = t;
+        vm->N = n;
+    }
     JL_GC_POP();
     return vm;
 }
@@ -2585,7 +2900,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw!
         for (i = 0; i < n; i++)
             env[i].val = jl_svecref(ndt->parameters, i);
 
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1);
+        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1, 0);
         jl_gc_wb(ndt, ndt->super);
     }
 
@@ -2692,25 +3007,26 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_typename_type->name->mt = jl_nonfunction_mt;
     jl_typename_type->super = jl_any_type;
     jl_typename_type->parameters = jl_emptysvec;
-    jl_typename_type->name->n_uninitialized = 15 - 2;
-    jl_typename_type->name->names = jl_perm_symsvec(15, "name", "module",
+    jl_typename_type->name->n_uninitialized = 16 - 2;
+    jl_typename_type->name->names = jl_perm_symsvec(16, "name", "module",
                                                     "names", "atomicfields", "constfields",
                                                     "wrapper", "Typeofwrapper", "cache", "linearcache",
                                                     "mt", "partial",
                                                     "hash", "n_uninitialized",
                                                     "flags", // "abstract", "mutable", "mayinlinealloc",
-                                                    "max_methods");
+                                                    "max_methods", "constprop_heuristic");
     const static uint32_t typename_constfields[1] = { 0x00003a27 }; // (1<<0)|(1<<1)|(1<<2)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) ; TODO: put back (1<<3)|(1<<4) in this list
     const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8)
     jl_typename_type->name->constfields = typename_constfields;
     jl_typename_type->name->atomicfields = typename_atomicfields;
     jl_precompute_memoized_dt(jl_typename_type, 1);
-    jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/,
+    jl_typename_type->types = jl_svec(16, jl_symbol_type, jl_any_type /*jl_module_type*/,
                                       jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
                                       jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type,
                                       jl_methtable_type, jl_any_type,
                                       jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
                                       jl_any_type /*jl_uint8_type*/,
+                                      jl_any_type /*jl_uint8_type*/,
                                       jl_any_type /*jl_uint8_type*/);
 
     jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
@@ -2809,7 +3125,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_vararg_type->name->mayinlinealloc = 0;
     jl_vararg_type->ismutationfree = 1;
 
-    jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0));
+    jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0, 0));
     jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params,
                                        jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0);
     jl_tuple_typename = jl_anytuple_type->name;
@@ -2820,6 +3136,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_anytuple_type->layout = NULL;
 
     jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type);
+    jl_typeofbottom_type->super->layout = jl_typeofbottom_type->layout; // the only abstract type with a layout
     jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec, 0);
     jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type);
     jl_emptytuple_type->instance = jl_emptytuple;
@@ -2943,12 +3260,21 @@ void jl_init_types(void) JL_GC_DISABLED
     assert(jl_module_type->instance == NULL);
     jl_compute_field_offsets(jl_module_type);
 
+    jl_binding_partition_type =
+        jl_new_datatype(jl_symbol("BindingPartition"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(5, "restriction", "min_world", "max_world", "next", "reserved"),
+                        jl_svec(5, jl_uint64_type /* Special GC-supported union of Any and flags*/,
+                        jl_ulong_type, jl_ulong_type, jl_any_type/*jl_binding_partition_type*/, jl_ulong_type),
+                        jl_emptysvec, 0, 1, 0);
+    const static uint32_t binding_partition_atomicfields[] = { 0b01101 }; // Set fields 1, 3, 4 as atomic
+    jl_binding_partition_type->name->atomicfields = binding_partition_atomicfields;
+
     jl_binding_type =
         jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(5, "value", "globalref", "owner", "ty", "flags"),
-                        jl_svec(5, jl_any_type, jl_any_type/*jl_globalref_type*/, jl_any_type/*jl_binding_type*/, jl_type_type, jl_uint8_type),
+                        jl_perm_symsvec(4, "globalref", "value", "partitions", "flags"),
+                        jl_svec(4, jl_any_type/*jl_globalref_type*/, jl_any_type, jl_binding_partition_type, jl_uint8_type),
                         jl_emptysvec, 0, 1, 0);
-    const static uint32_t binding_atomicfields[] = { 0x0015 }; // Set fields 1, 3, 4 as atomic
+    const static uint32_t binding_atomicfields[] = { 0x0005 }; // Set fields 1, 3 as atomic
     jl_binding_type->name->atomicfields = binding_atomicfields;
     const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant
     jl_binding_type->name->constfields = binding_constfields;
@@ -3035,7 +3361,7 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_perm_symsvec(2, "ref", "size"),
                         jl_svec(2,
                             jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, jl_svecref(tv, 0), cpumem),
-                            jl_apply_type1((jl_value_t*)jl_tuple_type, (jl_value_t*)jl_wrap_vararg((jl_value_t*)jl_long_type, jl_svecref(tv, 1), 0))),
+                            jl_apply_type1((jl_value_t*)jl_tuple_type, (jl_value_t*)jl_wrap_vararg((jl_value_t*)jl_long_type, jl_svecref(tv, 1), 0, 0))),
                             jl_emptysvec, 0, 1, 2)->name;
     jl_array_type = (jl_unionall_t*)jl_array_typename->wrapper;
 
@@ -3074,10 +3400,10 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_emptysvec, 0, 0, 2);
 
     jl_lineinfonode_type =
-        jl_new_datatype(jl_symbol("LineInfoNode"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(5, "module", "method", "file", "line", "inlined_at"),
-                        jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_int32_type, jl_int32_type),
-                        jl_emptysvec, 0, 0, 5);
+        jl_new_datatype(jl_symbol("LegacyLineInfoNode"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(3, "file", "line", "inlined_at"),
+                        jl_svec(3, jl_symbol_type, jl_int32_type, jl_int32_type),
+                        jl_emptysvec, 0, 0, 3);
 
     jl_gotonode_type =
         jl_new_datatype(jl_symbol("GotoNode"), core, jl_any_type, jl_emptysvec,
@@ -3139,44 +3465,64 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_svec(1, jl_slotnumber_type),
                         jl_emptysvec, 0, 0, 1);
 
+    jl_debuginfo_type =
+        jl_new_datatype(jl_symbol("DebugInfo"), core,
+                        jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(4,
+                            "def",
+                            "linetable",
+                            "edges",
+                            "codelocs"),
+                        jl_svec(4,
+                            jl_any_type, // union(jl_method_instance_type, jl_method_type, jl_symbol_type),
+                            jl_any_type, // union(jl_nothing, jl_debuginfo_type)
+                            jl_simplevector_type, // memory{debuginfo}
+                            jl_string_type),
+                        jl_emptysvec, 0, 0, 4);
+    jl_debuginfo_type->name->mayinlinealloc = 0;
+
     jl_code_info_type =
         jl_new_datatype(jl_symbol("CodeInfo"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(20,
+                        jl_perm_symsvec(22,
                             "code",
-                            "codelocs",
+                            "debuginfo",
                             "ssavaluetypes",
                             "ssaflags",
-                            "linetable",
                             "slotnames",
                             "slotflags",
                             "slottypes",
+                            "rettype",
                             "parent",
-                            "method_for_inference_limit_heuristics",
                             "edges",
                             "min_world",
                             "max_world",
+                            "method_for_inference_limit_heuristics",
+                            "nargs",
                             "propagate_inbounds",
                             "has_fcall",
                             "nospecializeinfer",
+                            "isva",
                             "inlining",
                             "constprop",
                             "purity",
                             "inlining_cost"),
-                        jl_svec(20,
+                        jl_svec(22,
                             jl_array_any_type,
-                            jl_array_int32_type,
+                            jl_debuginfo_type,
                             jl_any_type,
                             jl_array_uint32_type,
-                            jl_any_type,
                             jl_array_symbol_type,
                             jl_array_uint8_type,
                             jl_any_type,
                             jl_any_type,
                             jl_any_type,
-                            jl_any_type,
+                            jl_any_type, // prefers svec, but tolerates Vector{Any}
                             jl_ulong_type,
                             jl_ulong_type,
+                            jl_any_type,
+                            jl_ulong_type,
+                            jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
@@ -3185,12 +3531,12 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_uint16_type,
                             jl_uint16_type),
                         jl_emptysvec,
-                        0, 1, 20);
+                        0, 1, 22);
 
     jl_method_type =
         jl_new_datatype(jl_symbol("Method"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(30,
+                        jl_perm_symsvec(31,
                             "name",
                             "module",
                             "file",
@@ -3203,6 +3549,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "slot_syms",
                             "external_mt",
                             "source", // !const
+                            "debuginfo", // !const
                             "unspecialized", // !const
                             "generator", // !const
                             "roots", // !const
@@ -3221,7 +3568,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "constprop",
                             "max_varargs",
                             "purity"),
-                        jl_svec(30,
+                        jl_svec(31,
                             jl_symbol_type,
                             jl_module_type,
                             jl_symbol_type,
@@ -3234,6 +3581,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_string_type,
                             jl_any_type,
                             jl_any_type,
+                            jl_debuginfo_type,
                             jl_any_type, // jl_method_instance_type
                             jl_any_type,
                             jl_array_any_type,
@@ -3254,7 +3602,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_uint16_type),
                         jl_emptysvec,
                         0, 1, 10);
-    //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25);
+    //const static uint32_t method_constfields[1] = { 0b0 }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<17)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25)|(1<<26)|(1<<27)|(1<<28)|(1<<29)|(1<<30);
     //jl_method_type->name->constfields = method_constfields;
     const static uint32_t method_atomicfields[1] = { 0x00000030 }; // (1<<4)|(1<<5)
     jl_method_type->name->atomicfields = method_atomicfields;
@@ -3262,32 +3610,28 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_method_instance_type =
         jl_new_datatype(jl_symbol("MethodInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(9,
+                        jl_perm_symsvec(7,
                             "def",
                             "specTypes",
                             "sparam_vals",
-                            "uninferred",
                             "backedges",
                             "cache",
-                            "inInference",
                             "cache_with_orig",
                             "precompiled"),
-                        jl_svec(9,
+                        jl_svec(7,
                             jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
                             jl_any_type,
                             jl_simplevector_type,
-                            jl_any_type,
                             jl_array_any_type,
-                            jl_any_type,
-                            jl_bool_type,
+                            jl_any_type/*jl_code_instance_type*/,
                             jl_bool_type,
                             jl_bool_type),
                         jl_emptysvec,
                         0, 1, 3);
     // These fields should be constant, but Serialization wants to mutate them in initialization
-    //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2);
-    const static uint32_t method_instance_atomicfields[1] = { 0x00000128 }; // (1<<3)|(1<<5)|(1<<8);
-    //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. TODO: except inInference
+    //const static uint32_t method_instance_constfields[1] = { 0b0000111 }; // fields 1, 2, 3
+    const static uint32_t method_instance_atomicfields[1]  = { 0b1010000 }; // fields 5, 7
+    //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe.
     //jl_method_instance_type->name->constfields = method_instance_constfields;
     jl_method_instance_type->name->atomicfields = method_instance_atomicfields;
 
@@ -3304,9 +3648,9 @@ void jl_init_types(void) JL_GC_DISABLED
                             "exctype",
                             "rettype_const",
                             "inferred",
-                            //"edges",
+                            "debuginfo", // TODO: rename to edges?
                             //"absolute_max",
-                            "ipo_purity_bits", "purity_bits",
+                            "ipo_purity_bits",
                             "analysis_results",
                             "specsigflags", "precompile", "relocatability",
                             "invoke", "specptr"), // function object decls
@@ -3320,9 +3664,9 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             jl_any_type,
                             jl_any_type,
-                            //jl_any_type,
+                            jl_debuginfo_type,
                             //jl_bool_type,
-                            jl_uint32_type, jl_uint32_type,
+                            jl_uint32_type,
                             jl_any_type,
                             jl_bool_type,
                             jl_bool_type,
@@ -3331,10 +3675,11 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_emptysvec,
                         0, 1, 1);
     jl_svecset(jl_code_instance_type->types, 2, jl_code_instance_type);
-    const static uint32_t code_instance_constfields[1]  = { 0b00000101011100011 }; // Set fields 1, 2, 6-8, 10, 12 as const
-    const static uint32_t code_instance_atomicfields[1] = { 0b11011010100011100 }; // Set fields 3-5, 9, 11, 13-14, 16-17 as atomic
-    //Fields 4-5 are only operated on by construction and deserialization, so are const at runtime
-    //Fields 12 and 16 must be protected by locks, and thus all operations on jl_code_instance_t are threadsafe
+    const static uint32_t code_instance_constfields[1]  = { 0b00000100011100011 }; // Set fields 1, 2, 6-8, 12 as const
+    const static uint32_t code_instance_atomicfields[1] = { 0b11011011100011100 }; // Set fields 3-5, 9, 10, 11, 13-14, 16-17 as atomic
+    // Fields 4-5 are only operated on by construction and deserialization, so are effectively const at runtime
+    // Fields ipo_purity_bits and analysis_results are not currently threadsafe or reliable, as they get mutated after optimization, but are not declared atomic
+    // and there is no way to tell (during inference) if their value is finalized yet (to wait for them to be narrowed if applicable)
     jl_code_instance_type->name->constfields = code_instance_constfields;
     jl_code_instance_type->name->atomicfields = code_instance_atomicfields;
 
@@ -3359,8 +3704,11 @@ void jl_init_types(void) JL_GC_DISABLED
                                        jl_emptysvec, 0, 0, 4);
 
     // all Kinds share the Type method table (not the nonfunction one)
-    jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt =
-        jl_type_type_mt;
+    jl_unionall_type->name->mt =
+        jl_uniontype_type->name->mt =
+        jl_datatype_type->name->mt =
+        jl_typeofbottom_type->name->mt =
+            jl_type_type_mt;
 
     jl_intrinsic_type = jl_new_primitivetype((jl_value_t*)jl_symbol("IntrinsicFunction"), core,
                                              jl_builtin_type, jl_emptysvec, 32);
@@ -3435,6 +3783,8 @@ void jl_init_types(void) JL_GC_DISABLED
     XX(task);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
+    const static uint32_t task_atomicfields[1] = {0x00001000}; // Set fields 13 as atomic
+    jl_task_type->name->atomicfields = task_atomicfields;
 
     tv = jl_svec2(tvar("A"), tvar("R"));
     jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
@@ -3465,6 +3815,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_typename_type->types, 12, jl_int32_type);
     jl_svecset(jl_typename_type->types, 13, jl_uint8_type);
     jl_svecset(jl_typename_type->types, 14, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 15, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
     jl_svecset(jl_methtable_type->types, 5, jl_module_type);
     jl_svecset(jl_methtable_type->types, 6, jl_array_any_type);
@@ -3472,12 +3823,13 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_methtable_type->types, 8, jl_long_type); // uint32_t plus alignment
     jl_svecset(jl_methtable_type->types, 9, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
-    jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
-    jl_svecset(jl_method_instance_type->types, 5, jl_code_instance_type);
+    jl_svecset(jl_method_type->types, 13, jl_method_instance_type);
+    //jl_svecset(jl_debuginfo_type->types, 0, jl_method_instance_type); // union(jl_method_instance_type, jl_method_type, jl_symbol_type)
+    jl_svecset(jl_method_instance_type->types, 4, jl_code_instance_type);
     jl_svecset(jl_code_instance_type->types, 15, jl_voidpointer_type);
     jl_svecset(jl_code_instance_type->types, 16, jl_voidpointer_type);
-    jl_svecset(jl_binding_type->types, 1, jl_globalref_type);
-    jl_svecset(jl_binding_type->types, 2, jl_binding_type);
+    jl_svecset(jl_binding_type->types, 0, jl_globalref_type);
+    jl_svecset(jl_binding_partition_type->types, 3, jl_binding_partition_type);
 
     jl_compute_field_offsets(jl_datatype_type);
     jl_compute_field_offsets(jl_typename_type);
@@ -3489,6 +3841,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_compute_field_offsets(jl_unionall_type);
     jl_compute_field_offsets(jl_simplevector_type);
     jl_compute_field_offsets(jl_symbol_type);
+    jl_compute_field_offsets(jl_binding_partition_type);
 
     // override ismutationfree for builtin types that are mutable for identity
     jl_string_type->ismutationfree = jl_string_type->isidentityfree = 1;
@@ -3504,9 +3857,6 @@ void jl_init_types(void) JL_GC_DISABLED
     // Module object identity is determined by its name and parent name.
     jl_module_type->isidentityfree = 1;
 
-    // override the preferred layout for a couple types
-    jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
-
     export_jl_small_typeof();
 }
 
@@ -3515,6 +3865,8 @@ static jl_value_t *core(const char *name)
     return jl_get_global(jl_core_module, jl_symbol(name));
 }
 
+jl_debuginfo_t *jl_nulldebuginfo;
+
 // fetch references to things defined in boot.jl
 void post_boot_hooks(void)
 {
@@ -3551,6 +3903,7 @@ void post_boot_hooks(void)
     jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
     jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
     jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
+    jl_fielderror_type     = (jl_datatype_t*)core("FieldError");
     jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
     jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
     jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
@@ -3570,6 +3923,7 @@ void post_boot_hooks(void)
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
+    jl_nulldebuginfo = (jl_debuginfo_t*)core("NullDebugInfo");
 
     jl_init_box_caches();
 
@@ -3579,7 +3933,7 @@ void post_boot_hooks(void)
     for (size_t i = 0; i < jl_svec_len(bindings); i++) {
         if (table[i] != jl_nothing) {
             jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+            jl_value_t *v = jl_get_binding_value(b);
             if (v) {
                 if (jl_is_unionall(v))
                     v = jl_unwrap_unionall(v);
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index d3d4f7dfe3a56..d6bc03091f37b 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -225,13 +225,23 @@
                  (if lb (list lb ub) (list ub))
                  (if lb (list lb '(core Any)) '())))))
 
+(define (is-method? x)
+  (if (and (pair? x) (eq? (car x) 'method))
+      (let ((name (cadr x)))
+        (if (and (pair? name) (eq? (car name) 'globalref))
+            (let ((name (caddr name)))
+              (if (symbol? name)
+                  #t
+                  #f))
+            (if (symbol? name)
+                #t
+                #f)))
+      #f))
+
 (define (method-expr-name m)
   (let ((name (cadr m)))
-     (let ((name (if (or (length= m 2) (not (pair? name)) (not (quoted? name))) name (cadr name))))
-       (cond ((not (pair? name)) name)
-             ((eq? (car name) 'outerref) (cadr name))
-             ;((eq? (car name) 'globalref) (caddr name))
-             (else name)))))
+      (cond ((globalref? name) (caddr name))
+            (else name))))
 
 ;; extract static parameter names from a (method ...) expression
 (define (method-expr-static-parameters m)
@@ -248,8 +258,7 @@
 
 (define (nodot-sym-ref? e)
   (or (symbol? e)
-      (and (length= e 3) (eq? (car e) 'globalref))
-      (and (length= e 2) (eq? (car e) 'outerref))))
+      (and (length= e 3) (eq? (car e) 'globalref))))
 
 ;; expressions of the form a.b.c... where everything is a symbol
 (define (sym-ref? e)
@@ -376,7 +385,7 @@
             (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x))))
                            (let* ((gen    (generated-version body))
                                   (nongen (non-generated-version body))
-                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter))))
+                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter '()))))
                                   (gf     (make-generator-function gname names anames gen)))
                              (set! body (insert-after-meta
                                          nongen
@@ -516,7 +525,7 @@
                                         ""
                                         "#")
                                     (or und '_) "#"
-                                    (string (current-julia-module-counter)))))))
+                                    (string (current-julia-module-counter '())))))))
       ;; this is a hack: nest these statements inside a call so they get closure
       ;; converted together, allowing all needed types to be defined before any methods.
       `(call (core ifelse) (false) (false) (block
@@ -764,7 +773,7 @@
                           ,@(map make-decl field-names field-types))
                     (block
                      ,@locs
-                     (new (outerref ,name) ,@field-names)))
+                     (new (globalref (thismodule) ,name) ,@field-names)))
           #f))
          (any-ctor (if (or (not all-ctor) (any (lambda (t) (not (equal? t '(core Any))))
                                  field-types))
@@ -816,10 +825,10 @@
     (if (> nnv (length params))
         (error "too many type parameters specified in \"new{...}\"")))
   (let* ((Texpr (if (null? type-params)
-                    `(outerref ,Tname)
+                    `(globalref (thismodule) ,Tname)
                     (if selftype?
                         '|#ctor-self#|
-                        `(curly (outerref ,Tname)
+                        `(curly (globalref (thismodule) ,Tname)
                                 ,@type-params))))
          (tn (if (symbol? Texpr) Texpr (make-ssavalue)))
          (field-convert (lambda (fld fty val)
@@ -987,22 +996,22 @@
                      (error (string "field name \"" (deparse v) "\" is not a symbol"))))
                field-names)
      `(block
-       (global ,name) (const ,name)
+       (global ,name)
        (scope-block
         (block
          (hardscope)
          (local-def ,name)
          ,@(map (lambda (v) `(local ,v)) params)
          ,@(map (lambda (n v) (make-assignment n (bounds-to-TypeVar v #t))) params bounds)
-         (toplevel-only struct (outerref ,name))
+         (toplevel-only struct (globalref (thismodule) ,name))
          (= ,name (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@params)
                         (call (core svec) ,@(map quotify field-names))
                         (call (core svec) ,@attrs)
                         ,mut ,min-initialized))
          (call (core _setsuper!) ,name ,super)
-         (if (isdefined (outerref ,name))
+         (if (isdefined (globalref (thismodule) ,name) (false))
              (block
-              (= ,prev (outerref ,name))
+              (= ,prev (globalref (thismodule) ,name))
               (if (call (core _equiv_typedef) ,prev ,name)
                   ;; if this is compatible with an old definition, use the existing type object
                   ;; and its parameters
@@ -1015,8 +1024,8 @@
                                                       (quote parameters))))
                                '()))
                   ;; otherwise do an assignment to trigger an error
-                  (= (outerref ,name) ,name)))
-             (= (outerref ,name) ,name))
+                  (const (globalref (thismodule) ,name) ,name)))
+             (const (globalref (thismodule) ,name) ,name))
          (call (core _typebody!) ,name (call (core svec) ,@field-types))
          (null)))
        ;; "inner" constructors
@@ -1053,7 +1062,7 @@
   (receive
    (params bounds) (sparam-name-bounds params)
    `(block
-     (global ,name) (const ,name)
+     (global ,name)
      (scope-block
       (block
        (local-def ,name)
@@ -1063,17 +1072,17 @@
        (= ,name (call (core _abstracttype) (thismodule) (inert ,name) (call (core svec) ,@params)))
        (call (core _setsuper!) ,name ,super)
        (call (core _typebody!) ,name)
-       (if (&& (isdefined (outerref ,name))
-               (call (core _equiv_typedef) (outerref ,name) ,name))
+       (if (&& (isdefined (globalref (thismodule) ,name) (false))
+               (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name))
            (null)
-           (= (outerref ,name) ,name))
+           (const (globalref (thismodule) ,name) ,name))
        (null))))))
 
 (define (primitive-type-def-expr n name params super)
   (receive
    (params bounds) (sparam-name-bounds params)
    `(block
-     (global ,name) (const ,name)
+     (global ,name)
      (scope-block
       (block
        (local-def ,name)
@@ -1083,10 +1092,10 @@
        (= ,name (call (core _primitivetype) (thismodule) (inert ,name) (call (core svec) ,@params) ,n))
        (call (core _setsuper!) ,name ,super)
        (call (core _typebody!) ,name)
-       (if (&& (isdefined (outerref ,name))
-               (call (core _equiv_typedef) (outerref ,name) ,name))
+       (if (&& (isdefined (globalref (thismodule) ,name) (false))
+               (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name))
            (null)
-           (= (outerref ,name) ,name))
+           (const (globalref (thismodule) ,name) ,name))
        (null))))))
 
 ;; take apart a type signature, e.g. T{X} <: S{Y}
@@ -1255,7 +1264,7 @@
                    (list a)))
          ;; TODO: always use a specific special name like #anon# or _, then ignore
          ;; this as a local variable name.
-         (name (symbol (string "#" (current-julia-module-counter)))))
+         (name (symbol (string "#" (current-julia-module-counter '())))))
     (expand-forms
      `(block (local ,name)
              (function
@@ -1443,34 +1452,57 @@
           (else
            (error "invalid \"try\" form")))))
 
-(define (expand-unionall-def name type-ex)
+(define (expand-unionall-def name type-ex (allow-local #t))
   (if (and (pair? name)
            (eq? (car name) 'curly))
       (let ((name   (cadr name))
-            (params (cddr name)))
+            (params (cddr name))
+            (rr     (make-ssavalue)))
         (if (null? params)
             (error (string "empty type parameter list in \"" (deparse `(= (curly ,name) ,type-ex)) "\"")))
-        `(block
-          (const-if-global ,name)
-          ,(expand-forms
-            `(= ,name (where ,type-ex ,@params)))))
+          (expand-forms
+            `(block
+              (= ,rr (where ,type-ex ,@params))
+              (,(if allow-local 'assign-const-if-global 'const) ,name ,rr)
+              ,rr)))
       (expand-forms
        `(const (= ,name ,type-ex)))))
 
-;; take apart e.g. `const a::Int = 0` into `const a; a::Int = 0`
-(define (expand-const-decl e)
-  (let ((arg (cadr e)))
-    (if (atom? arg)
-        e
-        (case (car arg)
-          ((global local local-def)
-           (for-each (lambda (b) (if (not (assignment? b))
-                                     (error "expected assignment after \"const\"")))
-                     (cdr arg))
-           (expand-forms (expand-decls (car arg) (cdr arg) #t)))
-          ((= |::|)
-           (expand-forms (expand-decls 'const (cdr e) #f)))
-          (else (error "expected assignment after \"const\""))))))
+(define (filter-not-underscore syms)
+  (filter (lambda (x) (not (underscore-symbol? x))) syms))
+
+;; Expand `[global] const a::T = val`
+(define (expand-const-decl e (mustassgn #f))
+  (if (length= e 3) e
+    (let ((arg (cadr e)))
+      (if (atom? arg)
+          (if mustassgn
+            (error "expected assignment after \"const\"")
+            e)
+          (case (car arg)
+            ((global)
+              (expand-const-decl `(const ,(cadr arg)) #t))
+            ((=)
+              (cond
+                ;; `const f() = ...` - The `const` here is inoperative, but the syntax happened to work in earlier versions, so simply strip `const`.
+                ;; TODO: Consider whether to keep this in 2.0.
+                ((eventually-call? (cadr arg))
+                 (expand-forms arg))
+                ((and (pair? (cadr arg)) (eq? (caadr arg) 'curly))
+                 (expand-unionall-def (cadr arg) (caddr arg)))
+                ((and (pair? (cadr arg)) (eq? (caadr arg) 'tuple) (not (has-parameters? (cdr (cadr arg)))))
+                  ;; We need this case because `(f(), g()) = (1, 2)` goes through here, which cannot go via the `local` lowering below,
+                  ;; because the symbols come out wrong. Sigh... So much effort for such a syntax corner case.
+                  (expand-tuple-destruct (cdr (cadr arg)) (caddr arg) (lambda (assgn) `(,(car e) ,assgn))))
+                (else
+                 (let ((rr (make-ssavalue)))
+                   (expand-forms `(block
+                           (= ,rr ,(caddr arg))
+                           (scope-block (block (hardscope)
+                            (local (= ,(cadr arg) ,rr))
+                            ,.(map (lambda (v) `(,(car e) (globalref (thismodule) ,v) ,v)) (filter-not-underscore (lhs-vars (cadr arg))))
+                            ,rr))))))))
+            (else (error "expected assignment after \"const\"")))))))
 
 (define (expand-atomic-decl e)
   (error "unimplemented or unsupported atomic declaration"))
@@ -1478,7 +1510,7 @@
 (define (expand-local-or-global-decl e)
   (if (and (symbol? (cadr e)) (length= e 2))
       e
-      (expand-forms (expand-decls (car e) (cdr e) #f))))
+      (expand-forms (expand-decls (car e) (cdr e)))))
 
 ;; given a complex assignment LHS, return the symbol that will ultimately be assigned to
 (define (assigned-name e)
@@ -1488,37 +1520,36 @@
          (assigned-name (cadr e)))
         (else e)))
 
-;; local x, y=2, z => local x;local y;local z;y = 2
-(define (expand-decls what binds const?)
+;; local x, (y=2), z => local x;local y;local z;y = 2
+(define (expand-decls what binds)
   (if (not (list? binds))
       (error (string "invalid \"" what "\" declaration")))
   (let loop ((b       binds)
-             (vars    '())
+             (decls   '())
              (assigns '()))
     (if (null? b)
         `(block
-          ,.(if const?
-                (map (lambda (x) `(const ,x)) vars)
-                '())
-          ,.(map (lambda (x) `(,what ,x)) vars)
-          ,.(reverse assigns))
+          ,.(reverse decls)
+          ,.(reverse assigns)
+          ,.(if (null? assigns) `((null)) '()))
         (let ((x (car b)))
           (cond ((or (assignment-like? x) (function-def? x))
-                 (loop (cdr b)
-                       (append (lhs-decls (assigned-name (cadr x))) vars)
+                 (let ((new-vars (lhs-decls (assigned-name (cadr x)))))
+                  (loop (cdr b)
+                       (append (map (lambda (x) `(,what ,x)) new-vars) decls)
                        (cons `(,(car x) ,(all-decl-vars (cadr x)) ,(caddr x))
-                             assigns)))
+                             assigns))))
                 ((and (pair? x) (eq? (car x) '|::|))
                  (loop (cdr b)
-                       (cons (decl-var x) vars)
-                       (cons `(decl ,@(cdr x)) assigns)))
+                       (cons `(decl ,@(cdr x)) (cons `(,what ,(decl-var x)) decls))
+                       assigns))
                 ((symbol? x)
-                 (loop (cdr b) (cons x vars) assigns))
+                 (loop (cdr b) (cons `(,what, x) decls) assigns))
                 (else
                  (error (string "invalid syntax in \"" what "\" declaration"))))))))
 
 ;; convert (lhss...) = (tuple ...) to assignments, eliminating the tuple
-(define (tuple-to-assignments lhss0 x)
+(define (tuple-to-assignments lhss0 x wrap)
   (let loop ((lhss lhss0)
              (assigned lhss0)
              (rhss (cdr x))
@@ -1540,7 +1571,7 @@
                  (loop (cdr lhss)
                        (cons L assigned)
                        (cdr rhss)
-                       (cons (make-assignment L R) stmts)
+                       (cons (wrap (make-assignment L R)) stmts)
                        after
                        (cons R elts)))
                 ((vararg? L)
@@ -1551,7 +1582,7 @@
                        `(block ,@(reverse stmts)
                                (= ,temp (tuple ,@rhss))
                                ,@(reverse after)
-                               (= ,(cadr L) ,temp)
+                               ,(wrap `(= ,(cadr L) ,temp))
                                (unnecessary (tuple ,@(reverse elts) (... ,temp)))))
                      (let ((lhss- (reverse lhss))
                            (rhss- (reverse rhss))
@@ -1583,13 +1614,13 @@
                               (assigns (if (and (length= lhss- 1) (vararg? (car lhss-)))
                                            (begin
                                              (set-car! end
-                                                       (cons `(= ,(cadar lhss-) ,temp) (car end)))
+                                                       (cons (wrap `(= ,(cadar lhss-) ,temp)) (car end)))
                                              assigns)
                                            (append (if (> n 0)
                                                        `(,@assigns (local ,st))
                                                        assigns)
                                                    (destructure- 1 (reverse lhss-) temp
-                                                                 n st end)))))
+                                                                 n st end wrap)))))
                          (loop lhs-tail
                                (append (map (lambda (x) (if (vararg? x) (cadr x) x)) lhss-) assigned)
                                rhs-tail
@@ -1602,7 +1633,7 @@
                    `(block ,@(reverse stmts)
                            ,(make-assignment temp (cadr R))
                            ,@(reverse after)
-                           (= (tuple ,@lhss) ,temp)
+                           ,(wrap `(= (tuple ,@lhss) ,temp))
                            (unnecessary (tuple ,@(reverse elts) (... ,temp))))))
                 (else
                  (let ((temp (if (eventually-call? L) (gensy) (make-ssavalue))))
@@ -1612,11 +1643,11 @@
                          (if (symbol? temp)
                              (list* (make-assignment temp R) `(local-def ,temp) stmts)
                              (cons  (make-assignment temp R) stmts))
-                         (cons (make-assignment L temp) after)
+                         (cons (wrap (make-assignment L temp)) after)
                          (cons temp elts)))))))))
 
 ;; convert (lhss...) = x to tuple indexing
-(define (lower-tuple-assignment lhss x)
+(define (lower-tuple-assignment lhss x (wrap (lambda (x i) x)))
   (let ((t (make-ssavalue)))
     `(block
       (= ,t ,x)
@@ -1631,9 +1662,10 @@
                           `(block
                             (local-def ,temp)
                             (= ,temp (call (core getfield) ,t ,i))
-                            (= ,(car lhs) ,temp)))
-                        `(= ,(car lhs)
-                            (call (core getfield) ,t ,i)))
+                            ,(wrap `(= ,(car lhs) ,temp) i)))
+                        (wrap
+                          `(= ,(car lhs)
+                            (call (core getfield) ,t ,i)) i))
                     (loop (cdr lhs)
                           (+ i 1)))))
       ,t)))
@@ -1796,7 +1828,7 @@
   (let ((copied-vars  ;; variables not declared `outer` are copied in the innermost loop
          ;; TODO: maybe filter these to remove vars not assigned in the loop
          (delete-duplicates
-          (filter (lambda (x) (not (underscore-symbol? x)))
+          (filter-not-underscore
                   (apply append
                          (map lhs-vars
                               (filter (lambda (x) (not (outer? x))) (butlast lhss))))))))
@@ -1870,8 +1902,7 @@
                         ((and flat (pair? expr) (eq? (car expr) 'flatten))
                          (expand-generator (cadr expr) #t (delete-duplicates (append outervars myvars))))
                         ((pair? outervars)
-                         `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter (lambda (x) (not (underscore-symbol? x)))
-                                                                             outervars)))
+                         `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter-not-underscore outervars)))
                             ,expr))
                         (else expr))))
         `(-> ,argname (block ,@splat ,expr)))))))
@@ -2281,10 +2312,14 @@
 ;; `end`:  car collects statements to be executed afterwards.
 ;;         In general, actual assignments should only happen after
 ;;         the whole iterator is desctructured (https://github.com/JuliaLang/julia/issues/40574)
-(define (destructure- i lhss xx n st end)
+;;
+;; The `wrap` argument is a callback that will be called on all assignments to
+;; symbols `lhss`, e.g. to insert a `const` declaration.
+(define (destructure- i lhss xx n st end wrap)
   (if (null? lhss)
       '()
       (let* ((lhs  (car lhss))
+             (wrapfirst (lambda (x i) (if (= i 1) (wrap x) x)))
              (lhs- (cond ((or (symbol? lhs) (ssavalue? lhs))
                           lhs)
                          ((vararg? lhs)
@@ -2301,30 +2336,30 @@
             (error "multiple \"...\" on lhs of assignment"))
         (if (not (eq? lhs lhs-))
             (if (vararg? lhs)
-                (set-car! end (cons (expand-forms `(= ,(cadr lhs) ,(cadr lhs-))) (car end)))
-                (set-car! end (cons (expand-forms `(= ,lhs ,lhs-)) (car end)))))
+                (set-car! end (cons (expand-forms (wrap `(= ,(cadr lhs) ,(cadr lhs-)))) (car end)))
+                (set-car! end (cons (expand-forms (wrap `(= ,lhs ,lhs-))) (car end)))))
         (if (vararg? lhs-)
             (if (= i n)
                 (if (underscore-symbol? (cadr lhs-))
                     '()
                     (list (expand-forms
-                            `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st)))))))
+                            (wrap `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st))))))))
                 (let ((tail (if (eventually-call? lhs) (gensy) (make-ssavalue))))
                   (cons (expand-forms
                           (lower-tuple-assignment
                             (list (cadr lhs-) tail)
-                            `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st)))))
-                        (destructure- 1 (cdr lhss) tail (- n i) st end))))
+                            `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st))) wrapfirst))
+                        (destructure- 1 (cdr lhss) tail (- n i) st end wrap))))
             (cons (expand-forms
                     (lower-tuple-assignment
                       (if (= i n)
                           (list lhs-)
                           (list lhs- st))
                       `(call (top indexed_iterate)
-                             ,xx ,i ,@(if (eq? i 1) '() `(,st)))))
-                  (destructure- (+ i 1) (cdr lhss) xx n st end))))))
+                             ,xx ,i ,@(if (eq? i 1) '() `(,st))) wrapfirst))
+                  (destructure- (+ i 1) (cdr lhss) xx n st end wrap))))))
 
-(define (expand-tuple-destruct lhss x)
+(define (expand-tuple-destruct lhss x (wrap identity))
   (define (sides-match? l r)
     ;; l and r either have equal lengths, or r has a trailing ...
     (cond ((null? l)          (null? r))
@@ -2337,7 +2372,7 @@
            (sides-match? lhss (cdr x)))
       ;; (a, b, ...) = (x, y, ...)
       (expand-forms
-       (tuple-to-assignments lhss x))
+       (tuple-to-assignments lhss x wrap))
       ;; (a, b, ...) = other
       (begin
         ;; like memq, but if lhs is (... sym), check against sym instead
@@ -2358,7 +2393,7 @@
           `(block
             ,@(if (> n 0) `((local ,st)) '())
             ,@ini
-            ,@(destructure- 1 lhss xx n st end)
+            ,@(destructure- 1 lhss xx n st end wrap)
             ,@(reverse (car end))
             (unnecessary ,xx))))))
 
@@ -2371,7 +2406,7 @@
       `(= ,lhs ,rhs)))
 
 (define (expand-forms e)
-  (if (or (atom? e) (memq (car e) '(quote inert top core globalref outerref module toplevel ssavalue null true false meta using import export public thismodule toplevel-only)))
+  (if (or (atom? e) (memq (car e) '(quote inert top core globalref module toplevel ssavalue null true false meta using import export public thismodule toplevel-only)))
       e
       (let ((ex (get expand-table (car e) #f)))
         (if ex
@@ -2379,6 +2414,18 @@
             (cons (car e)
                   (map expand-forms (cdr e)))))))
 
+(define (find pred e)
+  (let loop ((xs e))
+    (if (null? xs)
+        #f
+        (let ((elt (car xs)))
+          (if (pred elt)
+              elt
+              (loop (cdr xs)))))))
+
+(define (something e)
+  (find (lambda (x) (not (equal? x '(null)))) e))
+
 ;; table mapping expression head to a function expanding that form
 (define expand-table
   (table
@@ -2398,13 +2445,16 @@
 
    'opaque_closure
    (lambda (e)
-     (let* ((ty   (and (length> e 2) (expand-forms (cadr e))))
-            (F    (if (length> e 2) (caddr e) (cadr e)))
+     (let* ((argt  (something (list (expand-forms (cadr e)) #f)))
+            (rt_lb (something (list (expand-forms (caddr e)) #f)))
+            (rt_ub (something (list (expand-forms (cadddr e)) #f)))
+            (allow-partial (caddddr e))
+            (F             (cadddddr e))
             (isva (let* ((arglist (function-arglist F))
                          (lastarg (and (pair? arglist) (last arglist))))
-                    (if (and ty (any (lambda (arg)
+                    (if (and argt (any (lambda (arg)
                                        (let ((arg (if (vararg? arg) (cadr arg) arg)))
-                                         (not (equal? (arg-type arg) '(core Any)))))
+                                         (not (symbol? arg))))
                                      arglist))
                         (error "Opaque closure argument type may not be specified both in the method signature and separately"))
                     (if (or (varargexpr? lastarg) (vararg? lastarg))
@@ -2424,7 +2474,7 @@
        (let* ((argtype   (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex))
                                 (expand-forms `(curly (core Tuple) ,@argtypes))
                                 (reverse tvars))))
-         `(_opaque_closure ,(or ty argtype) ,isva ,(length argtypes) ,functionloc ,lam))))
+         `(_opaque_closure ,(or argt argtype) ,rt_lb ,rt_ub ,isva ,(length argtypes) ,allow-partial ,functionloc ,lam))))
 
    'block
    (lambda (e)
@@ -2502,7 +2552,7 @@
                                lhss)
                         (unnecessary ,rr)))))))
       ((or (and (symbol-like? lhs) (valid-name? lhs))
-           (globalref? lhs) (outerref? lhs))
+           (globalref? lhs))
        (sink-assignment lhs (expand-forms (caddr e))))
       ((atom? lhs)
        (error (string "invalid assignment location \"" (deparse lhs) "\"")))
@@ -2965,10 +3015,14 @@
                  (set! vars (cons v vars)))
              (if (not (length= e 2))
                  (find-assigned-vars- (caddr e)))))
+          ((assign-const-if-global)
+            ;; like v = val, except that if `v` turns out global(either
+            ;; implicitly or by explicit `global`), it gains an implicit `const`
+            (set! vars (cons (cadr e) vars)))
           ((=)
            (let ((v (decl-var (cadr e))))
              (find-assigned-vars- (caddr e))
-             (if (or (ssavalue? v) (globalref? v) (outerref? v) (underscore-symbol? v))
+             (if (or (ssavalue? v) (globalref? v) (underscore-symbol? v))
                  '()
                  (set! vars (cons v vars)))))
           (else
@@ -3016,7 +3070,7 @@
     (for-each (lambda (v) (push-var! tab v v)) sp)
     (for-each (lambda (v) (push-var! tab v v)) locals)
     (for-each (lambda (pair) (push-var! tab (car pair) (cdr pair))) renames)
-    (for-each (lambda (v) (push-var! tab v `(outerref ,v))) globals)
+    (for-each (lambda (v) (push-var! tab v `(globalref (thismodule) ,v))) globals)
     (for-each (lambda (v) (push-var! tab v v)) args)
     (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars tab)))
 
@@ -3046,13 +3100,13 @@
       (or (and (memq var (scope:args scope))    'argument)
           (and (memq var (scope:locals scope))  'local)
           (and (memq var (scope:globals scope))
-               (if (and exclude-top-level-globals
+              (if (and exclude-top-level-globals
                         (null? (lam:args (scope:lam scope)))
                         ;; don't inherit global decls from the outermost scope block
                         ;; in a top-level expression.
                         (or (not (scope:prev scope))
                             (not (scope:prev (scope:prev scope)))))
-                   'none 'global))
+                  'none 'global))
           (and (memq var (scope:sp scope))      'static-parameter)
           (var-kind var (scope:prev scope) exclude-top-level-globals))
       'none))
@@ -3080,16 +3134,23 @@
          (let ((val (and scope (get (scope:table scope) e #f))))
            (cond (val (car val))
                  ((underscore-symbol? e) e)
-                 (else `(outerref ,e)))))
+                 (else `(globalref (thismodule) ,e)))))
         ((or (not (pair? e)) (quoted? e) (memq (car e) '(toplevel symbolicgoto symboliclabel toplevel-only)))
          e)
         ((eq? (car e) 'global)
          (check-valid-name (cadr e))
          e)
+        ((eq? (car e) 'assign-const-if-global)
+           (if (eq? (var-kind (cadr e) scope) 'local)
+               (if (length= e 2) (null) `(= ,@(cdr e)))
+               `(const ,@(cdr e))))
         ((memq (car e) '(local local-def))
          (check-valid-name (cadr e))
          ;; remove local decls
          '(null))
+        ((memq (car e) '(using import export public))
+          ;; no scope resolution - identifiers remain raw symbols
+          e)
         ((eq? (car e) 'require-existing-local)
          (if (not (in-scope? (cadr e) scope))
              (error "no outer local variable declaration exists for \"for outer\""))
@@ -3178,7 +3239,6 @@
                                         vars)
                               t)
                             #f)))))
-
            (for-each (lambda (v)
                        (if (or (memq v locals-def) (memq v local-decls))
                            (error (string "variable \"" v "\" declared both local and global")))
@@ -3241,11 +3301,9 @@
                   (warn-var?! (cadr e) scope)
                   (= *scopewarn-opt* 1))
              (let* ((v    (cadr e))
-                    (loc  (extract-line-file loc))
-                    (line (if (= (car loc) 0) (julia-current-line) (car loc)))
-                    (file (if (eq? (cadr loc) 'none) (julia-current-file) (cadr loc))))
+                    (loc  (extract-line-file loc)))
                (lowering-warning
-                1000 'warn (symbol (string file line)) file line
+                1000 'warn (cadr loc) (car loc)
                 (string "Assignment to `" v "` in soft scope is ambiguous "
                         "because a global variable by the same name exists: "
                         "`" v "` will be treated as a new local. "
@@ -3270,7 +3328,7 @@
 (define (free-vars- e tab)
   (cond ((or (eq? e UNUSED) (underscore-symbol? e)) tab)
         ((symbol? e) (put! tab e #t))
-        ((and (pair? e) (eq? (car e) 'outerref)) tab)
+        ((and (pair? e) (eq? (car e) 'globalref)) tab)
         ((and (pair? e) (eq? (car e) 'break-block)) (free-vars- (caddr e) tab))
         ((and (pair? e) (eq? (car e) 'with-static-parameters)) (free-vars- (cadr e) tab))
         ((or (atom? e) (quoted? e)) tab)
@@ -3435,14 +3493,14 @@ f(x) = yt(x)
         (s (make-ssavalue)))
     `((thunk ,(linearize `(lambda ()
          (() () 0 ())
-         (block (global ,name) (const ,name)
+         (block (global ,name)
                 ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names)
                 (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@P)
                             (call (core svec) ,@(map quotify fields))
                             (call (core svec))
                             (false) ,(length fields)))
                 (call (core _setsuper!) ,s ,super)
-                (= (outerref ,name) ,s)
+                (const (globalref (thismodule) ,name) ,s)
                 (call (core _typebody!) ,s (call (core svec) ,@types))
                 (return (null)))))))))
 
@@ -3450,13 +3508,13 @@ f(x) = yt(x)
   (let ((s (make-ssavalue)))
     `((thunk ,(linearize `(lambda ()
        (() () 0 ())
-       (block (global ,name) (const ,name)
+       (block (global ,name)
               (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec))
                           (call (core svec) ,@(map quotify fields))
                           (call (core svec))
                           (false) ,(length fields)))
               (call (core _setsuper!) ,s ,super)
-              (= (outerref ,name) ,s)
+              (const (globalref (thismodule) ,name) ,s)
               (call (core _typebody!) ,s
                     (call (core svec) ,@(map (lambda (v) '(core Box)) fields)))
               (return (null)))))))))
@@ -3492,9 +3550,9 @@ f(x) = yt(x)
 (define (clear-capture-bits vinfos)
   (map vinfo:not-capt vinfos))
 
-(define (convert-lambda lam fname interp capt-sp opaq)
+(define (convert-lambda lam fname interp capt-sp opaq parsed-method-stack)
   (let ((body (add-box-inits-to-body
-               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam)))))))
+               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam)))))))
     `(lambda ,(lam:args lam)
        (,(clear-capture-bits (car (lam:vinfo lam)))
         ()
@@ -3569,7 +3627,7 @@ f(x) = yt(x)
 ;; declared types.
 ;; when doing this, the original value needs to be preserved, to
 ;; ensure the expression `a=b` always returns exactly `b`.
-(define (convert-assignment var rhs0 fname lam interp opaq globals locals)
+(define (convert-assignment var rhs0 fname lam interp opaq parsed-method-stack globals locals)
   (cond
     ((symbol? var)
      (let* ((vi (get locals var #f))
@@ -3587,7 +3645,7 @@ f(x) = yt(x)
                                 (equal? rhs0 '(the_exception)))
                             rhs0
                             (make-ssavalue)))
-                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam))
+                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq parsed-method-stack (table) locals) #t lam))
                   (ex (cond (closed `(call (core setfield!)
                                            ,(if interp
                                                 `($ ,var)
@@ -3601,7 +3659,7 @@ f(x) = yt(x)
                  `(block (= ,rhs1 ,rhs0)
                          ,ex
                          ,rhs1))))))
-     ((or (outerref? var) (globalref? var))
+     ((globalref? var)
       (convert-global-assignment var rhs0 globals lam))
      ((ssavalue? var)
       `(= ,var ,rhs0))
@@ -3712,7 +3770,7 @@ f(x) = yt(x)
   (Set '(quote top core lineinfo line inert local-def unnecessary copyast
          meta inbounds boundscheck loopinfo decl aliasscope popaliasscope
          thunk with-static-parameters toplevel-only
-         global globalref outerref const-if-global thismodule
+         global globalref assign-const-if-global thismodule
          const atomic null true false ssavalue isdefined toplevel module lambda
          error gc_preserve_begin gc_preserve_end import using export public inline noinline purity)))
 
@@ -3871,17 +3929,17 @@ f(x) = yt(x)
 (define (toplevel-preserving? e)
   (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse))))
 
-(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table)))
   (if toplevel
       (map (lambda (x)
              (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined
                                                   (and toplevel (toplevel-preserving? x))
-                                                  interp opaq globals locals))))
+                                                  interp opaq parsed-method-stack globals locals))))
                (if (null? (cdr tl))
                    (car tl)
                    `(block ,@(cdr tl) ,(car tl)))))
            exprs)
-      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs)))
+      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq parsed-method-stack globals locals)) exprs)))
 
 (define (prepare-lambda! lam)
   ;; mark all non-arguments as assigned, since locals that are never assigned
@@ -3890,11 +3948,17 @@ f(x) = yt(x)
             (list-tail (car (lam:vinfo lam)) (length (lam:args lam))))
   (lambda-optimize-vars! lam))
 
-(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+;; must start with a hash and second character must be numeric
+(define (anon-function-name? str)
+  (and (>= (string-length str) 2)
+       (char=? (string.char str 0) #\#)
+       (char-numeric? (string.char str 1))))
+
+(define (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table)))
   (if (and (not lam)
            (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure)))))
       (if (atom? e) e
-          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))
+          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
       (cond
        ((symbol? e)
         (define (new-undef-var name)
@@ -3913,7 +3977,10 @@ f(x) = yt(x)
                  (val (if (equal? typ '(core Any))
                           val
                           `(call (core typeassert) ,val
-                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals)))))
+                                 ,(let ((convt (cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
+                                    (if (or (symbol-like? convt) (quoted? convt))
+                                        convt
+                                        (renumber-assigned-ssavalues convt)))))))
             `(block
                ,@(if (eq? box access) '() `((= ,access ,box)))
                ,undefcheck
@@ -3937,7 +4004,7 @@ f(x) = yt(x)
        ((atom? e) e)
        (else
         (case (car e)
-          ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e)
+          ((quote top core globalref thismodule lineinfo line break inert module toplevel null true false meta) e)
           ((toplevel-only)
            ;; hack to avoid generating a (method x) expr for struct types
            (if (eq? (cadr e) 'struct)
@@ -3945,8 +4012,8 @@ f(x) = yt(x)
            e)
           ((=)
            (let ((var (cadr e))
-                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals)))
-             (convert-assignment var rhs fname lam interp opaq globals locals)))
+                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
+             (convert-assignment var rhs fname lam interp opaq parsed-method-stack globals locals)))
           ((local-def) ;; make new Box for local declaration of defined variable
            (let ((vi (get locals (cadr e) #f)))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
@@ -3963,10 +4030,6 @@ f(x) = yt(x)
            (put! globals (binding-to-globalref (cadr e)) #f)
            e)
           ((atomic) e)
-          ((const-if-global)
-           (if (local-in? (cadr e) lam locals)
-               '(null)
-               `(const ,(cadr e))))
           ((isdefined) ;; convert isdefined expr to function for closure converted variables
            (let* ((sym (cadr e))
                   (vi (and (symbol? sym) (get locals sym #f)))
@@ -3986,9 +4049,10 @@ f(x) = yt(x)
                         e))
                    (else e))))
           ((_opaque_closure)
-           (let* ((isva  (caddr e))
-                  (nargs (cadddr e))
-                  (functionloc (caddddr e))
+           (let* ((isva  (car (cddddr e)))
+                  (nargs (cadr (cddddr e)))
+                  (allow-partial (caddr (cddddr e)))
+                  (functionloc   (cadddr (cddddr e)))
                   (lam2  (last e))
                   (vis   (lam:vinfo lam2))
                   (cvs   (map car (cadr vis))))
@@ -4000,8 +4064,8 @@ f(x) = yt(x)
                                            v)))
                                    cvs)))
                `(new_opaque_closure
-                 ,(cadr e) (call (core apply_type) (core Union)) (core Any)
-                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs)))
+                 ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial
+                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack))
                  ,@var-exprs))))
           ((method)
            (let* ((name  (method-expr-name e))
@@ -4010,12 +4074,12 @@ f(x) = yt(x)
                   (vis   (if short '(() () ()) (lam:vinfo lam2)))
                   (cvs   (map car (cadr vis)))
                   (local? (lambda (s) (and lam (symbol? s) (local-in? s lam locals))))
-                  (local (and (not (outerref? (cadr e))) (local? name)))
+                  (local (and (not (globalref? (cadr e))) (local? name)))
                   (sig      (and (not short) (caddr e)))
                   (sp-inits (if (or short (not (eq? (car sig) 'block)))
                                 '()
                                 (map-cl-convert (butlast (cdr sig))
-                                                fname lam namemap defined toplevel interp opaq globals locals)))
+                                                fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
                   (sig      (and sig (if (eq? (car sig) 'block)
                                          (last sig)
                                          sig))))
@@ -4042,22 +4106,22 @@ f(x) = yt(x)
                                           ;; anonymous functions with keyword args generate global
                                           ;; functions that refer to the type of a local function
                                           (rename-sig-types sig namemap)
-                                          fname lam namemap defined toplevel interp opaq globals locals)
+                                          fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                   ,(let ((body (add-box-inits-to-body
                                                 lam2
-                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table)
+                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq parsed-method-stack (table)
                                                             (vinfo-to-table (car (lam:vinfo lam2)))))))
                                      `(lambda ,(cadr lam2)
                                         (,(clear-capture-bits (car vis))
                                          ,@(cdr vis))
                                         ,body)))))
                        (else
-                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f)))
+                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f parsed-method-stack)))
                                (top-stmts (cdr exprs))
                                (newlam    (compact-and-renumber (linearize (car exprs)) 'none 0)))
                           `(toplevel-butfirst
                             (block ,@sp-inits
-                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals)
+                                   (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                            ,(julia-bq-macro newlam)))
                             ,@top-stmts))))
 
@@ -4066,9 +4130,11 @@ f(x) = yt(x)
                         (type-name  (or (get namemap name #f)
                                         (and name
                                              (symbol (string (if (= (string.char (string name) 0) #\#)
-                                                                 ""
-                                                                 "#")
-                                                             name "#" (current-julia-module-counter))))))
+                                                                  (if (anon-function-name? (string name))
+                                                                    (string "#" (current-julia-module-counter parsed-method-stack))
+                                                                    name)
+                                                                  (string "#" name))
+                                                              "#" (current-julia-module-counter parsed-method-stack))))))
                         (alldefs (expr-find-all
                                   (lambda (ex) (and (length> ex 2) (eq? (car ex) 'method)
                                                     (not (eq? ex e))
@@ -4160,12 +4226,12 @@ f(x) = yt(x)
                                (append (map (lambda (gs tvar)
                                               (make-assignment gs `(call (core TypeVar) ',tvar (core Any))))
                                             closure-param-syms closure-param-names)
-                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals)
+                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                                  ,(convert-lambda lam2
                                                                   (if iskw
                                                                       (caddr (lam:args lam2))
                                                                       (car (lam:args lam2)))
-                                                                  #f closure-param-names #f)))))))
+                                                                  #f closure-param-names #f parsed-method-stack)))))))
                         (mk-closure  ;; expression to make the closure
                          (let* ((var-exprs (map (lambda (v)
                                                   (let ((cv (assq v (cadr (lam:vinfo lam)))))
@@ -4199,7 +4265,7 @@ f(x) = yt(x)
                        (begin
                          (put! defined name #t)
                          `(toplevel-butfirst
-                           ,(convert-assignment name mk-closure fname lam interp opaq globals locals)
+                           ,(convert-assignment name mk-closure fname lam interp opaq parsed-method-stack globals locals)
                            ,@typedef
                            ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                            ,@sp-inits
@@ -4213,14 +4279,14 @@ f(x) = yt(x)
                                        (table)
                                        (table)
                                        (null? (cadr e)) ;; only toplevel thunks have 0 args
-                                       interp opaq globals (vinfo-to-table (car (lam:vinfo e))))))
+                                       interp opaq parsed-method-stack globals (vinfo-to-table (car (lam:vinfo e))))))
              `(lambda ,(cadr e)
                 (,(clear-capture-bits (car (lam:vinfo e)))
                  () ,@(cddr (lam:vinfo e)))
                 (block ,@body))))
           ;; remaining `::` expressions are type assertions
           ((|::|)
-           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))
           ;; remaining `decl` expressions are only type assertions if the
           ;; argument is global or a non-symbol.
           ((decl)
@@ -4235,15 +4301,23 @@ f(x) = yt(x)
                            (put! globals ref #t)
                            `(block
                              (toplevel-only set_binding_type! ,(cadr e))
-                             (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
+                             (globaldecl ,ref ,(caddr e))
+                             (null)))
                          `(call (core typeassert) ,@(cdr e))))
-                   fname lam namemap defined toplevel interp opaq globals locals))))
+                   fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))
           ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars
           ((with-static-parameters)
-           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))
           (else
            (cons (car e)
-                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))))))))
+                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))))))
+
+;; wrapper for `cl-convert-`
+(define (cl-convert e fname lam namemap defined toplevel interp opaq (parsed-method-stack '()) (globals (table)) (locals (table)))
+  (if (is-method? e)
+      (let ((name (method-expr-name e)))
+        (cl-convert- e fname lam namemap defined toplevel interp opaq (cons name parsed-method-stack) globals locals))
+      (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
 
 (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f))
 
@@ -4258,11 +4332,8 @@ f(x) = yt(x)
         (else (for-each linearize (cdr e))))
   e)
 
-;; N.B.: This assumes that resolve-scopes has run, so outerref is equivalent to
-;; a global in the current scope.
 (define (valid-ir-argument? e)
   (or (simple-atom? e)
-      (and (outerref? e) (nothrow-julia-global (cadr e)))
       (and (globalref? e) (nothrow-julia-global (cadr e) (caddr e)))
       (and (pair? e)
            (memq (car e) '(quote inert top core
@@ -4272,7 +4343,7 @@ f(x) = yt(x)
   (or (ssavalue? lhs)
       (valid-ir-argument? e)
       (and (symbol? lhs) (pair? e)
-           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref)))))
+           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure globalref)))))
 
 (define (valid-ir-return? e)
   ;; returning lambda directly is needed for @generated
@@ -4459,7 +4530,7 @@ f(x) = yt(x)
         ;; left-to-right evaluation semantics.
         (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x)
                                               (and (pair? x)
-                                                   (memq (car x) '(quote inert top core globalref outerref)))))
+                                                   (memq (car x) '(quote inert top core globalref)))))
                               lst)))
           (let loop ((lst  lst)
                      (vals '()))
@@ -4529,19 +4600,17 @@ f(x) = yt(x)
     ;; from the current function.
     (define (compile e break-labels value tail)
       (if (or (not (pair? e)) (memq (car e) '(null true false ssavalue quote inert top core copyast the_exception $
-                                                   globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall)))
+                                                   globalref thismodule cdecl stdcall fastcall thiscall llvmcall)))
           (let ((e1 (if (and arg-map (symbol? e))
                         (get arg-map e e)
                         e)))
             (if (and value (or (underscore-symbol? e)
-                               (and (pair? e) (or (eq? (car e) 'outerref)
-                                                  (eq? (car e) 'globalref))
+                               (and (pair? e) (eq? (car e) 'globalref)
                                     (underscore-symbol? (cadr e)))))
                 (error (string "all-underscore identifiers are write-only and their values cannot be used in expressions" (format-loc current-loc))))
             (cond (tail  (emit-return tail e1))
                   (value e1)
                   ((symbol? e1) (emit e1) #f)  ;; keep symbols for undefined-var checking
-                  ((and (pair? e1) (eq? (car e1) 'outerref)) (emit e1) #f)  ;; keep globals for undefined-var checking
                   ((and (pair? e1) (eq? (car e1) 'globalref)) (emit e1) #f) ;; keep globals for undefined-var checking
                   (else #f)))
           (case (car e)
@@ -4565,17 +4634,17 @@ f(x) = yt(x)
                               (cons (cadr e) (cons fptr (cdddr e)))))
                            ;; Leave a literal lambda in place for later global expansion
                            ((eq? (car e) 'new_opaque_closure)
-                            (let* ((oc_method (car (list-tail (cdr e) 3))) ;; opaque_closure_method
+                            (let* ((oc_method (car (list-tail (cdr e) 4))) ;; opaque_closure_method
                                    (lambda (list-ref oc_method 5))
                                    (lambda (linearize lambda)))
                               (append
-                               (compile-args (list-head (cdr e) 3) break-labels)
+                               (compile-args (list-head (cdr e) 4) break-labels)
                                (list (append (butlast oc_method) (list lambda)))
-                               (compile-args (list-tail (cdr e) 4) break-labels))))
+                               (compile-args (list-tail (cdr e) 5) break-labels))))
                            ;; NOTE: 1st argument to cglobal treated same as for ccall
                            ((and (length> e 2)
                                  (or (eq? (cadr e) 'cglobal)
-                                     (equal? (cadr e) '(outerref cglobal))))
+                                     (equal? (cadr e) '(globalref (thismodule) cglobal))))
                             (append (list (cadr e))
                                     (if (atom-or-not-tuple-call? (caddr e))
                                         (compile-args (list (caddr e)) break-labels)
@@ -4827,6 +4896,12 @@ f(x) = yt(x)
             ((global) ; keep global declarations as statements
              (if value (error "misplaced \"global\" declaration"))
              (emit e))
+            ((globaldecl)
+             (if value (error "misplaced \"global\" declaration"))
+             (if (atom? (caddr e)) (emit e)
+              (let ((rr (make-ssavalue)))
+                (emit `(= ,rr ,(caddr e)))
+                (emit `(globaldecl ,(cadr e) ,rr)))))
             ((local-def) #f)
             ((local) #f)
             ((moved-local)
@@ -4841,7 +4916,7 @@ f(x) = yt(x)
                          (set! global-const-error current-loc))
                      (emit e))))
             ((atomic) (error "misplaced atomic declaration"))
-            ((isdefined) (if tail (emit-return tail e) e))
+            ((isdefined throw_undef_if_not) (if tail (emit-return tail e) e))
             ((boundscheck) (if tail (emit-return tail e) e))
 
             ((method)
@@ -5021,13 +5096,13 @@ f(x) = yt(x)
              (list ,@(cadr vi)) ,(caddr vi) (list ,@(cadddr vi)))
        ,@(cdddr lam))))
 
-(define (make-lineinfo name file line (inlined-at #f))
-  `(lineinfo (thismodule) ,(if inlined-at '|macro expansion| name) ,file ,line ,(or inlined-at 0)))
+(define (make-lineinfo file line (inlined-at #f))
+  `(lineinfo ,file ,line ,(or inlined-at 0)))
 
 (define (set-lineno! lineinfo num)
-  (set-car! (cddddr lineinfo) num))
+  (set-car! (cddr lineinfo) num))
 
-(define (compact-ir body name file line)
+(define (compact-ir body file line)
   (let ((code         '(block))
         (locs         '(list))
         (linetable    '(list))
@@ -5044,7 +5119,7 @@ f(x) = yt(x)
       (or e (raise "missing value in IR"))
       (if (and (null? (cdr linetable))
                (not (and (pair? e) (eq? (car e) 'meta))))
-          (begin (set! linetable (cons (make-lineinfo name file line) linetable))
+          (begin (set! linetable (cons (make-lineinfo file line) linetable))
                  (set! linetablelen (+ linetablelen 1))
                  (set! current-loc 1)))
       (set! code (cons e code))
@@ -5071,8 +5146,8 @@ f(x) = yt(x)
                             (if (pair? (cddr e))
                                 (set! current-file (caddr e)))
                             (set! linetable (cons (if (null? locstack)
-                                                      (make-lineinfo name current-file current-line)
-                                                      (make-lineinfo name current-file current-line (caar locstack)))
+                                                      (make-lineinfo current-file current-line)
+                                                      (make-lineinfo current-file current-line (caar locstack)))
                                                   linetable))
                             (set! linetablelen (+ linetablelen 1))
                             (set! current-loc linetablelen)))))
@@ -5080,7 +5155,7 @@ f(x) = yt(x)
                    (set! locstack (cons (list current-loc current-line current-file) locstack))
                    (set! current-file (caddr e))
                    (set! current-line 0)
-                   (set! linetable (cons (make-lineinfo name current-file current-line current-loc) linetable))
+                   (set! linetable (cons (make-lineinfo current-file current-line current-loc) linetable))
                    (set! linetablelen (+ linetablelen 1))
                    (set! current-loc linetablelen))
                   ((and (length= e 2) (eq? (car e) 'meta) (eq? (cadr e) 'pop_loc))
@@ -5106,7 +5181,6 @@ f(x) = yt(x)
 
 (define (renumber-lambda lam file line)
   (let* ((stuff (compact-ir (lam:body lam)
-                            (if (null? (cadr lam)) '|top-level scope| 'none)
                             file line))
          (code (aref stuff 0))
          (locs (aref stuff 1))
@@ -5127,12 +5201,10 @@ f(x) = yt(x)
                      (if idx
                          `(static_parameter ,idx)
                          e)))))
-            ((and (pair? e) (eq? (car e) 'outerref))
-             (cadr e))
             ((nospecialize-meta? e)
              ;; convert nospecialize vars to slot numbers
              `(meta ,(cadr e) ,@(map renumber-stuff (cddr e))))
-            ((or (atom? e) (quoted? e) (eq? (car e) 'global))
+            ((or (atom? e) (quoted? e) (memq (car e) '(using import export public global toplevel)))
              e)
             ((ssavalue? e)
              (let ((idx (get ssavalue-table (cadr e) #f)))
diff --git a/src/julia.expmap.in b/src/julia.expmap.in
index 213d087fdc2ad..e5f9ee890205f 100644
--- a/src/julia.expmap.in
+++ b/src/julia.expmap.in
@@ -1,7 +1,7 @@
 @JULIA_SHLIB_SYMBOL_VERSION@ {
   global:
     pthread*;
-    __stack_chk_guard;
+    __stack_chk_*;
     asprintf;
     bitvector_*;
     ios_*;
diff --git a/src/julia.h b/src/julia.h
index 5c3a983367470..abb8a57ff13b0 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -27,30 +27,23 @@
 
 #include <setjmp.h>
 #ifndef _OS_WINDOWS_
-#  define jl_jmp_buf sigjmp_buf
-#  if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
-#    define MAX_ALIGN 8
-#  elif defined(_CPU_AARCH64_)
-// int128 is 16 bytes aligned on aarch64
-#    define MAX_ALIGN 16
-#  elif defined(_P64)
-// Generically we assume MAX_ALIGN is sizeof(void*)
-#    define MAX_ALIGN 8
-#  else
-#    define MAX_ALIGN 4
-#  endif
+    #define jl_jmp_buf sigjmp_buf
 #else
-#  include "win32_ucontext.h"
-#  define jl_jmp_buf jmp_buf
-#  define MAX_ALIGN 8
+    #include "win32_ucontext.h"
+    #define jl_jmp_buf jmp_buf
 #endif
 
 // Define the largest size (bytes) of a properly aligned object that the
-// processor family and compiler typically supports without a lock
-// (assumed to be at least a pointer size). Since C is bad at handling 16-byte
-// types, we currently use 8 here as the default.
+// processor family (MAX_ATOMIC_SIZE) and compiler (MAX_POINTERATOMIC_SIZE)
+// typically supports without a lock (assumed to be at least a pointer size)
+// with MAX_POINTERATOMIC_SIZE >= MAX_ATOMIC_SIZE.
+#ifdef _P64
+#define MAX_ATOMIC_SIZE 16
+#define MAX_POINTERATOMIC_SIZE 16
+#else
 #define MAX_ATOMIC_SIZE 8
 #define MAX_POINTERATOMIC_SIZE 8
+#endif
 
 #ifdef _P64
 #define NWORDS(sz) (((sz)+7)>>3)
@@ -77,6 +70,7 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
 #ifdef JL_LIBRARY_EXPORTS
 #include "uv.h"
 #endif
+#include "gc-interface.h"
 #include "julia_atomics.h"
 #include "julia_threads.h"
 #include "julia_assert.h"
@@ -237,7 +231,10 @@ JL_DLLEXPORT extern const jl_callptr_t jl_fptr_interpret_call_addr;
 
 JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr;
 
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_wait_for_compiled_addr;
+
 typedef struct _jl_line_info_node_t {
+    JL_DATA_TYPE
     struct _jl_module_t *module;
     jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t
     jl_sym_t *file;
@@ -245,6 +242,19 @@ typedef struct _jl_line_info_node_t {
     int32_t inlined_at;
 } jl_line_info_node_t;
 
+struct jl_codeloc_t {
+    int32_t line;
+    int32_t to;
+    int32_t pc;
+};
+
+typedef struct _jl_debuginfo_t {
+    jl_value_t *def;
+    struct _jl_debuginfo_t *linetable; // or nothing
+    jl_svec_t *edges; // Memory{DebugInfo}
+    jl_value_t *codelocs; // String // Memory{UInt8} // compressed info
+} jl_debuginfo_t;
+
 // the following mirrors `struct EffectsOverride` in `base/compiler/effects.jl`
 typedef union __jl_purity_overrides_t {
     struct {
@@ -261,53 +271,57 @@ typedef union __jl_purity_overrides_t {
         uint16_t ipo_inaccessiblememonly : 1;
         uint16_t ipo_noub                : 1;
         uint16_t ipo_noub_if_noinbounds  : 1;
+        uint16_t ipo_consistent_overlay  : 1;
+        uint16_t ipo_nortcall            : 1;
     } overrides;
     uint16_t bits;
 } _jl_purity_overrides_t;
 
-#define NUM_EFFECTS_OVERRIDES 9
-#define NUM_IR_FLAGS 12
+#define NUM_EFFECTS_OVERRIDES 11
+#define NUM_IR_FLAGS 13
 
 // This type describes a single function body
 typedef struct _jl_code_info_t {
+    JL_DATA_TYPE
     // ssavalue-indexed arrays of properties:
     jl_array_t *code;  // Any array of statements
-    jl_value_t *codelocs; // Int32 array of indices into the line table
+    jl_debuginfo_t *debuginfo; // Table of edge data for each statement
     jl_value_t *ssavaluetypes; // types of ssa values (or count of them)
     jl_array_t *ssaflags; // 32 bits flags associated with each statement:
         // 1 << 0 = inbounds region
         // 1 << 1 = callsite inline region
         // 1 << 2 = callsite noinline region
-        // 1 << 3 = throw block
-        // 1 << 4 = refined statement
-        // 1 << 5 = :consistent
-        // 1 << 6 = :effect_free
-        // 1 << 7 = :nothrow
-        // 1 << 8 = :terminates
-        // 1 << 9 = :noub
-        // 1 << 10 = :effect_free_if_inaccessiblememonly
-        // 1 << 11 = :inaccessiblemem_or_argmemonly
-        // 1 << 12-19 = callsite effects overrides
+        // 1 << 3 = refined statement
+        // 1 << 4 = :consistent
+        // 1 << 5 = :effect_free
+        // 1 << 6 = :nothrow
+        // 1 << 7 = :terminates
+        // 1 << 8 = :noub
+        // 1 << 9 = :effect_free_if_inaccessiblememonly
+        // 1 << 10 = :inaccessiblemem_or_argmemonly
+        // 1 << 11-19 = callsite effects overrides
     // miscellaneous data:
-    jl_value_t *linetable; // Table of locations [TODO: make this volatile like slotnames]
     jl_array_t *slotnames; // names of local variables
     jl_array_t *slotflags;  // local var bit flags
-    // the following are optional transient properties (not preserved by compression--as they typically get stored elsewhere):
+    // the following is a deprecated property (not preserved by compression)
     jl_value_t *slottypes; // inferred types of slots
+    // more inferred data:
+    jl_value_t *rettype; // return type relevant for fptr
     jl_method_instance_t *parent; // context (after inference, otherwise nothing)
+    // the following are required to cache the method correctly
+    jl_value_t *edges; // forward edge info (svec preferred, but tolerates Array{Any} and nothing token)
+    size_t min_world;
+    size_t max_world;
 
     // These may be used by generated functions to further constrain the resulting inputs.
-    // They are not used by any other part of the system and may be moved elsewhere in the
-    // future.
     jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference
-    jl_value_t *edges; // forward edges to method instances that must be invalidated
-    size_t min_world;
-    size_t max_world;
+    size_t nargs;
 
     // various boolean properties:
     uint8_t propagate_inbounds;
     uint8_t has_fcall;
     uint8_t nospecializeinfer;
+    uint8_t isva;
     // uint8 settings
     uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline
     uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none
@@ -337,6 +351,7 @@ typedef struct _jl_method_t {
     jl_value_t *slot_syms; // compacted list of slot names (String)
     jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
     jl_value_t *source;  // original code template (jl_code_info_t, but may be compressed), null for builtins
+    jl_debuginfo_t *debuginfo;  // fixed linetable from the source argument, null if not available
     _Atomic(jl_method_instance_t*) unspecialized;  // unspecialized executable method instance, or null
     jl_value_t *generator;  // executable code-generating function if available
     jl_array_t *roots;  // pointers in generated code (shared to reduce memory), or null
@@ -392,10 +407,8 @@ struct _jl_method_instance_t {
     } def; // pointer back to the context for this code
     jl_value_t *specTypes;  // argument types this was specialized for
     jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sig
-    _Atomic(jl_value_t*) uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter
     jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
     _Atomic(struct _jl_code_instance_t*) cache;
-    uint8_t inInference; // flags to tell if inference is running on this object
     uint8_t cache_with_orig; // !cache_with_specTypes
     _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call
 };
@@ -432,29 +445,21 @@ typedef struct _jl_code_instance_t {
     //               deleted to save space.
     // - null, indicating that inference was not yet completed or did not succeed
     _Atomic(jl_value_t *) inferred;
-    //TODO: jl_array_t *edges; // stored information about edges from this object
+    _Atomic(jl_debuginfo_t *) debuginfo; // stored information about edges from this object (set once, with a happens-before both source and invoke)
     //TODO: uint8_t absolute_max; // whether true max world is unknown
 
     // purity results
     // see also encode_effects() and decode_effects() in `base/compiler/effects.jl`,
-    uint32_t ipo_purity_bits;
-    // ipo_purity_flags:
-    //     uint8_t ipo_consistent          : 2;
-    //     uint8_t ipo_effect_free         : 2;
-    //     uint8_t ipo_nothrow             : 2;
-    //     uint8_t ipo_terminates          : 2;
-    //     uint8_t ipo_nonoverlayed        : 1;
-    //     uint8_t ipo_notaskstate         : 2;
-    //     uint8_t ipo_inaccessiblememonly : 2;
-    _Atomic(uint32_t) purity_bits;
+    _Atomic(uint32_t) ipo_purity_bits;
     // purity_flags:
-    //     uint8_t consistent          : 2;
+    //     uint8_t consistent          : 3;
     //     uint8_t effect_free         : 2;
-    //     uint8_t nothrow             : 2;
-    //     uint8_t terminates          : 2;
-    //     uint8_t nonoverlayed        : 1;
-    //     uint8_t notaskstate         : 2;
+    //     uint8_t nothrow             : 1;
+    //     uint8_t terminates          : 1;
+    //     uint8_t notaskstate         : 1;
     //     uint8_t inaccessiblememonly : 2;
+    //     uint8_t noub                : 2;
+    //     uint8_t nonoverlayed        : 2;
     jl_value_t *analysis_results; // Analysis results about this code (IPO-safe)
 
     // compilation state cache
@@ -517,6 +522,7 @@ typedef struct {
     uint8_t mayinlinealloc:1;
     uint8_t _reserved:5;
     uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation)
+    uint8_t constprop_heustic; // override for inference's constprop heuristic
 } jl_typename_t;
 
 typedef struct {
@@ -556,7 +562,10 @@ typedef struct {
         // metadata bit only for GenericMemory eltype layout
         uint16_t arrayelem_isboxed : 1;
         uint16_t arrayelem_isunion : 1;
-        uint16_t padding : 11;
+        // If set, this type's egality can be determined entirely by comparing
+        // the non-padding bits of this datatype.
+        uint16_t isbitsegal : 1;
+        uint16_t padding : 10;
     } flags;
     // union {
     //     jl_fielddesc8_t field8[nfields];
@@ -604,19 +613,84 @@ typedef struct _jl_weakref_t {
     jl_value_t *value;
 } jl_weakref_t;
 
+enum jl_partition_kind {
+    // Constant: This binding partition is a constant declared using `const`
+    //  ->restriction holds the constant value
+    BINDING_KIND_CONST        = 0x0,
+    // Import Constant: This binding partition is a constant declared using `import A`
+    //  ->restriction holds the constant value
+    BINDING_KIND_CONST_IMPORT = 0x1,
+    // Global: This binding partition is a global variable.
+    //  -> restriction holds the type restriction
+    BINDING_KIND_GLOBAL       = 0x2,
+    // Implicit: The binding was implicitly imported from a `using`'d module.
+    //  ->restriction holds the imported binding
+    BINDING_KIND_IMPLICIT     = 0x3,
+    // Explicit: The binding was explicitly `using`'d by name
+    //  ->restriction holds the imported binding
+    BINDING_KIND_EXPLICIT     = 0x4,
+    // Imported: The binding was explicitly `import`'d by name
+    //  ->restriction holds the imported binding
+    BINDING_KIND_IMPORTED     = 0x5,
+    // Failed: We attempted to import the binding, but the import was ambiguous
+    //  ->restriction is NULL.
+    BINDING_KIND_FAILED       = 0x6,
+    // Declared: The binding was declared using `global` or similar
+    //  ->restriction is NULL.
+    BINDING_KIND_DECLARED     = 0x7,
+    // Guard: The binding was looked at, but no global or import was resolved at the time
+    //  ->restriction is NULL.
+    BINDING_KIND_GUARD        = 0x8
+};
+
+#ifdef _P64
+// Union of a ptr and a 3 bit field.
+typedef uintptr_t jl_ptr_kind_union_t;
+#else
+typedef struct __attribute__((aligned(8))) { jl_value_t *val; size_t kind; } jl_ptr_kind_union_t;
+#endif
+typedef struct __attribute__((aligned(8))) _jl_binding_partition_t {
+    JL_DATA_TYPE
+    /* union {
+     *   // For ->kind == BINDING_KIND_GLOBAL
+     *   jl_value_t *type_restriction;
+     *   // For ->kind == BINDING_KIND_CONST(_IMPORT)
+     *   jl_value_t *constval;
+     *   // For ->kind in (BINDING_KIND_IMPLICIT, BINDING_KIND_EXPLICIT, BINDING_KIND_IMPORT)
+     *   jl_binding_t *imported;
+     * } restriction;
+     *
+     * Currently: Low 3 bits hold ->kind on _P64 to avoid needing >8 byte atomics
+     *
+     * This field is updated atomically with both kind and restriction. The following
+     * transitions are allowed and modeled by the system:
+     *
+     *  GUARD -> any
+     *  (DECLARED, FAILED) -> any non-GUARD
+     *  IMPLICIT -> {EXPLICIT, IMPORTED} (->restriction unchanged only)
+     *
+     * In addition, we permit (with warning about undefined behavior) changing the restriction
+     * pointer for CONST(_IMPORT).
+     *
+     * All other kind or restriction transitions are disallowed.
+     */
+    _Atomic(jl_ptr_kind_union_t) restriction;
+    size_t min_world;
+    _Atomic(size_t) max_world;
+    _Atomic(struct _jl_binding_partition_t*) next;
+    size_t reserved; // Reserved for ->kind. Currently this holds the low bits of ->restriction during serialization
+} jl_binding_partition_t;
+
 typedef struct _jl_binding_t {
     JL_DATA_TYPE
-    _Atomic(jl_value_t*) value;
     jl_globalref_t *globalref;  // cached GlobalRef for this binding
-    _Atomic(struct _jl_binding_t*) owner;  // for individual imported bindings (NULL until 'resolved')
-    _Atomic(jl_value_t*) ty;  // binding type
-    uint8_t constp:1;
+    _Atomic(jl_value_t*) value;
+    _Atomic(jl_binding_partition_t*) partitions;
+    uint8_t declared:1;
     uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp`
     uint8_t publicp:1; // exportp without publicp is not allowed.
-    uint8_t imported:1;
-    uint8_t usingfailed:1;
     uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
-    uint8_t padding:1;
+    uint8_t padding:3;
 } jl_binding_t;
 
 typedef struct {
@@ -646,6 +720,7 @@ typedef struct _jl_module_t {
 } jl_module_t;
 
 struct _jl_globalref_t {
+    JL_DATA_TYPE
     jl_module_t *mod;
     jl_sym_t *name;
     jl_binding_t *binding;
@@ -822,6 +897,7 @@ extern JL_DLLIMPORT jl_value_t *jl_bottom_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_method_instance_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_debuginfo_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_unionall_t *jl_addrspace_type JL_GLOBALLY_ROOTED;
@@ -845,6 +921,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_fielderror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_missingcodeerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
@@ -906,6 +983,7 @@ extern JL_DLLIMPORT jl_value_t *jl_memoryref_uint8_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_memoryref_any_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_binding_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_binding_partition_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
@@ -1027,39 +1105,14 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT;
 
 #endif
 
-JL_DLLEXPORT int jl_gc_enable(int on);
-JL_DLLEXPORT int jl_gc_is_enabled(void);
-
-typedef enum {
-    JL_GC_AUTO = 0,         // use heuristics to determine the collection type
-    JL_GC_FULL = 1,         // force a full collection
-    JL_GC_INCREMENTAL = 2,  // force an incremental collection
-} jl_gc_collection_t;
-
-JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
-
 JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz);
 JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
-JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
-// Set GC memory trigger in bytes for greedy memory collecting
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
-JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
-
-JL_DLLEXPORT void jl_clear_malloc_data(void);
 
 // GC write barriers
-JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT;
 
 STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
@@ -1091,11 +1144,10 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
         jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
 }
 
-JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
-JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
-                                         int isaligned, jl_value_t *owner);
 JL_DLLEXPORT void jl_gc_safepoint(void);
 JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);
+JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct);
+JL_DLLEXPORT void jl_safepoint_resume_all_threads(struct _jl_task_t *ct);
 JL_DLLEXPORT int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT;
 
 void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT;
@@ -1157,8 +1209,8 @@ STATIC_INLINE jl_value_t *jl_svecset(
 /*
   how - allocation style
   0 = data is inlined
-  1 = owns the gc-managed data, exclusively
-  2 = malloc-allocated pointer (may or may not own it)
+  1 = owns the gc-managed data, exclusively (will free it)
+  2 = malloc-allocated pointer (does not own it)
   3 = has a pointer to the object that owns the data pointer
 */
 STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT
@@ -1479,6 +1531,7 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 #define jl_is_slotnumber(v)  jl_typetagis(v,jl_slotnumber_type)
 #define jl_is_expr(v)        jl_typetagis(v,jl_expr_type)
 #define jl_is_binding(v)     jl_typetagis(v,jl_binding_type)
+#define jl_is_binding_partition(v) jl_typetagis(v,jl_binding_partition_type)
 #define jl_is_globalref(v)   jl_typetagis(v,jl_globalref_type)
 #define jl_is_gotonode(v)    jl_typetagis(v,jl_gotonode_type)
 #define jl_is_gotoifnot(v)   jl_typetagis(v,jl_gotoifnot_type)
@@ -1777,14 +1830,14 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_gensym(void);
 JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len);
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void);
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd);
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name);
 JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world);
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_int8(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x) JL_NOTSAFEPOINT;
@@ -1923,9 +1976,9 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var
 JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var);
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import);
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var);
@@ -1940,7 +1993,8 @@ JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_s
 JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs);
 JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs);
 JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED);
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var);
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from);
 JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
 JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
@@ -2000,7 +2054,7 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname,
                                                jl_value_t *ty JL_MAYBE_UNROOTED,
                                                jl_value_t *got JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope JL_MAYBE_UNROOTED);
-JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var);
+JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var);
 JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED,
                                               jl_value_t *t JL_MAYBE_UNROOTED);
@@ -2014,17 +2068,6 @@ JL_DLLEXPORT void JL_NORETURN jl_bounds_error_unboxed_int(void *v, jl_value_t *v
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error_ints(jl_value_t *v JL_MAYBE_UNROOTED,
                                                    size_t *idxs, size_t nidxs);
 
-// Return the exception currently being handled, or `jl_nothing`.
-//
-// The catch scope is determined dynamically so this works in functions called
-// from a catch block.  The returned value is gc rooted until we exit the
-// enclosing JL_CATCH.
-// FIXME: Teach the static analyzer about this rather than using
-// JL_GLOBALLY_ROOTED which is far too optimistic.
-JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_exception_occurred(void);
-JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
-
 #define JL_NARGS(fname, min, max)                               \
     if (nargs < min) jl_too_few_args(#fname, min);              \
     else if (nargs > max) jl_too_many_args(#fname, max);
@@ -2070,6 +2113,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e
 JL_DLLEXPORT void jl_restore_system_image(const char *fname);
 JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
 JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
+JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
 JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);
@@ -2145,7 +2189,9 @@ JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_value_t *data, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms);
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
 JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
-
+JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_value_t *cl, size_t pc) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts);
+JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_value_t *cl, size_t nstmts);
 
 JL_DLLEXPORT int jl_is_operator(const char *sym);
 JL_DLLEXPORT int jl_is_unary_operator(const char *sym);
@@ -2254,11 +2300,7 @@ typedef struct _jl_task_t {
     // current exception handler
     jl_handler_t *eh;
     // saved thread state
-    jl_ucontext_t ctx;
-    void *stkbuf; // malloc'd memory (either copybuf or stack)
-    size_t bufsz; // actual sizeof stkbuf
-    unsigned int copy_stack:31; // sizeof stack for copybuf
-    unsigned int started:1;
+    jl_ucontext_t ctx; // pointer into stkbuf, if suspended
 } jl_task_t;
 
 #define JL_TASK_STATE_RUNNABLE 0
@@ -2271,7 +2313,6 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow(void);
-JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
 JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
@@ -2282,11 +2323,24 @@ extern JL_DLLIMPORT int jl_task_ptls_offset;
 
 #include "julia_locks.h"   // requires jl_task_t definition
 
-JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh);
-JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh);
-JL_DLLEXPORT void jl_pop_handler(int n);
-JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT;
+// Return the exception currently being handled, or `jl_nothing`.
+//
+// The catch scope is determined dynamically so this works in functions called
+// from a catch block.  The returned value is gc rooted until we exit the
+// enclosing JL_CATCH.
+// FIXME: Teach the static analyzer about this rather than using
+// JL_GLOBALLY_ROOTED which is far too optimistic.
+JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_exception_occurred(void);
+JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT ;
+JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh);
+JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT;
 
 #if defined(_OS_WINDOWS_)
 #if defined(_COMPILER_GCC_)
@@ -2337,24 +2391,37 @@ extern void (*real_siglongjmp)(jmp_buf _Buf, int _Value);
 
 #ifdef __clang_gcanalyzer__
 
-// This is hard. Ideally we'd teach the static analyzer about the extra control
-// flow edges. But for now, just hide this as best we can
 extern int had_exception;
-#define JL_TRY if (1)
-#define JL_CATCH if (had_exception)
+
+// The analyzer assumes that the TRY block always executes to completion.
+// This can lead to both false positives and false negatives, since it doesn't model the fact that throwing always leaves the try block early.
+#define JL_TRY                                                      \
+    int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct;    \
+    __eh_ct = jl_current_task;                                      \
+    size_t __excstack_state = jl_excstack_state(__eh_ct);           \
+    jl_enter_handler(__eh_ct, &__eh);                               \
+    __eh_ct->eh = &__eh;                                            \
+    for (i__try=1; i__try; i__try=0)
+
+#define JL_CATCH                                                    \
+    if (!had_exception)                                             \
+        jl_eh_restore_state_noexcept(__eh_ct, &__eh);               \
+    else                                                            \
+        for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state))
 
 #else
 
-#define JL_TRY                                                    \
-    int i__tr, i__ca; jl_handler_t __eh;                          \
-    size_t __excstack_state = jl_excstack_state();                \
-    jl_enter_handler(&__eh);                                      \
-    if (!jl_setjmp(__eh.eh_ctx,0))                                \
-        for (i__tr=1; i__tr; i__tr=0, jl_eh_restore_state(&__eh))
+#define JL_TRY                                                      \
+    int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct;    \
+    __eh_ct = jl_current_task;                                      \
+    size_t __excstack_state = jl_excstack_state(__eh_ct);           \
+    jl_enter_handler(__eh_ct, &__eh);                               \
+    if (!jl_setjmp(__eh.eh_ctx, 0))                                 \
+        for (i__try=1, __eh_ct->eh = &__eh; i__try; i__try=0, /* TRY BLOCK; */ jl_eh_restore_state_noexcept(__eh_ct, &__eh))
 
-#define JL_CATCH                                                \
-    else                                                        \
-        for (i__ca=1, jl_eh_restore_state(&__eh); i__ca; i__ca=0, jl_restore_excstack(__excstack_state))
+#define JL_CATCH                                                    \
+    else                                                            \
+        for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state))
 
 #endif
 
@@ -2389,19 +2456,24 @@ typedef struct {
 } jl_uv_file_t;
 
 #ifdef __GNUC__
-#define _JL_FORMAT_ATTR(type, str, arg) \
-    __attribute__((format(type, str, arg)))
+#  ifdef __MINGW32__
+#define _JL_FORMAT_ATTR(str, arg) \
+    __attribute__((format(__MINGW_PRINTF_FORMAT, str, arg)))
+#  else
+#define _JL_FORMAT_ATTR(str, arg) \
+    __attribute__((format(printf, str, arg)))
+#  endif
 #else
-#define _JL_FORMAT_ATTR(type, str, arg)
+#define _JL_FORMAT_ATTR(str, arg)
 #endif
 
 JL_DLLEXPORT void jl_uv_puts(struct uv_stream_s *stream, const char *str, size_t n);
 JL_DLLEXPORT int jl_printf(struct uv_stream_s *s, const char *format, ...)
-    _JL_FORMAT_ATTR(printf, 2, 3);
+    _JL_FORMAT_ATTR(2, 3);
 JL_DLLEXPORT int jl_vprintf(struct uv_stream_s *s, const char *format, va_list args)
-    _JL_FORMAT_ATTR(printf, 2, 0);
+    _JL_FORMAT_ATTR(2, 0);
 JL_DLLEXPORT void jl_safe_printf(const char *str, ...) JL_NOTSAFEPOINT
-    _JL_FORMAT_ATTR(printf, 1, 2);
+    _JL_FORMAT_ATTR(1, 2);
 
 extern JL_DLLEXPORT JL_STREAM *JL_STDIN;
 extern JL_DLLEXPORT JL_STREAM *JL_STDOUT;
diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h
index 1c0929717b293..1f35d3693fefd 100644
--- a/src/julia_fasttls.h
+++ b/src/julia_fasttls.h
@@ -22,14 +22,9 @@ extern "C" {
 
 typedef struct _jl_gcframe_t jl_gcframe_t;
 
-#if defined(_OS_DARWIN_)
-#include <pthread.h>
-typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific)
-#else
 typedef jl_gcframe_t **(jl_get_pgcstack_func)(void);
-#endif
 
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_)
 #define JULIA_DEFINE_FAST_TLS                                                                   \
 static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec;  \
 JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore;                                        \
diff --git a/src/julia_gcext.h b/src/julia_gcext.h
index c65586b85547a..05140e4b09ace 100644
--- a/src/julia_gcext.h
+++ b/src/julia_gcext.h
@@ -39,8 +39,8 @@ typedef void (*jl_gc_cb_notify_gc_pressure_t)(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable);
 
 // Types for custom mark and sweep functions.
-typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj);
-typedef void (*jl_sweepfunc_t)(jl_value_t *obj);
+typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj) JL_NOTSAFEPOINT;
+typedef void (*jl_sweepfunc_t)(jl_value_t *obj) JL_NOTSAFEPOINT;
 
 // Function to create a new foreign type with custom
 // mark and sweep functions.
@@ -60,10 +60,10 @@ JL_DLLEXPORT int jl_reinit_foreign_type(
         jl_markfunc_t markfunc,
         jl_sweepfunc_t sweepfunc);
 
-JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt);
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void);
-JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void);
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) JL_NOTSAFEPOINT;
 
 // Field layout descriptor for custom types that do
 // not fit Julia layout conventions. This is associated with
@@ -80,9 +80,9 @@ JL_DLLEXPORT void *jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty);
 // Queue an object or array of objects for scanning by the garbage collector.
 // These functions must only be called from within a root scanner callback
 // or from within a custom mark function.
-JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj);
+JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
-    jl_value_t **objs, size_t nobjs);
+    jl_value_t **objs, size_t nobjs) JL_NOTSAFEPOINT;
 
 // Sweep functions will not automatically be called for objects of
 // foreign types, as that may not always be desired. Only calling
@@ -133,7 +133,7 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void);
 // jl_typeof(obj) is an actual type object.
 //
 // NOTE: Only valid to call from within a GC context.
-JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p);
+JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT;
 
 // Return a non-null pointer to the start of the stack area if the task
 // has an associated stack buffer. In that case, *size will also contain
@@ -150,7 +150,7 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
 // and may not be tight.
 JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
                                        char **active_start, char **active_end,
-                                       char **total_start, char **total_end);
+                                       char **total_start, char **total_end) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 76556e288f387..f00667d016796 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -13,10 +13,13 @@
 #include "support/strtod.h"
 #include "gc-alloc-profiler.h"
 #include "support/rle.h"
+#include <ctype.h>
 #include <stdint.h>
 #include <uv.h>
 #include <llvm-c/Types.h>
 #include <llvm-c/Orc.h>
+#include <llvm-version.h>
+
 #if !defined(_WIN32)
 #include <unistd.h>
 #else
@@ -63,7 +66,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf)
        that we're resetting to. The idea is to remove the poison from the frames
        that we're skipping over, since they won't be unwound. */
     uintptr_t top = jmpbuf_sp(buf);
-    uintptr_t bottom = (uintptr_t)ct->stkbuf;
+    uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase  - ct->ptls->stacksize : (char*)ct->ctx.stkbuf);
+    //uintptr_t bottom = (uintptr_t)&top;
     __asan_unpoison_stack_memory(bottom, top - bottom);
 }
 static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) {
@@ -98,6 +102,26 @@ JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber);
 JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #endif
 
+#ifndef _OS_WINDOWS_
+    #if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
+        #define MAX_ALIGN 8
+    #elif defined(_CPU_AARCH64_) || (JL_LLVM_VERSION >= 180000 && (defined(_CPU_X86_64_) || defined(_CPU_X86_)))
+    // int128 is 16 bytes aligned on aarch64 and on x86 with LLVM >= 18
+        #define MAX_ALIGN 16
+    #elif defined(_P64)
+    // Generically we assume MAX_ALIGN is sizeof(void*)
+        #define MAX_ALIGN 8
+    #else
+        #define MAX_ALIGN 4
+    #endif
+#else
+    #if JL_LLVM_VERSION >= 180000
+        #define MAX_ALIGN 16
+    #else
+        #define MAX_ALIGN 8
+    #endif
+#endif
+
 #ifndef alignof
 #  ifndef __cplusplus
 #    ifdef __GNUC__
@@ -311,7 +335,7 @@ static inline void memassign_safe(int hasptr, char *dst, const jl_value_t *src,
     memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
 }
 
-// -- gc.c -- //
+// -- GC -- //
 
 #define GC_CLEAN  0 // freshly allocated
 #define GC_MARKED 1 // reachable and young
@@ -325,6 +349,7 @@ extern JL_DLLIMPORT jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
 extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_method_t *jl_opaque_closure_method JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter;
+extern jl_debuginfo_t *jl_nulldebuginfo JL_GLOBALLY_ROOTED;
 
 typedef void (*tracer_cb)(jl_value_t *tracee);
 extern tracer_cb jl_newmeth_tracer;
@@ -333,6 +358,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
 extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
 JL_DLLEXPORT extern arraylist_t jl_image_relocs;  // external linkage: sysimg/pkgimages
+JL_DLLEXPORT extern arraylist_t jl_top_mods;  // external linkage: sysimg/pkgimages
 extern arraylist_t eytzinger_image_tree;
 extern arraylist_t eytzinger_idxs;
 
@@ -344,15 +370,10 @@ extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT extern int jl_lineno;
 JL_DLLEXPORT extern const char *jl_filename;
 
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset,
                                    int osize);
 jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
-extern uv_mutex_t gc_perm_lock;
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
 void gc_sweep_sysimg(void);
 
 
@@ -403,7 +424,7 @@ static const int jl_gc_sizeclasses[] = {
 #ifdef GC_SMALL_PAGE
 #ifdef _P64
 #  define JL_GC_N_POOLS 39
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
 #  define JL_GC_N_POOLS 40
 #else
 #  define JL_GC_N_POOLS 41
@@ -411,7 +432,7 @@ static const int jl_gc_sizeclasses[] = {
 #else
 #ifdef _P64
 #  define JL_GC_N_POOLS 49
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
 #  define JL_GC_N_POOLS 50
 #else
 #  define JL_GC_N_POOLS 51
@@ -426,7 +447,7 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT
 #ifdef _P64
     (void)sz;
     return 16;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     return sz <= 4 ? 8 : 16;
 #else
     // szclass 8
@@ -458,7 +479,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT
     if (sz <= 8)
         return 0;
     const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     if (sz <= 8)
         return (sz >= 4 ? 1 : 0);
     const int N = 1;
@@ -476,7 +497,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
     if (sz >= 16 && sz <= 152) {
 #ifdef _P64
         const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
         const int N = 1;
 #else
         const int N = 2;
@@ -492,17 +513,19 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
 #define GC_MAX_SZCLASS (2032-sizeof(void*))
 static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");
 
+
+// Size does NOT include the type tag!!
 STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
 {
     jl_value_t *v;
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
         int pool_id = jl_gc_szclass(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
+        // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in
         // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+        v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
@@ -535,7 +558,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty);
 // defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it
 const extern uint64_t _jl_buff_tag[3];
 #define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16))
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void);
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT;
 
 typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer
 STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
@@ -543,17 +566,6 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
     return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
 }
 
-STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
-{
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
-                                                 sizeof(void*) * 2 : 16));
-    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
-                                                              sizeof(void*) % align);
-    uintptr_t tag = (uintptr_t)ty;
-    o->header = tag | GC_OLD_MARKED;
-    return jl_valueof(o);
-}
 jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x);
 jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
@@ -589,17 +601,10 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
 #endif
 #endif
 
-jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz);
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
-
-JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void);
-
-
-JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
-void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT;
+size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
+void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT;
 void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
@@ -627,16 +632,18 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
 // -- helper types -- //
 
 typedef struct {
-    uint8_t propagate_inbounds:1;
-    uint8_t has_fcall:1;
-    uint8_t nospecializeinfer:1;
-    uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none
-    uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+    uint16_t propagate_inbounds:1;
+    uint16_t has_fcall:1;
+    uint16_t nospecializeinfer:1;
+    uint16_t isva:1;
+    uint16_t nargsmatchesmethod:1;
+    uint16_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+    uint16_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
 } jl_code_info_flags_bitfield_t;
 
 typedef union {
     jl_code_info_flags_bitfield_t bits;
-    uint8_t packed;
+    uint16_t packed;
 } jl_code_info_flags_t;
 
 // -- functions -- //
@@ -645,27 +652,39 @@ typedef union {
 #define SOURCE_MODE_NOT_REQUIRED            0x0
 #define SOURCE_MODE_ABI                     0x1
 #define SOURCE_MODE_FORCE_SOURCE            0x2
-#define SOURCE_MODE_FORCE_SOURCE_UNCACHED   0x3
 
-JL_DLLEXPORT jl_code_instance_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force, uint8_t source_mode);
+JL_DLLEXPORT jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner);
+JL_DLLEXPORT void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src);
+void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) JL_NOTSAFEPOINT;
+int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_code_instance_t *jl_type_infer(jl_method_instance_t *li, size_t world, uint8_t source_mode);
+JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world);
 JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
-        size_t min_world, size_t max_world);
-jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT);
+        size_t min_world, size_t max_world, jl_debuginfo_t *edges);
 jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
+JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *owner,
         jl_value_t *rettype, jl_value_t *exctype,
         jl_value_t *inferred_const, jl_value_t *inferred,
         int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *analysis_results,
-        uint8_t relocatability);
+        uint32_t effects, jl_value_t *analysis_results,
+        uint8_t relocatability, jl_debuginfo_t *edges /* , int absolute_max*/);
+
+JL_DLLEXPORT const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT;
+JL_DLLEXPORT const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def) JL_NOTSAFEPOINT;
+JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world);
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
 jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
+jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
 int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile);
 jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ast);
 JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void);
@@ -686,7 +705,7 @@ int jl_valid_type_param(jl_value_t *v);
 
 JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs);
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world);
+void JL_NORETURN jl_method_error(jl_value_t *F, jl_value_t **args, size_t na, size_t world);
 JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
 
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
@@ -746,10 +765,12 @@ JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b);
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals);
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val);
+jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow);
 jl_unionall_t *jl_rename_unionall(jl_unionall_t *u);
 JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u);
+jl_value_t* jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y);
 int jl_count_union_components(jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT;
@@ -758,7 +779,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module,
 jl_datatype_t *jl_new_uninitialized_datatype(void);
 void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable);
 JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
-jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check);
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow);
 void jl_reinstantiate_inner_types(jl_datatype_t *t);
 jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type);
 void jl_cache_type_(jl_datatype_t *type);
@@ -785,8 +806,11 @@ jl_array_t *jl_get_loaded_modules(void);
 JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 
+JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val);
+void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty);
 void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type);
-jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
+JL_DLLEXPORT void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type);
+JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno);
 
 jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e);
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *clos, jl_value_t **args, size_t nargs);
@@ -835,9 +859,122 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBA
 jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
     jl_value_t *source,  jl_value_t **env, size_t nenv, int do_compile);
 jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_value_t *uninferred_source, int isva);
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred);
 JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source);
 
+EXTERN_INLINE_DECLARE enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    uint8_t bits = (pku & 0x7);
+    jl_value_t *val = (jl_value_t*)(pku & ~0x7);
+
+    if (val == NULL && bits == BINDING_KIND_IMPLICIT) {
+        return BINDING_KIND_GUARD;
+    }
+
+    return (enum jl_partition_kind)bits;
+#else
+    return (enum jl_partition_kind)pku.kind;
+#endif
+}
+
+STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    jl_value_t *val = (jl_value_t*)(pku & ~0x7);
+    // This is a little bit of a lie at the moment - it is one of the things that
+    // can go wrong with binding replacement.
+    JL_GC_PROMISE_ROOTED(val);
+    return val;
+#else
+    return pku.val;
+#endif
+}
+
+STATIC_INLINE jl_ptr_kind_union_t encode_restriction(jl_value_t *val, enum jl_partition_kind kind) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    if (kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED || kind == BINDING_KIND_FAILED)
+        assert(val == NULL);
+    if (kind == BINDING_KIND_GUARD)
+        kind = BINDING_KIND_IMPLICIT;
+    assert((((uintptr_t)val) & 0x7) == 0);
+    return ((jl_ptr_kind_union_t)val) | kind;
+#else
+    jl_ptr_kind_union_t ret = { val, kind };
+    return ret;
+#endif
+}
+
+STATIC_INLINE int jl_bkind_is_some_import(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_IMPLICIT || kind == BINDING_KIND_EXPLICIT || kind == BINDING_KIND_IMPORTED;
+}
+
+STATIC_INLINE int jl_bkind_is_some_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT;
+}
+
+STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_FAILED || kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED;
+}
+
+EXTERN_INLINE_DECLARE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT {
+    if (!b)
+        return NULL;
+    assert(jl_is_binding(b));
+    return jl_atomic_load_relaxed(&b->partitions);
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world);
+
+EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT {
+    return decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
+}
+
+STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT;
+
+#ifndef __clang_analyzer__
+STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT
+{
+    while (1) {
+        if (!*bpart)
+            return encode_restriction(NULL, BINDING_KIND_GUARD);
+        jl_ptr_kind_union_t pku = jl_atomic_load_acquire(&(*bpart)->restriction);
+        if (!jl_bkind_is_some_import(decode_restriction_kind(pku)))
+            return pku;
+        *bnd = (jl_binding_t*)decode_restriction_value(pku);
+        *bpart = jl_get_binding_partition(*bnd, world);
+    }
+}
+#endif
+
+STATIC_INLINE int is10digit(char c) JL_NOTSAFEPOINT
+{
+    return (c >= '0' && c <= '9');
+}
+
+STATIC_INLINE int is_anonfn_typename(char *name)
+{
+    if (name[0] != '#' || name[1] == '#')
+        return 0;
+    char *other = strrchr(name, '#');
+    return other > &name[1] && is10digit(other[1]);
+}
+
+// Returns true for typenames of anounymous functions that have been canonicalized (i.e.
+// we mangled the name of the outermost enclosing function in their name).
+STATIC_INLINE int is_canonicalized_anonfn_typename(char *name) JL_NOTSAFEPOINT
+{
+    char *delim = strchr(&name[1], '#');
+    if (delim == NULL)
+        return 0;
+    if (delim[1] != '#')
+        return 0;
+    if (!is10digit(delim[2]))
+        return 0;
+    return 1;
+}
+
 // Each tuple can exist in one of 4 Vararg states:
 //   NONE: no vararg                            Tuple{Int,Float32}
 //   INT: vararg with integer length            Tuple{Int,Vararg{Float32,2}}
@@ -917,9 +1054,7 @@ void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo) JL_NOTSAFEPOINT;
 jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
-void jl_gc_init(void);
 void jl_init_uv(void);
-void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT;
 void jl_init_int32_int64_cache(void);
 JL_DLLEXPORT void jl_init_options(void);
 
@@ -931,6 +1066,7 @@ void jl_init_threading(void);
 void jl_start_threads(void);
 
 // Whether the GC is running
+extern uv_mutex_t safepoint_lock;
 extern char *jl_safepoint_pages;
 STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
 {
@@ -977,9 +1113,7 @@ int jl_safepoint_consume_sigint(void);
 void jl_wake_libuv(void) JL_NOTSAFEPOINT;
 
 void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT;
-#if defined(_OS_DARWIN_)
-typedef pthread_key_t jl_pgcstack_key_t;
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 typedef DWORD jl_pgcstack_key_t;
 #else
 typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
@@ -1000,7 +1134,8 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
 
 size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
 
-uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
+// Query if this object is perm-allocated in an image.
+JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
 
 // the first argument to jl_idtable_rehash is used to return a value
 // make sure it is rooted if it is used after the function returns
@@ -1024,6 +1159,13 @@ JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee,
 JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller);
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
+                                   jl_code_instance_t *expected_ci,
+                                   jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi, jl_code_instance_t *ci) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world);
+JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src);
 JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
 
 uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
@@ -1259,28 +1401,47 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA
     return itr-2 - jl_excstack_bt_size(stack, itr);
 }
 // Exception stack manipulation
-void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
+void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
                       jl_value_t *exception JL_ROOTED_ARGUMENT,
                       jl_bt_element_t *bt_data, size_t bt_size);
 
+// System util to get maximum RSS
+JL_DLLEXPORT size_t jl_maxrss(void);
+
 //--------------------------------------------------
 // congruential random number generator
 // for a small amount of thread-local randomness
 
-STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT
+//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99
+// for better performance, it does however require making users expect a 32bit random number.
+
+STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max)
 {
-    if (max == 0)
+    if (max < 2)
         return 0;
     uint64_t mask = ~(uint64_t)0;
-    --max;
-    mask >>= __builtin_clzll(max|1);
-    uint64_t x;
+    int zeros = __builtin_clzll(max);
+    int bits = CHAR_BIT * sizeof(uint64_t) - zeros;
+    mask = mask >> zeros;
     do {
-        *seed = 69069 * (*seed) + 362437;
-        x = *seed & mask;
-    } while (x > max);
-    return x;
+        uint64_t value = 69069 * (*seed) + 362437;
+        *seed = value;
+        uint64_t x = value & mask;
+        if (x < max) {
+            return x;
+        }
+        int bits_left = zeros;
+        while (bits_left >= bits) {
+            value >>= bits;
+            x = value & mask;
+            if (x < max) {
+                return x;
+            }
+            bits_left -= bits;
+        }
+    } while (1);
 }
+
 JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_init_rand(void);
@@ -1416,7 +1577,6 @@ JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
 
-JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a);
 JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
 JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
@@ -1428,7 +1588,6 @@ JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;
 // -- synchronization utilities -- //
 
 extern jl_mutex_t typecache_lock;
-extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
 
 #if defined(__APPLE__)
 void jl_mach_gc_end(void) JL_NOTSAFEPOINT;
@@ -1587,6 +1746,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_as_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_global_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_globaldecl_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_local_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_list_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_dot_sym;
@@ -1643,6 +1803,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_uninferred_sym;
 
 JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
 JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
@@ -1658,6 +1819,7 @@ void jl_write_malloc_log(void);
 #  define jl_unreachable() ((void)jl_assume(0))
 #endif
 
+extern uv_mutex_t symtab_lock;
 jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 
 // Tools for locally disabling spurious compiler warnings
@@ -1715,7 +1877,7 @@ JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len);
 #define IR_FLAG_INBOUNDS 0x01
 
 JL_DLLIMPORT void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
-JL_DLLIMPORT void jl_compile_codeinst(jl_code_instance_t *unspec);
+JL_DLLIMPORT int jl_compile_codeinst(jl_code_instance_t *unspec);
 JL_DLLIMPORT int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
 
 typedef struct {
@@ -1725,7 +1887,7 @@ typedef struct {
 
 JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
         char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
-JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
+JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params);
 JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo);
 JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw);
diff --git a/src/julia_threads.h b/src/julia_threads.h
index 3f8f5391919b4..b697a0bf030ed 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,8 +4,8 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
+#include "gc-tls.h"
 #include "julia_atomics.h"
-#include "work-stealing-queue.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
 #endif
@@ -18,6 +18,8 @@ extern "C" {
 
 JL_DLLEXPORT int16_t jl_threadid(void);
 JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT;
 
 // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
 
@@ -31,20 +33,28 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
 // JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
 // JL_HAVE_UNW_CONTEXT -- libunwind-based
 // JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
-// JL_HAVE_SIGALTSTACK -- requires several syscall for start, setjmp for resume
 
 #ifdef _OS_WINDOWS_
 #define JL_HAVE_UCONTEXT
 typedef win32_ucontext_t jl_stack_context_t;
 typedef jl_stack_context_t _jl_ucontext_t;
+
+#elif defined(_OS_OPENBSD_)
+#define JL_HAVE_UNW_CONTEXT
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+typedef unw_context_t _jl_ucontext_t;
+typedef struct {
+    jl_jmp_buf uc_mcontext;
+} jl_stack_context_t;
+
 #else
 typedef struct {
     jl_jmp_buf uc_mcontext;
 } jl_stack_context_t;
 #if !defined(JL_HAVE_UCONTEXT) && \
     !defined(JL_HAVE_ASM) && \
-    !defined(JL_HAVE_UNW_CONTEXT) && \
-    !defined(JL_HAVE_SIGALTSTACK)
+    !defined(JL_HAVE_UNW_CONTEXT)
 #if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) ||  \
      defined(_CPU_ARM_) || defined(_CPU_PPC64_))
 #define JL_HAVE_ASM
@@ -60,7 +70,7 @@ typedef struct {
 #endif
 #endif
 
-#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
+#if !defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)
 typedef jl_stack_context_t _jl_ucontext_t;
 #endif
 #pragma GCC visibility push(default)
@@ -78,9 +88,13 @@ typedef ucontext_t _jl_ucontext_t;
 
 typedef struct {
     union {
-        _jl_ucontext_t ctx;
-        jl_stack_context_t copy_ctx;
+        _jl_ucontext_t *ctx;
+        jl_stack_context_t *copy_ctx;
     };
+    void *stkbuf; // malloc'd memory (either copybuf or stack)
+    size_t bufsz; // actual sizeof stkbuf
+    unsigned int copy_stack:31; // sizeof stack for copybuf
+    unsigned int started:1;
 #if defined(_COMPILER_TSAN_ENABLED_)
     void *tsan_state;
 #endif
@@ -105,82 +119,7 @@ typedef struct {
     uint32_t count;
 } jl_mutex_t;
 
-typedef struct {
-    jl_taggedvalue_t *freelist;   // root of list of free objects
-    jl_taggedvalue_t *newpages;   // root of list of chunks of free objects
-    uint16_t osize;      // size of objects in this pool
-} jl_gc_pool_t;
-
-typedef struct {
-    _Atomic(int64_t) allocd;
-    _Atomic(int64_t) pool_live_bytes;
-    _Atomic(uint64_t) malloc;
-    _Atomic(uint64_t) realloc;
-    _Atomic(uint64_t) poolalloc;
-    _Atomic(uint64_t) bigalloc;
-    _Atomic(int64_t) free_acc;
-    _Atomic(uint64_t) alloc_acc;
-} jl_thread_gc_num_t;
-
-typedef struct {
-    // variable for tracking weak references
-    small_arraylist_t weak_refs;
-    // live tasks started on this thread
-    // that are holding onto a stack from the pool
-    small_arraylist_t live_tasks;
-
-    // variables for tracking malloc'd arrays
-    struct _mallocarray_t *mallocarrays;
-    struct _mallocarray_t *mafreelist;
-
-    // variables for tracking big objects
-    struct _bigval_t *big_objects;
-
-    // variables for tracking "remembered set"
-    arraylist_t _remset[2]; // contains jl_value_t*
-    // lower bound of the number of pointers inside remembered values
-    int remset_nptr;
-    arraylist_t *remset;
-    arraylist_t *last_remset;
-
-    // variables for allocating objects from pools
-#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
-    jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];
-
-#define JL_N_STACK_POOLS 16
-    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
-} jl_thread_heap_t;
-
-typedef struct {
-    ws_queue_t chunk_queue;
-    ws_queue_t ptr_queue;
-    arraylist_t reclaim_set;
-} jl_gc_markqueue_t;
-
-typedef struct {
-    // thread local increment of `perm_scanned_bytes`
-    size_t perm_scanned_bytes;
-    // thread local increment of `scanned_bytes`
-    size_t scanned_bytes;
-    // Number of queued big objects (<= 1024)
-    size_t nbig_obj;
-    // Array of queued big objects to be moved between the young list
-    // and the old list.
-    // A set low bit means that the object should be moved from the old list
-    // to the young list (`mark_reset_age`).
-    // Objects can only be put into this list when the mark bit is flipped to
-    // `1` (atomically). Combining with the sync after marking,
-    // this makes sure that a single objects can only appear once in
-    // the lists (the mark bit cannot be flipped to `0` without sweeping)
-    void *big_obj[1024];
-} jl_gc_mark_cache_t;
-
 struct _jl_bt_element_t;
-struct _jl_gc_pagemeta_t;
-
-typedef struct {
-    _Atomic(struct _jl_gc_pagemeta_t *) bottom;
-} jl_gc_page_stack_t;
 
 // This includes all the thread local states we care about for a thread.
 // Changes to TLS field types must be reflected in codegen.
@@ -201,6 +140,10 @@ typedef struct _jl_tls_states_t {
 #define JL_GC_STATE_SAFE 2
     // gc_state = 2 means the thread is running unmanaged code that can be
     //              execute at the same time with the GC.
+#define JL_GC_PARALLEL_COLLECTOR_THREAD 3
+    // gc_state = 3 means the thread is a parallel collector thread (i.e. never runs Julia code)
+#define JL_GC_CONCURRENT_COLLECTOR_THREAD 4
+    // gc_state = 4 means the thread is a concurrent collector thread (background sweeper thread that never runs Julia code)
     _Atomic(int8_t) gc_state; // read from foreign threads
     // execution of certain certain impure
     // statements is prohibited from certain
@@ -211,21 +154,16 @@ typedef struct _jl_tls_states_t {
     int16_t disable_gc;
     // Counter to disable finalizer **on the current thread**
     int finalizers_inhibited;
-    jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
-    jl_thread_gc_num_t gc_num;
+    jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen
     volatile sig_atomic_t defer_signal;
     _Atomic(struct _jl_task_t*) current_task;
     struct _jl_task_t *next_task;
     struct _jl_task_t *previous_task;
     struct _jl_task_t *root_task;
     struct _jl_timing_block_t *timing_stack;
+    // This is the location of our copy_stack
     void *stackbase;
     size_t stacksize;
-    union {
-        _jl_ucontext_t base_ctx; // base context of stack
-        // This hack is needed to support always_copy_stacks:
-        jl_stack_context_t copy_stack_ctx;
-    };
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
     // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
@@ -248,18 +186,16 @@ typedef struct _jl_tls_states_t {
     jl_thread_t system_id;
     _Atomic(int16_t) suspend_count;
     arraylist_t finalizers;
-    jl_gc_page_stack_t page_metadata_allocd;
-    jl_gc_page_stack_t page_metadata_buffered;
-    jl_gc_markqueue_t mark_queue;
-    jl_gc_mark_cache_t gc_cache;
-    arraylist_t sweep_objs;
-    _Atomic(int64_t) gc_sweeps_requested;
     // Saved exception for previous *external* API call or NULL if cleared.
     // Access via jl_exception_occurred().
     struct _jl_value_t *previous_exception;
+#ifdef _OS_DARWIN_
+    jl_jmp_buf *volatile safe_restore;
+#endif
 
     // currently-held locks, to be released when an exception is thrown
     small_arraylist_t locks;
+    size_t engine_nqueued;
 
     JULIA_DEBUG_SLEEPWAKE(
         uint64_t uv_run_enter;
@@ -275,10 +211,7 @@ typedef struct _jl_tls_states_t {
 #endif
 } jl_tls_states_t;
 
-#ifndef JL_LIBRARY_EXPORTS
-// deprecated (only for external consumers)
 JL_DLLEXPORT void *jl_get_ptls_states(void);
-#endif
 
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
@@ -332,6 +265,8 @@ void jl_sigint_safepoint(jl_ptls_t tls);
 STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
                                      int8_t old_state)
 {
+    assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD);
+    assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD);
     jl_atomic_store_release(&ptls->gc_state, state);
     if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
         jl_gc_safepoint_(ptls);
@@ -343,10 +278,12 @@ STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
     return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state));
 }
 #ifdef __clang_gcanalyzer__
-int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; // this could be a safepoint, but we will assume it is not
-void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
-int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
-void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; // this might not be a safepoint, but we have to assume it could be (statically)
+// these might not be a safepoint (if they are no-op safe=>safe transitions), but we have to assume it could be (statically)
+// however mark a delineated region in which safepoints would be not permissible
+int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_LEAVE;
+void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_ENTER;
+int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_ENTER;
+void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE;
 #else
 #define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_UNSAFE)
 #define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_UNSAFE))
@@ -363,6 +300,9 @@ JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
 
+JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize);
+JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index 953ecc1830142..9d2fba832839c 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
     memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
     memop.isobjref = hasObjref(elty);
     auto &field = getField(offset, size, elty);
+    field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
+
     if (field.second.hasobjref != memop.isobjref)
         field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
     if (!isstore)
@@ -198,6 +200,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 auto elty = inst->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, 0, cur.offset,
                                                                inst->getType(),
@@ -289,6 +292,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 auto elty = storev->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                cur.offset, storev->getType(),
@@ -310,10 +314,14 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
             }
             required.use_info.hasload = true;
             auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            Type *elty = storev->getType();
             if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
-                                                               cur.offset, storev->getType(),
+                                                               cur.offset, elty,
                                                                true, required.DL)) {
                 LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             }
             required.use_info.refload = true;
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 49c3b15332a56..20e9132d10b4c 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -46,6 +46,8 @@ namespace jl_alloc {
         bool hasaggr:1;
         bool multiloc:1;
         bool hasload:1;
+        // The alloc has a unboxed object at this offset.
+        bool hasunboxed:1;
         llvm::Type *elty;
         llvm::SmallVector<MemOp,4> accesses;
         Field(uint32_t size, llvm::Type *elty)
@@ -54,6 +56,7 @@ namespace jl_alloc {
               hasaggr(false),
               multiloc(false),
               hasload(false),
+              hasunboxed(false),
               elty(elty)
         {
         }
@@ -95,6 +98,9 @@ namespace jl_alloc {
         // The alloc has an aggregate Julia object reference not in an explicit field.
         bool has_unknown_objrefaggr:1;
 
+        // The alloc has an unboxed object at an unknown offset.
+        bool has_unknown_unboxed:1;
+
         void reset()
         {
             escaped = false;
@@ -110,6 +116,7 @@ namespace jl_alloc {
             allockind = llvm::AllocFnKind::Unknown;
             has_unknown_objref = false;
             has_unknown_objrefaggr = false;
+            has_unknown_unboxed = false;
             uses.clear();
             preserves.clear();
             memops.clear();
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index 5df4f52aca425..188955fd50972 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -252,10 +252,12 @@ void Optimizer::optimizeAll()
             removeAlloc(orig);
             continue;
         }
+        bool has_unboxed = use_info.has_unknown_unboxed;
         bool has_ref = use_info.has_unknown_objref;
         bool has_refaggr = use_info.has_unknown_objrefaggr;
         for (auto memop: use_info.memops) {
             auto &field = memop.second;
+            has_unboxed |= field.hasunboxed;
             if (field.hasobjref) {
                 has_ref = true;
                 // This can be relaxed a little based on hasload
@@ -284,6 +286,19 @@ void Optimizer::optimizeAll()
             splitOnStack(orig);
             continue;
         }
+        // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine
+        // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }),
+        // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }.
+        // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value
+        if (has_unboxed && has_ref) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", orig);
+            });
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
         REMARK([&](){
             return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
                 << "GC allocation moved to stack " << ore::NV("GC Allocation", orig);
@@ -402,6 +417,8 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
         auto bb = use->getParent();
         if (!bbs.insert(bb).second)
             continue;
+        if (pred_empty(bb))
+            continue; // No predecessors so the block is dead
         assert(lifetime_stack.empty());
         Lifetime::Frame cur{bb};
         while (true) {
@@ -647,7 +664,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
         auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue));
         buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
+        ptr = cast<Instruction>(buff);
     }
     else {
         Type *buffty;
@@ -657,14 +674,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
             buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz);
         buff = prolog_builder.CreateAlloca(buffty);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace())));
+        ptr = cast<Instruction>(buff);
     }
     insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst);
     if (sz != 0 && !has_ref) { // TODO: fix has_ref case too
         IRBuilder<> builder(orig_inst);
         initializeAlloca(builder, buff, allockind);
     }
-    Instruction *new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace())));
+    Instruction *new_inst = cast<Instruction>(ptr);
     new_inst->takeName(orig_inst);
 
     auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
@@ -753,17 +770,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
-            auto cast_t = PointerType::getWithSamePointeeType(cast<PointerType>(user->getType()), new_i->getType()->getPointerAddressSpace());
-            auto replace_i = new_i;
-            Type *new_t = new_i->getType();
-            if (cast_t != new_t) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(cast_t->getContext().supportsTypedPointers());
-                replace_i = new BitCastInst(replace_i, cast_t, "", user);
-                replace_i->setDebugLoc(user->getDebugLoc());
-                replace_i->takeName(user);
-            }
-            push_frame(user, replace_i);
+            push_frame(user, new_i);
         }
         else if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
             SmallVector<Value *, 4> IdxOperands(gep->idx_begin(), gep->idx_end());
@@ -949,8 +956,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         }
         slot.slot = prolog_builder.CreateAlloca(allocty);
         IRBuilder<> builder(orig_inst);
-        insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())),
-                       ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
+        insertLifetime(slot.slot, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
         initializeAlloca(builder, slot.slot, use_info.allockind);
         slots.push_back(std::move(slot));
     }
@@ -1003,15 +1009,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         auto size = pass.DL->getTypeAllocSize(elty);
         Value *addr;
         if (offset % size == 0) {
-            addr = builder.CreateBitCast(slot.slot, elty->getPointerTo());
+            addr = slot.slot;
             if (offset != 0) {
                 addr = builder.CreateConstInBoundsGEP1_32(elty, addr, offset / size);
             }
         }
         else {
-            addr = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
+            addr = slot.slot;
             addr = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), addr, offset);
-            addr = builder.CreateBitCast(addr, elty->getPointerTo());
         }
         return addr;
     };
@@ -1031,7 +1036,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 assert(slot.offset == offset);
                 newload = builder.CreateLoad(pass.T_prjlvalue, slot.slot);
                 // Assume the addrspace is correct.
-                val = builder.CreateBitCast(newload, load_ty);
+                val = newload;
             }
             else {
                 newload = builder.CreateLoad(load_ty, slot_gep(slot, offset, load_ty, builder));
@@ -1067,10 +1072,9 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                     store_ty = T_pjlvalue;
                 }
                 else {
-                    store_ty = PointerType::getWithSamePointeeType(T_pjlvalue, cast<PointerType>(store_ty)->getAddressSpace());
-                    store_val = builder.CreateBitCast(store_val, store_ty);
+                    store_ty = PointerType::get(T_pjlvalue->getContext(), store_ty->getPointerAddressSpace());
                 }
-                if (cast<PointerType>(store_ty)->getAddressSpace() != AddressSpace::Tracked)
+                if (store_ty->getPointerAddressSpace() != AddressSpace::Tracked)
                     store_val = builder.CreateAddrSpaceCast(store_val, pass.T_prjlvalue);
                 newstore = builder.CreateStore(store_val, slot.slot);
             }
@@ -1133,14 +1137,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                                 store->setOrdering(AtomicOrdering::NotAtomic);
                                 continue;
                             }
-                            auto ptr8 = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
+                            Value *ptr_slot = slot.slot;
                             if (offset > slot.offset)
-                                ptr8 = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptr8,
+                                ptr_slot = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), slot.slot,
                                                                           offset - slot.offset);
                             auto sub_size = std::min(slot.offset + slot.size, offset + size) -
                                 std::max(offset, slot.offset);
                             // TODO: alignment computation
-                            builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0));
+                            builder.CreateMemSet(ptr_slot, val_arg, sub_size, MaybeAlign(0));
                         }
                         call->eraseFromParent();
                         return;
@@ -1257,8 +1261,8 @@ bool AllocOpt::doInitialization(Module &M)
 
     DL = &M.getDataLayout();
 
-    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) });
-    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) });
+    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
+    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
 
     return true;
 }
diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h
index 2c60907ca086c..956c04dbc7ded 100644
--- a/src/llvm-codegen-shared.h
+++ b/src/llvm-codegen-shared.h
@@ -125,7 +125,7 @@ struct CountTrackedPointers {
     CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false);
 };
 
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder);
+unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
 llvm::SmallVector<llvm::Value*, 0> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
 
 static inline void llvm_dump(llvm::Value *v)
@@ -186,65 +186,40 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi
     return inst;
 }
 
-// bitcast a value, but preserve its address space when dealing with pointer types
-static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value)
-{
-    using namespace llvm;
-    if (isa<PointerType>(jl_value) &&
-        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
-        // Cast to the proper address space
-        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
-        return builder.CreateBitCast(v, jl_value_addr);
-    }
-    else {
-        return builder.CreateBitCast(v, jl_value);
-    }
-}
-
 // Get PTLS through current task.
-static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack)
+static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack)
 {
     using namespace llvm;
-    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
     const int pgcstack_offset = offsetof(jl_task_t, gcstack);
-    return builder.CreateInBoundsGEP(
-            T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue),
-            ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))),
-            "current_task");
+    return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task");
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
 {
     using namespace llvm;
-    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
+    auto T_ptr = builder.getPtrTy();
     const int ptls_offset = offsetof(jl_task_t, ptls);
-    llvm::Value *pptls = builder.CreateInBoundsGEP(
-            T_pjlvalue, current_task,
-            ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
-            "ptls_field");
-    LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue,
-            emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
+    llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field");
+    LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load");
     // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
     tbaa_decorate(tbaa, ptls_load);
-    return builder.CreateBitCast(ptls_load, T_ppjlvalue, "ptls");
+    return ptls_load;
 }
 
 // Get signal page through current task.
-static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
 {
     using namespace llvm;
     // return builder.CreateCall(prepare_call(reuse_signal_page_func));
-    auto T_psize = T_size->getPointerTo();
-    auto T_ppsize = T_psize->getPointerTo();
-    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
-    ptls = emit_bitcast_with_builder(builder, ptls, T_ppsize);
-    llvm::Value *psafepoint = builder.CreateInBoundsGEP(
-            T_psize, ptls, ConstantInt::get(T_size, nthfield));
+    auto T_ptr = builder.getPtrTy();
+    auto i8 = builder.getInt8Ty();
+    int nthfield = offsetof(jl_tls_states_t, safepoint);
+    llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield);
     LoadInst *ptls_load = builder.CreateAlignedLoad(
-            T_psize, psafepoint, Align(sizeof(void *)), "safepoint");
+            T_ptr, psafepoint, Align(sizeof(void *)), "safepoint");
     tbaa_decorate(tbaa, ptls_load);
     return ptls_load;
 }
@@ -258,7 +233,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
 static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
 {
     using namespace llvm;
-    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa);
+    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
     emit_signal_fence(builder);
     Module *M = builder.GetInsertBlock()->getModule();
     LLVMContext &C = builder.getContext();
@@ -269,8 +244,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
     else {
         Function *F = M->getFunction("julia.safepoint");
         if (!F) {
-            auto T_psize = T_size->getPointerTo();
-            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
+            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false);
             F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
 #if JL_LLVM_VERSION >= 160000
             F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
@@ -287,9 +261,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
 {
     using namespace llvm;
     Type *T_int8 = state->getType();
-    llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy());
-    Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
-    Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef<Value*>(offset), "gc_state");
+    unsigned offset = offsetof(jl_tls_states_t, gc_state);
+    Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
     if (old_state == nullptr) {
         old_state = builder.CreateLoad(T_int8, gc_state);
         cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index 2539c5cd2e37c..05d62adc57926 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -94,7 +94,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
     for (auto &F: M.functions()) {
         auto FN = F.getName();
 
-        if (FN.startswith("julia.cpu.have_fma.")) {
+        if (FN.starts_with("julia.cpu.have_fma.")) {
             for (Use &U: F.uses()) {
                 User *RU = U.getUser();
                 CallInst *I = cast<CallInst>(RU);
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index 5d0d9f5d37c40..7f1b076897fc8 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -49,37 +49,28 @@ extern JuliaOJIT *jl_ExecutionEngine;
 
 namespace {
 
-static bool have_fp16(Function &caller, const Triple &TT) {
-    Attribute FSAttr = caller.getFnAttribute("target-features");
-    StringRef FS = "";
-    if (FSAttr.isValid())
-        FS = FSAttr.getValueAsString();
-    else if (jl_ExecutionEngine)
-        FS = jl_ExecutionEngine->getTargetFeatureString();
-    // else probably called from opt, just do nothing
-    if (TT.isAArch64()) {
-        if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
-            return true;
-        }
-    } else if (TT.getArch() == Triple::x86_64) {
-        if (FS.find("+avx512fp16") != llvm::StringRef::npos){
-            return true;
-        }
-    }
-    if (caller.hasFnAttribute("julia.hasfp16")) {
-        return true;
-    }
-    return false;
+static bool have_fp16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasfp16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
+
+    // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision
+    if (TT.isPPC())
+        return false;
+
+    return true;
 }
 
-static bool have_bf16(Function &caller, const Triple &TT) {
-    if (caller.hasFnAttribute("julia.hasbf16")) {
-        return true;
-    }
+static bool have_bf16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasbf16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
 
-    // there's no targets that fully support bfloat yet;,
-    // AVX512BF16 only provides conversion and dot product instructions.
-    return false;
+    // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199:
+    // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf
+    return true;
 }
 
 static bool demoteFloat16(Function &F)
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 5a53ce4d8e510..0605098bec361 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -1,22 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm/ADT/Statistic.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "llvm-pass-helpers.h"
+#include "llvm-gc-interface-passes.h"
 
 #define DEBUG_TYPE "final_gc_lowering"
 STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
@@ -27,50 +11,6 @@ STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
 STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
 STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics");
 
-using namespace llvm;
-
-// The final GC lowering pass. This pass lowers platform-agnostic GC
-// intrinsics to platform-dependent instruction sequences. The
-// intrinsics it targets are those produced by the late GC frame
-// lowering pass.
-//
-// This pass targets typical back-ends for which the standard Julia
-// runtime library is available. Atypical back-ends should supply
-// their own lowering pass.
-
-struct FinalLowerGC: private JuliaPassContext {
-    bool runOnFunction(Function &F);
-
-private:
-    Function *queueRootFunc;
-    Function *poolAllocFunc;
-    Function *bigAllocFunc;
-    Function *allocTypedFunc;
-    Instruction *pgcstack;
-    Type *T_size;
-
-    // Lowers a `julia.new_gc_frame` intrinsic.
-    void lowerNewGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.push_gc_frame` intrinsic.
-    void lowerPushGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.pop_gc_frame` intrinsic.
-    void lowerPopGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.get_gc_frame_slot` intrinsic.
-    void lowerGetGCFrameSlot(CallInst *target, Function &F);
-
-    // Lowers a `julia.gc_alloc_bytes` intrinsic.
-    void lowerGCAllocBytes(CallInst *target, Function &F);
-
-    // Lowers a `julia.queue_gc_root` intrinsic.
-    void lowerQueueGCRoot(CallInst *target, Function &F);
-
-    // Lowers a `julia.safepoint` intrinsic.
-    void lowerSafepoint(CallInst *target, Function &F);
-};
-
 void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
 {
     ++NewGCFrameCount;
@@ -103,9 +43,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
     IRBuilder<> builder(target);
     StoreInst *inst = builder.CreateAlignedStore(
                 ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
-                builder.CreateBitCast(
-                        builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),
-                        T_size->getPointerTo(), "frame.nroots"), // GEP of 0 becomes a noop and eats the name
+                builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),// GEP of 0 becomes a noop and eats the name
                 Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext());
@@ -118,7 +56,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     builder.CreateAlignedStore(
             gcframe,
-            builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)),
+            pgcstack,
             Align(sizeof(void*)));
     target->eraseFromParent();
 }
@@ -136,8 +74,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     inst = builder.CreateAlignedStore(
         inst,
-        builder.CreateBitCast(pgcstack,
-            PointerType::get(T_prjlvalue, 0)),
+        pgcstack,
         Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     target->eraseFromParent();
@@ -205,13 +142,13 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
         else {
             auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
             auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize, type });
+            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
             if (sz > 0)
                 derefBytes = sz;
         }
     } else {
         auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
-        size = builder.CreateAdd(size, ConstantInt::get(T_size, sizeof(void*)));
+        // allocTypedFunc does not include the type tag in the allocation size!
         newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
         derefBytes = sizeof(void*);
     }
@@ -241,7 +178,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
     }
     LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
     queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
-    poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
+    smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
     allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
     T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
new file mode 100644
index 0000000000000..cb485751d407b
--- /dev/null
+++ b/src/llvm-gc-interface-passes.h
@@ -0,0 +1,413 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  LLVM passes that may be partially modified by a third-party GC implementation.
+*/
+
+#include "llvm-version.h"
+#include "passes.h"
+
+#include "llvm/IR/DerivedTypes.h"
+#include <llvm-c/Core.h>
+#include <llvm-c/Types.h>
+
+#include <llvm/ADT/Statistic.h>
+#include <llvm/ADT/BitVector.h>
+#include <llvm/ADT/SparseBitVector.h>
+#include <llvm/ADT/PostOrderIterator.h>
+#include <llvm/ADT/SetVector.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/SmallSet.h>
+#include <llvm/Analysis/CFG.h>
+#include <llvm/Analysis/InstSimplifyFolder.h>
+#include <llvm/IR/Value.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Dominators.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/ModuleSlotTracker.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Pass.h>
+#include <llvm/Support/Debug.h>
+#include <llvm/Transforms/Utils/BasicBlockUtils.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+
+#include <llvm/InitializePasses.h>
+
+#include "llvm-codegen-shared.h"
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+#include "llvm-pass-helpers.h"
+#include <map>
+#include <string>
+
+#ifndef LLVM_GC_PASSES_H
+#define LLVM_GC_PASSES_H
+
+using namespace llvm;
+
+/* Julia GC Root Placement pass. For a general overview of the design of GC
+   root lowering, see the devdocs. This file is the actual implementation.
+
+   The actual algorithm is fairly straightforward. First recall the goal of this
+   pass:
+
+   Minimize the number of needed gc roots/stores to them subject to the constraint
+   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
+   a path after this point that contains a use of this pointer) is in some gc slot.
+
+   In particular, in order to understand this algorithm, it is important to
+   realize that the only places where rootedness matters is at safepoints.
+
+   Now, the primary phases of the algorithm are:
+
+   1. Local Scan
+
+      During this step, each Basic Block is inspected and analyzed for local
+      properties. In particular, we want to determine the ordering of any of
+      the following activities:
+
+        - Any Def of a gc-tracked pointer. In general Defs are the results of
+          calls or loads from appropriate memory locations. Phi nodes and
+          selects do complicate this story slightly as described below.
+        - Any use of a gc-tracked or derived pointer. As described in the
+          devdocs, a use is in general one of
+              a) a load from a tracked/derived value
+              b) a store to a tracked/derived value
+              c) a store OF a tracked/derived value
+              d) a use of a value as a call operand (including operand bundles)
+        - Any safepoint
+
+      Crucially, we also perform pointer numbering during the local scan,
+      assigning every Def a unique integer and caching the integer for each
+      derived pointer. This allows us to operate only on the set of Defs (
+      represented by these integers) for the rest of the algorithm. We also
+      maintain some local utility information that is needed by later passes
+      (see the BBState struct for details).
+
+    2. Dataflow Computation
+
+      This computation operates entirely over the function's control flow graph
+      and does not look into a basic block. The algorithm is essentially
+      textbook iterative data flow for liveness computation. However, the
+      data flow equations are slightly more complicated because we also
+      forward propagate rootedness information in addition to backpropagating
+      liveness.
+
+    3. Live Set Computation
+
+      With the liveness information from the previous step, we can now compute,
+      for every safepoint, the set of values live at that particular safepoint.
+      There are three pieces of information being combined here:
+           i. Values that needed to be live due to local analysis (e.g. there
+              was a def, then a safepoint, then a use). This was computed during
+              local analysis.
+          ii. Values that are live across the basic block (i.e. they are live
+              at every safepoint within the basic block). This relies entirely
+              on the liveness information.
+         iii. Values that are now live-out from the basic block (i.e. they are
+              live at every safepoint following their def). During local
+              analysis, we keep, for every safepoint, those values that would
+              be live if they were live out. Here we can check if they are
+              actually live-out and make the appropriate additions to the live
+              set.
+
+       Lastly, we also explicitly compute, for each value, the list of values
+       that are simultaneously live at some safepoint. This is known as an
+       "interference graph" and is the input to the next step.
+
+    4. GC Root coloring
+
+      Two values which are not simultaneously live at a safepoint can share the
+      same slot. This is an important optimization, because otherwise long
+      functions would have exceptionally large GC slots, reducing performance
+      and bloating the size of the stack. Assigning values to these slots is
+      equivalent to doing graph coloring on the interference graph - the graph
+      where nodes are values and two values have an edge if they are
+      simultaneously live at a safepoint - which we computed in the previous
+      step. Now graph coloring in general is a hard problem. However, for SSA
+      form programs, (and most programs in general, by virtue of their
+      structure), the resulting interference graphs are chordal and can be
+      colored optimally in linear time by performing greedy coloring in a
+      perfect elimination order. Now, our interference graphs are likely not
+      entirely chordal due to some non-SSA corner cases. However, using the same
+      algorithm should still give a very good coloring while having sufficiently
+      low runtime.
+
+    5. JLCall frame optimizations
+
+      Unlike earlier iterations of the gc root placement logic, jlcall frames
+      are no longer treated as a special case and need not necessarily be sunk
+      into the gc frame. Additionally, we now emit lifetime
+      intrinsics, so regular stack slot coloring will merge any jlcall frames
+      not sunk into the gc frame. Nevertheless performing such sinking can still
+      be profitable. Since all arguments to a jlcall are guaranteed to be live
+      at that call in some gc slot, we can attempt to rearrange the slots within
+      the gc-frame, or reuse slots not assigned at that particular location
+      for the gcframe. However, even without this optimization, stack frames
+      are at most two times larger than optimal (because regular stack coloring
+      can merge the jlcall allocas).
+
+      N.B.: This step is not yet implemented.
+
+    6. Root placement
+
+      This performs the actual insertion of the GCFrame pushes/pops, zeros out
+      the gc frame and creates the stores to the gc frame according to the
+      stack slot assignment computed in the previous step. GC frames stores
+      are generally sunk right before the first safe point that use them
+      (this is beneficial for code where the primary path does not have
+      safepoints, but some other path - e.g. the error path does). However,
+      if the first safepoint is not dominated by the definition (this can
+      happen due to the non-ssa corner cases), the store is inserted right after
+      the definition.
+
+    7. Cleanup
+
+      This step performs necessary cleanup before passing the IR to codegen. In
+      particular, it removes any calls to julia_from_objref intrinsics and
+      removes the extra operand bundles from ccalls. In the future it could
+      also strip the addrspace information from all values as this
+      information is no longer needed.
+
+
+  There are a couple important special cases that deserve special attention:
+
+    A. PHIs and Selects
+
+      In general PHIs and selects are treated as separate defs for the purposes
+      of the algorithm and their operands as uses of those values. It is
+      important to consider however WHERE the uses of PHI's operands are
+      located. It is neither at the start of the basic block, because the values
+      do not dominate the block (so can't really consider them live-in), nor
+      at the end of the predecessor (because they are actually live out).
+      Instead it is best to think of those uses as living on the edge between
+      the appropriate predecessor and the block containing the PHI.
+
+      Another concern is PHIs of derived values. Since we cannot simply root
+      these values by storing them to a GC slot, we need to insert a new,
+      artificial PHI that tracks the base pointers for the derived values. E.g.
+      in:
+
+      A:
+        %Abase = load addrspace(10) *...
+        %Aderived = addrspacecast %Abase to addrspace(11)
+      B:
+        %Bbase = load addrspace(10) *...
+        %Bderived = addrspacecast %Bbase to addrspace(11)
+      C:
+        %phi = phi [%Aderived, %A
+                    %Bderived, %B]
+
+      we will insert another phi in C to track the relevant base pointers:
+
+        %philift = phi [%Abase, %A
+                        %Bbase, %B]
+
+      We then pretend, for the purposes of numbering that %phi was derived from
+      %philift. Note that in order to be able to do this, we need to be able to
+      perform this lifting either during numbering or instruction scanning.
+
+    B. Vectors of pointers/Union representations
+
+      Since this pass runs very late in the pass pipeline, it runs after the
+      various vectorization passes. As a result, we have to potentially deal
+      with vectors of gc-tracked pointers. For the purposes of most of the
+      algorithm, we simply assign every element of the vector a separate number
+      and no changes are needed. However, those parts of the algorithm that
+      look at IR need to be aware of the possibility of encountering vectors of
+      pointers.
+
+      Similarly, unions (e.g. in call returns) are represented as a struct of
+      a gc-tracked value and an argument selector. We simply assign a single
+      number to this struct and proceed as if it was a single pointer. However,
+      this again requires care at the IR level.
+
+    C. Non mem2reg'd allocas
+
+      Under some circumstances, allocas will still be present in the IR when
+      we get to this pass. We don't try very hard to handle this case, and
+      simply sink the alloca into the GCFrame.
+*/
+
+// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
+// substantially smaller here doesn't actually save much memory because of malloc overhead.
+// Too large is bad also though - 4096 was found to be a reasonable middle ground.
+using LargeSparseBitVector = SparseBitVector<4096>;
+
+struct BBState {
+    // Uses in this BB
+    // These do not get updated after local analysis
+    LargeSparseBitVector Defs;
+    LargeSparseBitVector PhiOuts;
+    LargeSparseBitVector UpExposedUses;
+    // These get updated during dataflow
+    LargeSparseBitVector LiveIn;
+    LargeSparseBitVector LiveOut;
+    SmallVector<int, 0> Safepoints;
+    int TopmostSafepoint = -1;
+    bool HasSafepoint = false;
+    // Have we gone through this basic block in our local scan yet?
+    bool Done = false;
+};
+
+struct State {
+    Function *const F;
+    DominatorTree *DT;
+
+    // The maximum assigned value number
+    int MaxPtrNumber;
+    // The maximum assigned safepoint number
+    int MaxSafepointNumber;
+    // Cache of numbers assigned to IR values. This includes caching of numbers
+    // for derived values
+    std::map<Value *, int> AllPtrNumbering;
+    std::map<Value *, SmallVector<int, 0>> AllCompositeNumbering;
+    // The reverse of the previous maps
+    std::map<int, Value *> ReversePtrNumbering;
+    // Neighbors in the coloring interference graph. I.e. for each value, the
+    // indices of other values that are used simultaneously at some safe point.
+    SmallVector<LargeSparseBitVector, 0> Neighbors;
+    // The result of the local analysis
+    std::map<const BasicBlock *, BBState> BBStates;
+
+    // Refinement map. If all of the values are rooted
+    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
+    // the key is already rooted (but not the other way around).
+    // A value that can be refined to -2 never need any rooting or write barrier.
+    // A value that can be refined to -1 don't need local root but still need write barrier.
+    // At the end of `LocalScan` this map has a few properties
+    // 1. Values are either < 0 or dominates the key
+    // 2. Therefore this is a DAG
+    std::map<int, SmallVector<int, 1>> Refinements;
+
+    // GC preserves map. All safepoints dominated by the map key, but not any
+    // of its uses need to preserve the values listed in the map value.
+    std::map<Instruction *, SmallVector<int, 0>> GCPreserves;
+
+    // The assignment of numbers to safepoints. The indices in the map
+    // are indices into the next three maps which store safepoint properties
+    std::map<Instruction *, int> SafepointNumbering;
+
+    // Reverse mapping index -> safepoint
+    SmallVector<Instruction *, 0> ReverseSafepointNumbering;
+
+    // Instructions that can return twice. For now, all values live at these
+    // instructions will get their own, dedicated GC frame slots, because they
+    // have unobservable control flow, so we can't be sure where they're
+    // actually live. All of these are also considered safepoints.
+    SmallVector<Instruction *, 0> ReturnsTwice;
+
+    // The set of values live at a particular safepoint
+    SmallVector< LargeSparseBitVector , 0> LiveSets;
+    // Those values that - if live out from our parent basic block - are live
+    // at this safepoint.
+    SmallVector<SmallVector<int, 0>> LiveIfLiveOut;
+    // The set of values that are kept alive by the callee.
+    SmallVector<SmallVector<int, 0>> CalleeRoots;
+    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
+    // they just get directly sunk into the root array.
+    SmallVector<AllocaInst *, 0> Allocas;
+    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
+    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
+    SmallVector<std::pair<StoreInst *, unsigned>, 0> TrackedStores;
+    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
+};
+
+
+struct LateLowerGCFrame:  private JuliaPassContext {
+    function_ref<DominatorTree &()> GetDT;
+    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
+
+public:
+    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
+
+private:
+    CallInst *pgcstack;
+
+    void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
+                      SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
+    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses);
+    void NoteUse(State &S, BBState &BBS, Value *V) {
+        NoteUse(S, BBS, V, BBS.UpExposedUses);
+    }
+
+    void LiftPhi(State &S, PHINode *Phi);
+    void LiftSelect(State &S, SelectInst *SI);
+    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
+    SmallVector<Value*, 0> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
+    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
+
+    int Number(State &S, Value *V);
+    int NumberBase(State &S, Value *Base);
+    SmallVector<int, 0> NumberAll(State &S, Value *V);
+    SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
+
+    void NoteOperandUses(State &S, BBState &BBS, User &UI);
+    void MaybeTrackDst(State &S, MemTransferInst *MI);
+    void MaybeTrackStore(State &S, StoreInst *I);
+    State LocalScan(Function &F);
+    void ComputeLiveness(State &S);
+    void ComputeLiveSets(State &S);
+    SmallVector<int, 0> ColorRoots(const State &S);
+    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
+    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame);
+    void PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
+    void CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified);
+    bool CleanupIR(Function &F, State *S, bool *CFGModified);
+    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
+    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
+    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
+    void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
+    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
+    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
+};
+
+// The final GC lowering pass. This pass lowers platform-agnostic GC
+// intrinsics to platform-dependent instruction sequences. The
+// intrinsics it targets are those produced by the late GC frame
+// lowering pass.
+//
+// This pass targets typical back-ends for which the standard Julia
+// runtime library is available. Atypical back-ends should supply
+// their own lowering pass.
+
+struct FinalLowerGC: private JuliaPassContext {
+    bool runOnFunction(Function &F);
+
+private:
+    Function *queueRootFunc;
+    Function *smallAllocFunc;
+    Function *bigAllocFunc;
+    Function *allocTypedFunc;
+    Instruction *pgcstack;
+    Type *T_size;
+
+    // Lowers a `julia.new_gc_frame` intrinsic.
+    void lowerNewGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.push_gc_frame` intrinsic.
+    void lowerPushGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.pop_gc_frame` intrinsic.
+    void lowerPopGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.get_gc_frame_slot` intrinsic.
+    void lowerGetGCFrameSlot(CallInst *target, Function &F);
+
+    // Lowers a `julia.gc_alloc_bytes` intrinsic.
+    void lowerGCAllocBytes(CallInst *target, Function &F);
+
+    // Lowers a `julia.queue_gc_root` intrinsic.
+    void lowerQueueGCRoot(CallInst *target, Function &F);
+
+    // Lowers a `julia.safepoint` intrinsic.
+    void lowerSafepoint(CallInst *target, Function &F);
+};
+
+#endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index 8c8e2e4546d04..5badbca807569 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -62,8 +62,8 @@ struct GCInvariantVerifier : public InstVisitor<GCInvariantVerifier> {
 };
 
 void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
-    unsigned FromAS = cast<PointerType>(I.getSrcTy())->getAddressSpace();
-    unsigned ToAS = cast<PointerType>(I.getDestTy())->getAddressSpace();
+    unsigned FromAS = I.getSrcTy()->getPointerAddressSpace();
+    unsigned ToAS = I.getDestTy()->getPointerAddressSpace();
     if (FromAS == 0)
         return;
     Check(ToAS != AddressSpace::Loaded && FromAS != AddressSpace::Loaded,
@@ -78,10 +78,10 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
 }
 
 void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) {
-    if (VTy->isPointerTy()) {
+    if (VTy->isPtrOrPtrVectorTy()) {
         /* We currently don't obey this for arguments. That's ok - they're
            externally rooted. */
-        unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
+        unsigned AS = VTy->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted &&
               AS != AddressSpace::Derived,
               "Illegal store of decayed value", &SI);
@@ -107,15 +107,15 @@ void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
 
 void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {
     Type *Ty = LI.getType();
-    if (Ty->isPointerTy()) {
-        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    if (Ty->isPtrOrPtrVectorTy()) {
+        unsigned AS = Ty->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted &&
               AS != AddressSpace::Derived,
               "Illegal load of gc relevant value", &LI);
     }
     Ty = LI.getPointerOperand()->getType();
-    if (Ty->isPointerTy()) {
-        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    if (Ty->isPtrOrPtrVectorTy()) {
+        unsigned AS = Ty->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted,
               "Illegal load of callee rooted value", &LI);
     }
@@ -129,18 +129,18 @@ void GCInvariantVerifier::visitReturnInst(ReturnInst &RI) {
     if (!RI.getReturnValue())
         return;
     Type *RTy = RI.getReturnValue()->getType();
-    if (!RTy->isPointerTy())
+    if (!RTy->isPtrOrPtrVectorTy())
         return;
-    unsigned AS = cast<PointerType>(RTy)->getAddressSpace();
+    unsigned AS = RTy->getPointerAddressSpace();
     Check(!isSpecialAS(AS) || AS == AddressSpace::Tracked,
           "Only gc tracked values may be directly returned", &RI);
 }
 
 void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     Type *Ty = GEP.getType();
-    if (!Ty->isPointerTy())
+    if (!Ty->isPtrOrPtrVectorTy())
         return;
-    unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    unsigned AS = Ty->getPointerAddressSpace();
     if (!isSpecialAS(AS))
         return;
     /* We're actually ok with GEPs here, as long as they don't feed into any
@@ -170,8 +170,9 @@ void GCInvariantVerifier::visitCallInst(CallInst &CI) {
                 continue;
             }
             Type *Ty = Arg->getType();
-            Check(Ty->isPointerTy() && cast<PointerType>(Ty)->getAddressSpace() == AddressSpace::Tracked,
-                "Invalid derived pointer in jlcall", &CI);
+            Check(Ty->isPtrOrPtrVectorTy() &&
+                      Ty->getPointerAddressSpace() == AddressSpace::Tracked,
+                  "Invalid derived pointer in jlcall", &CI);
         }
     }
 }
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index e76beaa3df44f..8d80f7fd54877 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -330,10 +330,9 @@ struct JuliaLICM : public JuliaPassContext {
                     moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
                     IRBuilder<> builder(preheader->getTerminator());
                     builder.SetCurrentDebugLocation(call->getDebugLoc());
-                    auto obj_i8 = builder.CreateBitCast(call, Type::getInt8PtrTy(call->getContext(), call->getType()->getPointerAddressSpace()));
                     // Note that this alignment is assuming the GC allocates at least pointer-aligned memory
                     auto align = Align(DL.getPointerSize(0));
-                    auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align);
+                    auto clear_obj = builder.CreateMemSet(call, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align);
                     if (MSSAU.getMemorySSA()) {
                         auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, clear_obj->getParent(), MemorySSA::BeforeTerminator);
                         MSSAU.insertDef(cast<MemoryDef>(clear_mdef), true);
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index b35f64be813d7..8d1d5ff73b261 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1,365 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/BitVector.h>
-#include <llvm/ADT/SparseBitVector.h>
-#include <llvm/ADT/PostOrderIterator.h>
-#include <llvm/ADT/SetVector.h>
-#include <llvm/ADT/SmallVector.h>
-#include <llvm/ADT/SmallSet.h>
-#include <llvm/Analysis/CFG.h>
-#include <llvm/Analysis/InstSimplifyFolder.h>
-#include <llvm/IR/Value.h>
-#include <llvm/IR/Constants.h>
-#include <llvm/IR/Dominators.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/MDBuilder.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/ModuleSlotTracker.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/BasicBlockUtils.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include <llvm/InitializePasses.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "julia_assert.h"
-#include "llvm-pass-helpers.h"
-#include <map>
-#include <string>
+#include "llvm-gc-interface-passes.h"
 
 #define DEBUG_TYPE "late_lower_gcroot"
 
-using namespace llvm;
-
-/* Julia GC Root Placement pass. For a general overview of the design of GC
-   root lowering, see the devdocs. This file is the actual implementation.
-
-   The actual algorithm is fairly straightforward. First recall the goal of this
-   pass:
-
-   Minimize the number of needed gc roots/stores to them subject to the constraint
-   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
-   a path after this point that contains a use of this pointer) is in some gc slot.
-
-   In particular, in order to understand this algorithm, it is important to
-   realize that the only places where rootedness matters is at safepoints.
-
-   Now, the primary phases of the algorithm are:
-
-   1. Local Scan
-
-      During this step, each Basic Block is inspected and analyzed for local
-      properties. In particular, we want to determine the ordering of any of
-      the following activities:
-
-        - Any Def of a gc-tracked pointer. In general Defs are the results of
-          calls or loads from appropriate memory locations. Phi nodes and
-          selects do complicate this story slightly as described below.
-        - Any use of a gc-tracked or derived pointer. As described in the
-          devdocs, a use is in general one of
-              a) a load from a tracked/derived value
-              b) a store to a tracked/derived value
-              c) a store OF a tracked/derived value
-              d) a use of a value as a call operand (including operand bundles)
-        - Any safepoint
-
-      Crucially, we also perform pointer numbering during the local scan,
-      assigning every Def a unique integer and caching the integer for each
-      derived pointer. This allows us to operate only on the set of Defs (
-      represented by these integers) for the rest of the algorithm. We also
-      maintain some local utility information that is needed by later passes
-      (see the BBState struct for details).
-
-    2. Dataflow Computation
-
-      This computation operates entirely over the function's control flow graph
-      and does not look into a basic block. The algorithm is essentially
-      textbook iterative data flow for liveness computation. However, the
-      data flow equations are slightly more complicated because we also
-      forward propagate rootedness information in addition to backpropagating
-      liveness.
-
-    3. Live Set Computation
-
-      With the liveness information from the previous step, we can now compute,
-      for every safepoint, the set of values live at that particular safepoint.
-      There are three pieces of information being combined here:
-           i. Values that needed to be live due to local analysis (e.g. there
-              was a def, then a safepoint, then a use). This was computed during
-              local analysis.
-          ii. Values that are live across the basic block (i.e. they are live
-              at every safepoint within the basic block). This relies entirely
-              on the liveness information.
-         iii. Values that are now live-out from the basic block (i.e. they are
-              live at every safepoint following their def). During local
-              analysis, we keep, for every safepoint, those values that would
-              be live if they were live out. Here we can check if they are
-              actually live-out and make the appropriate additions to the live
-              set.
-
-       Lastly, we also explicitly compute, for each value, the list of values
-       that are simultaneously live at some safepoint. This is known as an
-       "interference graph" and is the input to the next step.
-
-    4. GC Root coloring
-
-      Two values which are not simultaneously live at a safepoint can share the
-      same slot. This is an important optimization, because otherwise long
-      functions would have exceptionally large GC slots, reducing performance
-      and bloating the size of the stack. Assigning values to these slots is
-      equivalent to doing graph coloring on the interference graph - the graph
-      where nodes are values and two values have an edge if they are
-      simultaneously live at a safepoint - which we computed in the previous
-      step. Now graph coloring in general is a hard problem. However, for SSA
-      form programs, (and most programs in general, by virtue of their
-      structure), the resulting interference graphs are chordal and can be
-      colored optimally in linear time by performing greedy coloring in a
-      perfect elimination order. Now, our interference graphs are likely not
-      entirely chordal due to some non-SSA corner cases. However, using the same
-      algorithm should still give a very good coloring while having sufficiently
-      low runtime.
-
-    5. JLCall frame optimizations
-
-      Unlike earlier iterations of the gc root placement logic, jlcall frames
-      are no longer treated as a special case and need not necessarily be sunk
-      into the gc frame. Additionally, we now emit lifetime
-      intrinsics, so regular stack slot coloring will merge any jlcall frames
-      not sunk into the gc frame. Nevertheless performing such sinking can still
-      be profitable. Since all arguments to a jlcall are guaranteed to be live
-      at that call in some gc slot, we can attempt to rearrange the slots within
-      the gc-frame, or reuse slots not assigned at that particular location
-      for the gcframe. However, even without this optimization, stack frames
-      are at most two times larger than optimal (because regular stack coloring
-      can merge the jlcall allocas).
-
-      N.B.: This step is not yet implemented.
-
-    6. Root placement
-
-      This performs the actual insertion of the GCFrame pushes/pops, zeros out
-      the gc frame and creates the stores to the gc frame according to the
-      stack slot assignment computed in the previous step. GC frames stores
-      are generally sunk right before the first safe point that use them
-      (this is beneficial for code where the primary path does not have
-      safepoints, but some other path - e.g. the error path does). However,
-      if the first safepoint is not dominated by the definition (this can
-      happen due to the non-ssa corner cases), the store is inserted right after
-      the definition.
-
-    7. Cleanup
-
-      This step performs necessary cleanup before passing the IR to codegen. In
-      particular, it removes any calls to julia_from_objref intrinsics and
-      removes the extra operand bundles from ccalls. In the future it could
-      also strip the addrspace information from all values as this
-      information is no longer needed.
-
-
-  There are a couple important special cases that deserve special attention:
-
-    A. PHIs and Selects
-
-      In general PHIs and selects are treated as separate defs for the purposes
-      of the algorithm and their operands as uses of those values. It is
-      important to consider however WHERE the uses of PHI's operands are
-      located. It is neither at the start of the basic block, because the values
-      do not dominate the block (so can't really consider them live-in), nor
-      at the end of the predecessor (because they are actually live out).
-      Instead it is best to think of those uses as living on the edge between
-      the appropriate predecessor and the block containing the PHI.
-
-      Another concern is PHIs of derived values. Since we cannot simply root
-      these values by storing them to a GC slot, we need to insert a new,
-      artificial PHI that tracks the base pointers for the derived values. E.g.
-      in:
-
-      A:
-        %Abase = load addrspace(10) *...
-        %Aderived = addrspacecast %Abase to addrspace(11)
-      B:
-        %Bbase = load addrspace(10) *...
-        %Bderived = addrspacecast %Bbase to addrspace(11)
-      C:
-        %phi = phi [%Aderived, %A
-                    %Bderived, %B]
-
-      we will insert another phi in C to track the relevant base pointers:
-
-        %philift = phi [%Abase, %A
-                        %Bbase, %B]
-
-      We then pretend, for the purposes of numbering that %phi was derived from
-      %philift. Note that in order to be able to do this, we need to be able to
-      perform this lifting either during numbering or instruction scanning.
-
-    B. Vectors of pointers/Union representations
-
-      Since this pass runs very late in the pass pipeline, it runs after the
-      various vectorization passes. As a result, we have to potentially deal
-      with vectors of gc-tracked pointers. For the purposes of most of the
-      algorithm, we simply assign every element of the vector a separate number
-      and no changes are needed. However, those parts of the algorithm that
-      look at IR need to be aware of the possibility of encountering vectors of
-      pointers.
-
-      Similarly, unions (e.g. in call returns) are represented as a struct of
-      a gc-tracked value and an argument selector. We simply assign a single
-      number to this struct and proceed as if it was a single pointer. However,
-      this again requires care at the IR level.
-
-    C. Non mem2reg'd allocas
-
-      Under some circumstances, allocas will still be present in the IR when
-      we get to this pass. We don't try very hard to handle this case, and
-      simply sink the alloca into the GCFrame.
-*/
-
-// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
-// substantially smaller here doesn't actually save much memory because of malloc overhead.
-// Too large is bad also though - 4096 was found to be a reasonable middle ground.
-using LargeSparseBitVector = SparseBitVector<4096>;
-
-struct BBState {
-    // Uses in this BB
-    // These do not get updated after local analysis
-    LargeSparseBitVector Defs;
-    LargeSparseBitVector PhiOuts;
-    LargeSparseBitVector UpExposedUses;
-    // These get updated during dataflow
-    LargeSparseBitVector LiveIn;
-    LargeSparseBitVector LiveOut;
-    SmallVector<int, 0> Safepoints;
-    int TopmostSafepoint = -1;
-    bool HasSafepoint = false;
-    // Have we gone through this basic block in our local scan yet?
-    bool Done = false;
-};
-
-struct State {
-    Function *const F;
-    DominatorTree *DT;
-
-    // The maximum assigned value number
-    int MaxPtrNumber;
-    // The maximum assigned safepoint number
-    int MaxSafepointNumber;
-    // Cache of numbers assigned to IR values. This includes caching of numbers
-    // for derived values
-    std::map<Value *, int> AllPtrNumbering;
-    std::map<Value *, SmallVector<int, 0>> AllCompositeNumbering;
-    // The reverse of the previous maps
-    std::map<int, Value *> ReversePtrNumbering;
-    // Neighbors in the coloring interference graph. I.e. for each value, the
-    // indices of other values that are used simultaneously at some safe point.
-    SmallVector<LargeSparseBitVector, 0> Neighbors;
-    // The result of the local analysis
-    std::map<const BasicBlock *, BBState> BBStates;
-
-    // Refinement map. If all of the values are rooted
-    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
-    // the key is already rooted (but not the other way around).
-    // A value that can be refined to -2 never need any rooting or write barrier.
-    // A value that can be refined to -1 don't need local root but still need write barrier.
-    // At the end of `LocalScan` this map has a few properties
-    // 1. Values are either < 0 or dominates the key
-    // 2. Therefore this is a DAG
-    std::map<int, SmallVector<int, 1>> Refinements;
-
-    // GC preserves map. All safepoints dominated by the map key, but not any
-    // of its uses need to preserve the values listed in the map value.
-    std::map<Instruction *, SmallVector<int, 0>> GCPreserves;
-
-    // The assignment of numbers to safepoints. The indices in the map
-    // are indices into the next three maps which store safepoint properties
-    std::map<Instruction *, int> SafepointNumbering;
-
-    // Reverse mapping index -> safepoint
-    SmallVector<Instruction *, 0> ReverseSafepointNumbering;
-
-    // Instructions that can return twice. For now, all values live at these
-    // instructions will get their own, dedicated GC frame slots, because they
-    // have unobservable control flow, so we can't be sure where they're
-    // actually live. All of these are also considered safepoints.
-    SmallVector<Instruction *, 0> ReturnsTwice;
-
-    // The set of values live at a particular safepoint
-    SmallVector< LargeSparseBitVector , 0> LiveSets;
-    // Those values that - if live out from our parent basic block - are live
-    // at this safepoint.
-    SmallVector<SmallVector<int, 0>> LiveIfLiveOut;
-    // The set of values that are kept alive by the callee.
-    SmallVector<SmallVector<int, 0>> CalleeRoots;
-    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
-    // they just get directly sunk into the root array.
-    SmallVector<AllocaInst *, 0> Allocas;
-    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
-    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
-    SmallVector<std::pair<StoreInst *, unsigned>, 0> TrackedStores;
-    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
-};
-
-
-struct LateLowerGCFrame:  private JuliaPassContext {
-    function_ref<DominatorTree &()> GetDT;
-    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
-
-public:
-    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
-
-private:
-    CallInst *pgcstack;
-
-    void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
-                      SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
-    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses);
-    void NoteUse(State &S, BBState &BBS, Value *V) {
-        NoteUse(S, BBS, V, BBS.UpExposedUses);
-    }
-
-    void LiftPhi(State &S, PHINode *Phi);
-    void LiftSelect(State &S, SelectInst *SI);
-    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
-    SmallVector<Value*, 0> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
-    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
-
-    int Number(State &S, Value *V);
-    int NumberBase(State &S, Value *Base);
-    SmallVector<int, 0> NumberAll(State &S, Value *V);
-    SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
-
-    void NoteOperandUses(State &S, BBState &BBS, User &UI);
-    void MaybeTrackDst(State &S, MemTransferInst *MI);
-    void MaybeTrackStore(State &S, StoreInst *I);
-    State LocalScan(Function &F);
-    void ComputeLiveness(State &S);
-    void ComputeLiveSets(State &S);
-    SmallVector<int, 0> ColorRoots(const State &S);
-    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
-    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame);
-    void PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
-    bool CleanupIR(Function &F, State *S, bool *CFGModified);
-    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
-    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
-    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
-    void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
-    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
-    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
-};
-
 static unsigned getValueAddrSpace(Value *V) {
     return V->getType()->getPointerAddressSpace();
 }
@@ -510,7 +154,7 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
                     // This could really be anything, but it's not loaded
                     // from a tracked pointer, so it doesn't matter what
                     // it is--just pick something simple.
-                    CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
+                    CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
                 }
                 continue;
             }
@@ -545,12 +189,12 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
                         if (II->getIntrinsicID() == Intrinsic::masked_load) {
                             fld_idx = -1;
                             if (!isSpecialPtr(CurrentV->getType())) {
-                                CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
+                                CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
                             }
                         } else {
                             if (auto VTy2 = dyn_cast<VectorType>(CurrentV->getType())) {
                                 if (!isSpecialPtr(VTy2->getElementType())) {
-                                    CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
+                                    CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
                                     fld_idx = -1;
                                 }
                             }
@@ -706,11 +350,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
                     ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
                     "", SI);
         }
-        if (FalseElem->getType() != TrueElem->getType()) {
-            // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-            assert(FalseElem->getContext().supportsTypedPointers());
-            FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
-        }
+        assert(FalseElem->getType() == TrueElem->getType());
         SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
         int Number = ++S.MaxPtrNumber;
         S.AllPtrNumbering[SelectBase] = Number;
@@ -779,29 +419,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
                 BaseElem = Base;
             else
                 BaseElem = IncomingBases[i];
-            if (BaseElem->getType() != T_prjlvalue) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(BaseElem->getContext().supportsTypedPointers());
-                auto &remap = CastedRoots[i][BaseElem];
-                if (!remap) {
-                    if (auto constant = dyn_cast<Constant>(BaseElem)) {
-                        remap = ConstantExpr::getBitCast(constant, T_prjlvalue, "");
-                    } else {
-                        Instruction *InsertBefore;
-                        if (auto arg = dyn_cast<Argument>(BaseElem)) {
-                            InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt();
-                        } else {
-                            assert(isa<Instruction>(BaseElem) && "Unknown value type detected!");
-                            InsertBefore = cast<Instruction>(BaseElem)->getNextNonDebugInstruction();
-                        }
-                        while (isa<PHINode>(InsertBefore)) {
-                            InsertBefore = InsertBefore->getNextNonDebugInstruction();
-                        }
-                        remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore);
-                    }
-                }
-                BaseElem = remap;
-            }
+            assert(BaseElem->getType() == T_prjlvalue);
             lift->addIncoming(BaseElem, IncomingBB);
         }
     }
@@ -1538,7 +1156,9 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 }
                 if (CI->hasStructRetAttr()) {
                     Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
+                    #if JL_LLVM_VERSION < 170000
                     assert(cast<PointerType>(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
+                    #endif
                     auto tracked = CountTrackedPointers(ElT, true);
                     if (tracked.count) {
                         AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
@@ -1621,7 +1241,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                         callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
                         callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
                         callee->getName() == XSTR(jl_lock_field) || callee->getName() == XSTR(jl_unlock_field) ||
-                        callee == write_barrier_func || callee == gc_loaded_func ||
+                        callee == write_barrier_func || callee == gc_loaded_func || callee == pop_handler_noexcept_func ||
                         callee->getName() == "memcmp") {
                         continue;
                     }
@@ -1874,12 +1494,11 @@ SmallVector<Value*, 0> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, I
     return Ptrs;
 }
 
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
+unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
     auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
     for (unsigned i = 0; i < Ptrs.size(); ++i) {
-        Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
-        Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
-        assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
+        Value *Elem = Ptrs[i];
+        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
         StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
         shadowStore->setOrdering(AtomicOrdering::NotAtomic);
         // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
@@ -2244,9 +1863,7 @@ SmallVector<int, 0> LateLowerGCFrame::ColorRoots(const State &S) {
 Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V)
 {
     assert(T == T_size || isa<PointerType>(T));
-    auto TV = cast<PointerType>(V->getType());
-    auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
-    return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr");
+    return builder.CreateInBoundsGEP(T, V, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr");
 }
 
 Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V)
@@ -2309,6 +1926,59 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
     return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
 }
 
+void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
+    for (auto CI : WriteBarriers) {
+        auto parent = CI->getArgOperand(0);
+        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
+                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
+            CI->eraseFromParent();
+            continue;
+        }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
+        auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level");
+        int debug_info = 1;
+        if (DebugInfoMeta != nullptr) {
+            debug_info = cast<ConstantInt>(cast<ConstantAsMetadata>(DebugInfoMeta)->getValue())->getZExtValue();
+        }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED);
+        setName(parBits, "parent_bits", debug_info);
+        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED));
+        setName(parOldMarked, "parent_old_marked", debug_info);
+        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
+        builder.SetInsertPoint(mayTrigTerm);
+        setName(mayTrigTerm->getParent(), "may_trigger_wb", debug_info);
+        Value *anyChldNotMarked = NULL;
+        for (unsigned i = 1; i < CI->arg_size(); i++) {
+            Value *child = CI->getArgOperand(i);
+            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED);
+            setName(chldBit, "child_bit", debug_info);
+            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0),"child_not_marked");
+            setName(chldNotMarked, "child_not_marked", debug_info);
+            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
+        }
+        assert(anyChldNotMarked); // handled by all_of test above
+        MDBuilder MDB(parent->getContext());
+        SmallVector<uint32_t, 2> Weights{1, 9};
+        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
+                                                  MDB.createBranchWeights(Weights));
+        setName(trigTerm->getParent(), "trigger_wb", debug_info);
+        builder.SetInsertPoint(trigTerm);
+        if (CI->getCalledOperand() == write_barrier_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        }
+        else {
+            assert(false);
+        }
+        CI->eraseFromParent();
+    }
+}
+
 bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     auto T_int32 = Type::getInt32Ty(F.getContext());
     auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
@@ -2426,8 +2096,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 // the type tag. (Note that if the size is not a constant, it will call
                 // gc_alloc_obj, and will redundantly set the tag.)
                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
-                auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
-                auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
+                auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
                 auto newI = builder.CreateCall(
                     allocBytesIntrinsic,
                     {
@@ -2508,7 +2177,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     // the julia.call signature is varargs, the optimizer is allowed
                     // to rewrite pointee types. It'll go away with opaque pointer
                     // types anyway.
-                    Builder.CreateAlignedStore(Builder.CreateBitCast(*arg_it, T_prjlvalue),
+                    Builder.CreateAlignedStore(*arg_it,
                             Builder.CreateInBoundsGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)),
                             Align(sizeof(void*)));
                 }
@@ -2555,55 +2224,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
             ChangesMade = true;
         }
     }
-    for (auto CI : write_barriers) {
-        auto parent = CI->getArgOperand(0);
-        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
-                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
-            CI->eraseFromParent();
-            continue;
-        }
-        if (CFGModified) {
-            *CFGModified = true;
-        }
-        auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level");
-        int debug_info = 1;
-        if (DebugInfoMeta != nullptr) {
-            debug_info = cast<ConstantInt>(cast<ConstantAsMetadata>(DebugInfoMeta)->getValue())->getZExtValue();
-        }
-
-        IRBuilder<> builder(CI);
-        builder.SetCurrentDebugLocation(CI->getDebugLoc());
-        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED);
-        setName(parBits, "parent_bits", debug_info);
-        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED));
-        setName(parOldMarked, "parent_old_marked", debug_info);
-        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
-        builder.SetInsertPoint(mayTrigTerm);
-        setName(mayTrigTerm->getParent(), "may_trigger_wb", debug_info);
-        Value *anyChldNotMarked = NULL;
-        for (unsigned i = 1; i < CI->arg_size(); i++) {
-            Value *child = CI->getArgOperand(i);
-            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED);
-            setName(chldBit, "child_bit", debug_info);
-            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0),"child_not_marked");
-            setName(chldNotMarked, "child_not_marked", debug_info);
-            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
-        }
-        assert(anyChldNotMarked); // handled by all_of test above
-        MDBuilder MDB(parent->getContext());
-        SmallVector<uint32_t, 2> Weights{1, 9};
-        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
-                                                  MDB.createBranchWeights(Weights));
-        setName(trigTerm->getParent(), "trigger_wb", debug_info);
-        builder.SetInsertPoint(trigTerm);
-        if (CI->getCalledOperand() == write_barrier_func) {
-            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
-        }
-        else {
-            assert(false);
-        }
-        CI->eraseFromParent();
-    }
+    CleanupWriteBarriers(F, S, write_barriers, CFGModified);
     if (maxframeargs == 0 && Frame) {
         Frame->eraseFromParent();
     }
@@ -2661,11 +2282,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
     // Pointee types don't have semantics, so the optimizer is
     // free to rewrite them if convenient. We need to change
     // it back here for the store.
-    if (Val->getType() != T_prjlvalue) {
-        // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-        assert(Val->getContext().supportsTypedPointers());
-        Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
-    }
+    assert(Val->getType() == T_prjlvalue);
     new StoreInst(Val, slotAddress, InsertBefore);
 }
 
@@ -2745,14 +2362,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             for (CallInst *II : ToDelete) {
                 II->eraseFromParent();
             }
-            if (slotAddress->getType() != AI->getType()) {
-                // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(slotAddress->getContext().supportsTypedPointers());
-                auto BCI  = new BitCastInst(slotAddress, AI->getType());
-                BCI->insertAfter(slotAddress);
-                slotAddress = BCI;
-            }
+            assert(slotAddress->getType() == AI->getType());
             AI->replaceAllUsesWith(slotAddress);
             AI->eraseFromParent();
             AI = NULL;
@@ -2777,11 +2387,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
                 slotAddress->insertAfter(gcframe);
                 auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
                 auto Elem = MaybeExtractScalar(S, ValExpr, SI);
-                if (Elem->getType() != T_prjlvalue) {
-                    // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                    assert(Elem->getContext().supportsTypedPointers());
-                    Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
-                }
+                assert(Elem->getType() == T_prjlvalue);
                 //auto Idxs = ArrayRef<unsigned>(Tracked[i]);
                 //Value *Elem = ExtractScalar(Base, true, Idxs, SI);
                 Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index 15866d0855fc1..e09ea892ee488 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -8,7 +8,11 @@
 
 #include <llvm/ADT/DepthFirstIterator.h>
 #include <llvm/ADT/Statistic.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Triple.h>
+#else
 #include <llvm/ADT/Triple.h>
+#endif
 #include <llvm/Analysis/CFG.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/Constants.h>
@@ -37,13 +41,14 @@ using namespace llvm;
 /* Lowers Julia Exception Handlers and colors EH frames.
  *
  *  Our task is to lower:
- * call void @julia.except_enter()
+ * call {i32, ptr} @julia.except_enter(ct)
  * <...>
  * call void jl_pop_handler(1)
  *
  * to
  *
- * call void @jl_enter_handler(jl_handler *%buff)
+ * call void @jl_enter_handler(ct, jl_handler *%buff)
+ * call i32 @jl_setjmp(jmpbuf[] %buff, 0)
  * <...>
  * call void jl_pop_handler(1)
  *
@@ -81,25 +86,18 @@ namespace {
  * If the module doesn't have declarations for the jl_enter_handler and setjmp
  * functions, insert them.
  */
-static void ensure_enter_function(Module &M, const Triple &TT)
+static void ensure_enter_function(Module &M, Type *T_pjlvalue, const Triple &TT)
 {
-    auto T_int8  = Type::getInt8Ty(M.getContext());
-    auto T_pint8 = PointerType::get(T_int8, 0);
+    auto T_ptr = PointerType::get(M.getContext(), 0);
     auto T_void = Type::getVoidTy(M.getContext());
     auto T_int32 = Type::getInt32Ty(M.getContext());
     if (!M.getNamedValue(XSTR(jl_enter_handler))) {
-        SmallVector<Type*, 0> ehargs(0);
-        ehargs.push_back(T_pint8);
-        Function::Create(FunctionType::get(T_void, ehargs, false),
+        Function::Create(FunctionType::get(T_void, {T_pjlvalue, T_ptr}, false),
                          Function::ExternalLinkage, XSTR(jl_enter_handler), &M);
     }
     if (!M.getNamedValue(jl_setjmp_name)) {
-        SmallVector<Type*, 0> args2(0);
-        args2.push_back(T_pint8);
-        if (!TT.isOSWindows()) {
-            args2.push_back(T_int32);
-        }
-        Function::Create(FunctionType::get(T_int32, args2, false),
+        Type *args2[] = {T_ptr, T_int32};
+        Function::Create(FunctionType::get(T_int32, ArrayRef(args2, TT.isOSWindows() ? 1 : 2), false),
                          Function::ExternalLinkage, jl_setjmp_name, &M)
             ->addFnAttr(Attribute::ReturnsTwice);
     }
@@ -111,14 +109,14 @@ static bool lowerExcHandlers(Function &F) {
     Function *except_enter_func = M.getFunction("julia.except_enter");
     if (!except_enter_func)
         return false; // No EH frames in this module
-    ensure_enter_function(M, TT);
+    ensure_enter_function(M, except_enter_func->getFunctionType()->getParamType(0), TT);
     Function *leave_func = M.getFunction(XSTR(jl_pop_handler));
+    Function *leave_noexcept_func = M.getFunction(XSTR(jl_pop_handler_noexcept));
     Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler));
     Function *setjmp_func = M.getFunction(jl_setjmp_name);
-
-    auto T_pint8 = Type::getInt8PtrTy(M.getContext(), 0);
-    Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
-    Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
+    auto T_ptr = PointerType::get(M.getContext(), 0);
+    Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_ptr });
+    Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_ptr });
 
     /* Step 1: EH Depth Numbering */
     std::map<llvm::CallInst *, int> EnterDepth;
@@ -150,9 +148,9 @@ static bool lowerExcHandlers(Function &F) {
                 continue;
             if (Callee == except_enter_func)
                 EnterDepth[CI] = Depth++;
-            else if (Callee == leave_func) {
+            else if (Callee == leave_func || Callee == leave_noexcept_func) {
                 LeaveDepth[CI] = Depth;
-                Depth -= cast<ConstantInt>(CI->getArgOperand(0))->getLimitedValue();
+                Depth -= cast<ConstantInt>(CI->getArgOperand(1))->getLimitedValue();
             }
             assert(Depth >= 0);
             if (Depth > MaxDepth)
@@ -179,7 +177,7 @@ static bool lowerExcHandlers(Function &F) {
         auto *buff = new AllocaInst(Type::getInt8Ty(F.getContext()), allocaAddressSpace,
                 handler_sz, Align(16), "", firstInst);
         if (allocaAddressSpace) {
-            AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, Type::getInt8PtrTy(F.getContext(), AddressSpace::Generic));
+            AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, PointerType::get(F.getContext(), AddressSpace::Generic));
             buff_casted->insertAfter(buff);
             buffs.push_back(buff_casted);
         } else {
@@ -192,7 +190,7 @@ static bool lowerExcHandlers(Function &F) {
         assert(it.second >= 0);
         Instruction *buff = buffs[it.second];
         CallInst *enter = it.first;
-        auto new_enter = CallInst::Create(jlenter_func, buff, "", enter);
+        auto new_enter = CallInst::Create(jlenter_func, {enter->getArgOperand(0), buff}, "", enter);
         Value *lifetime_args[] = {
             handler_sz64,
             buff
@@ -200,10 +198,7 @@ static bool lowerExcHandlers(Function &F) {
         CallInst::Create(lifetime_start, lifetime_args, "", new_enter);
         CallInst *sj;
         if (!TT.isOSWindows()) {
-            // For LLVM 3.3 compatibility
-            Value *args[] = {buff,
-                            ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
-            sj = CallInst::Create(setjmp_func, args, "", enter);
+            sj = CallInst::Create(setjmp_func, {buff, ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}, "", enter);
         } else {
             sj = CallInst::Create(setjmp_func, buff, "", enter);
         }
@@ -213,13 +208,31 @@ static bool lowerExcHandlers(Function &F) {
             new_enter->setMetadata(LLVMContext::MD_dbg, dbg);
             sj->setMetadata(LLVMContext::MD_dbg, dbg);
         }
-        enter->replaceAllUsesWith(sj);
+        SmallVector<Instruction*> ToErase;
+        for (auto *U : enter->users()) {
+            if (auto *EEI = dyn_cast<ExtractValueInst>(U)) {
+                if (EEI->getNumIndices() == 1) {
+                    if (EEI->getIndices()[0] == 0)
+                        EEI->replaceAllUsesWith(sj);
+                    else
+                        EEI->replaceAllUsesWith(buff);
+                    ToErase.push_back(EEI);
+                }
+            }
+        }
+        for (auto *EEI : ToErase)
+            EEI->eraseFromParent();
+        if (!enter->use_empty()) {
+            Value *agg = InsertValueInst::Create(UndefValue::get(enter->getType()), sj, ArrayRef<unsigned>(0), "", enter);
+            agg = InsertValueInst::Create(agg, buff, ArrayRef<unsigned>(1), "", enter);
+            enter->replaceAllUsesWith(agg);
+        }
         enter->eraseFromParent();
     }
     // Insert lifetime end intrinsics after every leave.
     for (auto it : LeaveDepth) {
         int StartDepth = it.second - 1;
-        int npops = cast<ConstantInt>(it.first->getArgOperand(0))->getLimitedValue();
+        int npops = cast<ConstantInt>(it.first->getArgOperand(1))->getLimitedValue();
         for (int i = 0; i < npops; ++i) {
             assert(StartDepth-i >= 0);
             Value *lifetime_args[] = {
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index d1c3f9bc464eb..d544f182637b9 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -14,7 +14,11 @@
 #include <llvm/Pass.h>
 #include <llvm/ADT/BitVector.h>
 #include <llvm/ADT/Statistic.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Triple.h>
+#else
 #include <llvm/ADT/Triple.h>
+#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
@@ -96,11 +100,11 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca
                 }
                 if (auto callee = call->getCalledFunction()) {
                     auto name = callee->getName();
-                    if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
+                    if (name.starts_with("llvm.muladd.") || name.starts_with("llvm.fma.")) {
                         flag |= JL_TARGET_CLONE_MATH;
                     }
-                    else if (name.startswith("julia.cpu.")) {
-                        if (name.startswith("julia.cpu.have_fma.")) {
+                    else if (name.starts_with("julia.cpu.")) {
+                        if (name.starts_with("julia.cpu.have_fma.")) {
                             // for some platforms we know they always do (or don't) support
                             // FMA. in those cases we don't need to clone the function.
                             // always_have_fma returns an optional<bool>
@@ -673,6 +677,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
     trampoline->removeFnAttr("julia.mv.reloc");
     trampoline->removeFnAttr("julia.mv.clones");
     trampoline->addFnAttr("julia.mv.alias");
+    trampoline->setDLLStorageClass(alias->getDLLStorageClass());
     alias->eraseFromParent();
 
     uint32_t id;
@@ -754,7 +759,7 @@ std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F) const
     if (F->isDeclaration()) {
         auto extern_decl = extern_relocs.find(F);
         assert(extern_decl != extern_relocs.end() && "Missing extern relocation slot!");
-        return {(uint32_t)-1, extern_decl->second};
+        return {UINT32_MAX, extern_decl->second};
     }
     else {
         auto id = get_func_id(F);
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index 9c62a8a5711a5..cc6c73161968d 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -8,6 +8,7 @@
 #include "llvm-version.h"
 
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/DerivedTypes.h"
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Metadata.h>
 #include <llvm/IR/Module.h>
@@ -27,7 +28,7 @@ JuliaPassContext::JuliaPassContext()
         pgcstack_getter(nullptr), adoptthread_func(nullptr), gc_flush_func(nullptr),
         gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
         pointer_from_objref_func(nullptr), gc_loaded_func(nullptr), alloc_obj_func(nullptr),
-        typeof_func(nullptr), write_barrier_func(nullptr),
+        typeof_func(nullptr), write_barrier_func(nullptr), pop_handler_noexcept_func(nullptr),
         call_func(nullptr), call2_func(nullptr), call3_func(nullptr), module(nullptr)
 {
 }
@@ -53,6 +54,7 @@ void JuliaPassContext::initFunctions(Module &M)
     typeof_func = M.getFunction("julia.typeof");
     write_barrier_func = M.getFunction("julia.write_barrier");
     alloc_obj_func = M.getFunction("julia.gc_alloc_obj");
+    pop_handler_noexcept_func = M.getFunction(XSTR(jl_pop_handler_noexcept));
     call_func = M.getFunction("julia.call");
     call2_func = M.getFunction("julia.call2");
     call3_func = M.getFunction("julia.call3");
@@ -161,7 +163,7 @@ namespace jl_intrinsics {
             auto intrinsic = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx),
+                    { PointerType::get(ctx, 0),
                         T_size,
                         T_size }, // type
                     false),
@@ -256,8 +258,8 @@ namespace jl_intrinsics {
 }
 
 namespace jl_well_known {
-    static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc_instrumented);
-    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc_instrumented);
+    static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
+    static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc);
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
     static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);
 
@@ -271,7 +273,7 @@ namespace jl_well_known {
             auto bigAllocFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx), T_size , T_size},
+                    { PointerType::get(ctx, 0), T_size , T_size},
                     false),
                 Function::ExternalLinkage,
                 GC_BIG_ALLOC_NAME);
@@ -279,20 +281,20 @@ namespace jl_well_known {
             return addGCAllocAttributes(bigAllocFunc);
         });
 
-    const WellKnownFunctionDescription GCPoolAlloc(
-        GC_POOL_ALLOC_NAME,
+    const WellKnownFunctionDescription GCSmallAlloc(
+        GC_SMALL_ALLOC_NAME,
         [](Type *T_size) {
             auto &ctx = T_size->getContext();
             auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
-            auto poolAllocFunc = Function::Create(
+            auto smallAllocFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size },
+                    { PointerType::get(ctx, 0), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size },
                     false),
                 Function::ExternalLinkage,
-                GC_POOL_ALLOC_NAME);
-            poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
-            return addGCAllocAttributes(poolAllocFunc);
+                GC_SMALL_ALLOC_NAME);
+            smallAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
+            return addGCAllocAttributes(smallAllocFunc);
         });
 
     const WellKnownFunctionDescription GCQueueRoot(
@@ -323,7 +325,7 @@ namespace jl_well_known {
             auto allocTypedFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx),
+                    { PointerType::get(ctx, 0),
                         T_size,
                         T_size }, // type
                     false),
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 346500df51ca1..d46f1f46634e6 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -60,6 +60,7 @@ struct JuliaPassContext {
     llvm::Function *alloc_obj_func;
     llvm::Function *typeof_func;
     llvm::Function *write_barrier_func;
+    llvm::Function *pop_handler_noexcept_func;
     llvm::Function *call_func;
     llvm::Function *call2_func;
     llvm::Function *call3_func;
@@ -146,8 +147,8 @@ namespace jl_well_known {
     // `jl_gc_big_alloc`: allocates bytes.
     extern const WellKnownFunctionDescription GCBigAlloc;
 
-    // `jl_gc_pool_alloc`: allocates bytes.
-    extern const WellKnownFunctionDescription GCPoolAlloc;
+    // `jl_gc_small_alloc`: allocates bytes.
+    extern const WellKnownFunctionDescription GCSmallAlloc;
 
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index cc7dace28b24e..4e5a2ee5e0d54 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -60,7 +60,7 @@ struct PropagateJuliaAddrspacesVisitor : public InstVisitor<PropagateJuliaAddrsp
 };
 
 static unsigned getValueAddrSpace(Value *V) {
-    return cast<PointerType>(V->getType())->getAddressSpace();
+    return V->getType()->getPointerAddressSpace();
 }
 
 static bool isSpecialAS(unsigned AS) {
@@ -139,7 +139,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
                 break;
             } else {
                 // Ok, we've reached a leaf - check if it is eligible for lifting
-                if (!CurrentV->getType()->isPointerTy() ||
+                if (!CurrentV->getType()->isPtrOrPtrVectorTy() ||
                     isSpecialAS(getValueAddrSpace(CurrentV))) {
                     // If not, poison all (recursive) users of this value, to prevent
                     // looking at them again in future iterations.
@@ -163,14 +163,22 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
             Instruction *InstV = cast<Instruction>(V);
             Instruction *NewV = InstV->clone();
             ToInsert.push_back(std::make_pair(NewV, InstV));
+            #if JL_LLVM_VERSION >= 170000
+            Type *NewRetTy = PointerType::get(InstV->getType(), allocaAddressSpace);
+            #else
             Type *NewRetTy = PointerType::getWithSamePointeeType(cast<PointerType>(InstV->getType()), allocaAddressSpace);
+            #endif
             NewV->mutateType(NewRetTy);
             LiftingMap[InstV] = NewV;
             ToRevisit.push_back(NewV);
         }
     }
     auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * {
+        #if JL_LLVM_VERSION >= 170000
+        PointerType *TargetType = PointerType::get(CurrentV->getType(), allocaAddressSpace);
+        #else
         PointerType *TargetType = PointerType::getWithSamePointeeType(cast<PointerType>(CurrentV->getType()), allocaAddressSpace);
+        #endif
         while (!LiftingMap.count(CurrentV)) {
             if (isa<BitCastInst>(CurrentV))
                 CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
@@ -184,6 +192,9 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
         }
         if (LiftingMap.count(CurrentV))
             CurrentV = LiftingMap[CurrentV];
+        #if JL_LLVM_VERSION >= 170000
+        assert(CurrentV->getType() == TargetType);
+        #else
         if (CurrentV->getType() != TargetType) {
             // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
             assert(CurrentV->getContext().supportsTypedPointers());
@@ -191,6 +202,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
             ToInsert.push_back(std::make_pair(BCI, InsertPt));
             CurrentV = BCI;
         }
+        #endif
         return CurrentV;
     };
 
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index d2650d6875cd4..488dd46cade21 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -9,7 +9,11 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Triple.h>
+#else
 #include <llvm/ADT/Triple.h>
+#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
@@ -91,12 +95,12 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
         if (offset) {
             SmallVector<Type*, 0> args(0);
             args.push_back(offset->getType());
-            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false),
+            auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), args, false),
                                      dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false);
             tls = builder.CreateCall(tp, {offset}, "pgcstack");
         }
         else {
-            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false),
+            auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false),
                                      const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}",
                                      false);
             tls = builder.CreateCall(tp, {}, "tls_pgcstack");
@@ -122,11 +126,10 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
         }
         if (!offset)
             offset = ConstantInt::getSigned(T_size, jl_tls_offset);
-        auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false);
+        auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false), asm_str, "=r", false);
         tls = builder.CreateCall(tp, {}, "thread_ptr");
-        tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
+        tls = builder.CreateInBoundsGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
     }
-    tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo());
     return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack");
 }
 
@@ -188,7 +191,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         builder.SetInsertPoint(fastTerm->getParent());
         fastTerm->removeFromParent();
         MDNode *tbaa = tbaa_gcframe;
-        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
+        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
         builder.Insert(fastTerm);
         phi->addIncoming(pgcstack, fastTerm->getParent());
         // emit pre-return cleanup
@@ -200,7 +203,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             for (auto &BB : *pgcstack->getParent()->getParent()) {
                 if (isa<ReturnInst>(BB.getTerminator())) {
                     builder.SetInsertPoint(BB.getTerminator());
-                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
+                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
                 }
             }
         }
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index e291e47b59b0e..89ae1d292d108 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -44,6 +44,9 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
 
         DstTy = SrcTy;
         if (auto Ty = dyn_cast<PointerType>(SrcTy)) {
+            #if JL_LLVM_VERSION >= 170000
+            DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
+            #else
             if (Ty->isOpaque()) {
                 DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
             }
@@ -53,6 +56,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
                         remapType(Ty->getNonOpaquePointerElementType()),
                         ASRemapper(Ty->getAddressSpace()));
             }
+            #endif
         }
         else if (auto Ty = dyn_cast<FunctionType>(SrcTy)) {
             SmallVector<Type *, 4> Params;
@@ -153,6 +157,10 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
                     Ops.push_back(NewOp ? cast<Constant>(NewOp) : Op);
                 }
 
+                #if JL_LLVM_VERSION >= 170000
+                if (CE->getOpcode() != Instruction::GetElementPtr)
+                    DstV = CE->getWithOperands(Ops, Ty);
+                #else
                 if (CE->getOpcode() == Instruction::GetElementPtr) {
                     // GEP const exprs need to know the type of the source.
                     // asserts remapType(typeof arg0) == typeof mapValue(arg0).
@@ -166,6 +174,7 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
                 }
                 else
                     DstV = CE->getWithOperands(Ops, Ty);
+                #endif
             }
         }
 
@@ -208,7 +217,12 @@ bool RemoveNoopAddrSpaceCasts(Function *F)
                 LLVM_DEBUG(
                         dbgs() << "Removing noop address space cast:\n"
                                << I << "\n");
-                ASC->replaceAllUsesWith(ASC->getOperand(0));
+                if (ASC->getType() == ASC->getOperand(0)->getType()) {
+                    ASC->replaceAllUsesWith(ASC->getOperand(0));
+                } else {
+                    // uncanonicalized addrspacecast; just use the value
+                    ASC->replaceAllUsesWith(ASC->getOperand(0));
+                }
                 NoopCasts.push_back(ASC);
             }
         }
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index f29802b438e1e..07afa8c930deb 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -177,7 +177,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution
         const MDString *S = dyn_cast<MDString>(Op);
         if (S) {
             LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n");
-            if (S->getString().startswith("julia")) {
+            if (S->getString().starts_with("julia")) {
                 if (S->getString().equals("julia.simdloop"))
                     simd = true;
                 if (S->getString().equals("julia.ivdep"))
diff --git a/src/llvm-version.h b/src/llvm-version.h
index f7da953a99562..2a38bb7c488b8 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -15,7 +15,7 @@
 #endif
 
 #if JL_LLVM_VERSION >= 160000
-#define JL_LLVM_OPAQUE_POINTERS 1
+    #define JL_LLVM_OPAQUE_POINTERS 1
 #endif
 
 #ifdef __cplusplus
diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp
index d56fb3a0497fa..e98c375b711b3 100644
--- a/src/llvm_api.cpp
+++ b/src/llvm_api.cpp
@@ -21,6 +21,7 @@
 #include <llvm/Support/CBindingWrapping.h>
 #include <llvm/Support/MemoryBuffer.h>
 
+#if JL_LLVM_VERSION < 180000
 namespace llvm {
 namespace orc {
 class OrcV2CAPIHelper {
@@ -38,7 +39,7 @@ class OrcV2CAPIHelper {
 };
 } // namespace orc
 } // namespace llvm
-
+#endif
 
 typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef;
 typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
@@ -46,8 +47,13 @@ typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef)
+#if JL_LLVM_VERSION >= 180000
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::SymbolStringPoolEntryUnsafe::PoolEntry,
+                                   LLVMOrcSymbolStringPoolEntryRef)
+#else
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry,
                                    LLVMOrcSymbolStringPoolEntryRef)
+#endif
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility,
                                    LLVMOrcMaterializationResponsibilityRef)
@@ -113,7 +119,11 @@ JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef
 JLJITMangleAndIntern_impl(JuliaOJITRef JIT,
                                             const char *Name)
 {
+#if JL_LLVM_VERSION >= 180000
+    return wrap(orc::SymbolStringPoolEntryUnsafe::take(unwrap(JIT)->mangle(Name)).rawPtr());
+#else
     return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name)));
+#endif
 }
 
 JL_DLLEXPORT_CODEGEN const char *
diff --git a/src/macroexpand.scm b/src/macroexpand.scm
index 424e921a35713..f67145317dc7a 100644
--- a/src/macroexpand.scm
+++ b/src/macroexpand.scm
@@ -126,6 +126,16 @@
                                  (else '())))
                                (else '()))))))
 
+   ;; for/generator
+   (pattern-lambda (for assgn body)
+                   (if (eq? (car assgn) 'block)
+                    `(varlist ,@(map cadr (cdr assgn)))
+                     (cons 'varlist (cadr assgn))))
+   (pattern-lambda (generator body (filter filt . assgn))
+                    (cons 'varlist (map (lambda (x) (cadr x)) assgn)))
+   (pattern-lambda (generator body . assgn)
+                    (cons 'varlist (map (lambda (x) (cadr x)) assgn)))
+
    ;; macro definition
    (pattern-lambda (macro (call name . argl) body)
                    `(-> (tuple ,@argl) ,body))
@@ -184,18 +194,18 @@
       (unescape (cadr e))
       e))
 
-(define (unescape-global-lhs e env m parent-scope inarg)
+(define (unescape-global-lhs e env m lno parent-scope inarg)
   (cond ((not (pair? e)) e)
-        ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg))
+        ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m lno parent-scope inarg))
         ((memq (car e) '(parameters tuple))
          (list* (car e) (map (lambda (e)
-                          (unescape-global-lhs e env m parent-scope inarg))
+                          (unescape-global-lhs e env m lno parent-scope inarg))
                         (cdr e))))
         ((and (memq (car e) '(|::| kw)) (length= e 3))
-         (list (car e) (unescape-global-lhs (cadr e) env m parent-scope inarg)
-                       (resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+         (list (car e) (unescape-global-lhs (cadr e) env m lno parent-scope inarg)
+                       (resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))
         (else
-         (resolve-expansion-vars-with-new-env e env m parent-scope inarg))))
+         (resolve-expansion-vars-with-new-env e env m lno parent-scope inarg))))
 
 (define (typedef-expr-name e)
   (cond ((atom? e) e)
@@ -219,30 +229,26 @@
               lst)))
 
 ;; get the name from a function formal argument expression, allowing `(escape x)`
-(define (try-arg-name v)
-  (cond ((symbol? v) (list v))
+(define (try-arg-name v (escaped #f))
+  (cond ((symbol? v) (if escaped '() (list v)))
         ((atom? v) '())
         (else
          (case (car v)
-           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v))))
-           ((... kw =) (try-arg-name (cadr v)))
-           ((escape) (list v))
-           ((hygienic-scope) (try-arg-name (cadr v)))
+           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v) escaped)))
+           ((... kw =) (try-arg-name (cadr v) escaped))
+           ((escape) (if escaped (list (cadr v)) '()))
+           ((hygienic-scope) (try-arg-name (cadr v) escaped))
+           ((tuple) (apply nconc (map (lambda (e) (try-arg-name e escaped)) (cdr v))))
            ((meta)  ;; allow certain per-argument annotations
             (if (nospecialize-meta? v #t)
-                (try-arg-name (caddr v))
+                (try-arg-name (caddr v) escaped)
                 '()))
            (else '())))))
 
 ;; get names from a formal argument list, specifying whether to include escaped ones
 (define (safe-arg-names lst (escaped #f))
   (apply nconc
-         (map (lambda (v)
-                (let ((vv (try-arg-name v)))
-                  (if (eq? escaped (and (pair? vv) (pair? (car vv)) (eq? (caar vv) 'escape)))
-                      (if escaped (list (cadar vv)) vv)
-                      '())))
-              lst)))
+         (map (lambda (v) (try-arg-name v escaped)) lst)))
 
 ;; arg names, looking only at positional args
 (define (safe-llist-positional-args lst (escaped #f))
@@ -280,18 +286,18 @@
 ;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside
 ;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`,
 ;; and we want `inarg` to be true for the `(x)` part.
-(define (resolve-in-lhs e env m parent-scope inarg)
-  (define (recur x) (resolve-in-lhs x env m parent-scope inarg))
-  (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+(define (resolve-in-lhs e env m lno parent-scope inarg)
+  (define (recur x) (resolve-in-lhs x env m lno parent-scope inarg))
+  (define (other x) (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
   (case (and (pair? e) (car e))
     ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e))))
     ((|::|)  `(|::| ,(recur (cadr e)) ,(other (caddr e))))
     ((call)  `(call ,(other (cadr e))
                     ,@(map (lambda (x)
-                             (resolve-expansion-vars-with-new-env x env m parent-scope #t))
+                             (resolve-expansion-vars-with-new-env x env m lno parent-scope #t))
                            (cddr e))))
     ((tuple) `(tuple ,@(map (lambda (x)
-                              (resolve-expansion-vars-with-new-env x env m parent-scope #t))
+                              (resolve-expansion-vars-with-new-env x env m lno parent-scope #t))
                             (cdr e))))
     (else (other e))))
 
@@ -328,7 +334,7 @@
                    (keywords-introduced-by x))
               env)))))))
 
-(define (resolve-expansion-vars-with-new-env x env m parent-scope inarg (outermost #f))
+(define (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg (outermost #f))
   (resolve-expansion-vars-
    x
    (if (and (pair? x) (eq? (car x) 'let))
@@ -336,7 +342,7 @@
        ;; the same expression
        env
        (new-expansion-env-for x env outermost))
-   m parent-scope inarg))
+   m lno parent-scope inarg))
 
 (define (reescape ux x)
   (if (and (pair? x) (eq? (car x) 'escape))
@@ -345,19 +351,41 @@
 
 ;; type has special behavior: identifiers inside are
 ;; field names, not expressions.
-(define (resolve-struct-field-expansion x env m parent-scope inarg)
+(define (resolve-struct-field-expansion x env m lno parent-scope inarg)
   (let ((ux (unescape x)))
     (cond
         ((atom? ux) ux)
         ((and (pair? ux) (eq? (car ux) '|::|))
          `(|::| ,(unescape (cadr ux))
-           ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg)))
+           ,(resolve-expansion-vars- (reescape (caddr ux) x) env m lno parent-scope inarg)))
         ((and (pair? ux) (memq (car ux) '(const atomic)))
-         `(,(car ux) ,(resolve-struct-field-expansion (reescape (cadr ux) x) env m parent-scope inarg)))
+         `(,(car ux) ,(resolve-struct-field-expansion (reescape (cadr ux) x) env m lno parent-scope inarg)))
         (else
-         (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))))
-
-(define (resolve-expansion-vars- e env m parent-scope inarg)
+         (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)))))
+
+(define (resolve-letlike-assign bind env newenv m lno parent-scope inarg)
+  (if (assignment? bind)
+    (make-assignment
+      ;; expand binds in newenv with dummy RHS
+      (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0)
+                                    newenv m lno parent-scope inarg))
+      ;; expand initial values in old env
+      (resolve-expansion-vars- (caddr bind) env m lno parent-scope inarg))
+    ;; Just expand everything else that's not an assignment. N.B.: This includes
+    ;; assignments inside escapes, which probably need special handling (TODO).
+    (resolve-expansion-vars- bind newenv m lno parent-scope inarg)))
+
+(define (for-ranges-list ranges)
+  (if (eq? (car ranges) 'escape)
+    (map (lambda (range) `(escape ,range)) (for-ranges-list (cadr ranges)))
+    (if (eq? (car ranges) 'block)
+      (cdr ranges)
+      (list ranges))))
+
+(define (just-line? ex)
+  (and (pair? ex) (eq? (car ex) 'line) (atom? (cadr ex)) (or (atom? (caddr ex)) (nothing? (caddr ex)))))
+
+(define (resolve-expansion-vars- e env m lno parent-scope inarg)
   (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e))
          e)
         ((symbol? e)
@@ -376,23 +404,35 @@
                      (env (car scope))
                      (m (cadr scope))
                      (parent-scope (cdr parent-scope)))
-                (resolve-expansion-vars-with-new-env (cadr e) env m parent-scope inarg))))
+                (resolve-expansion-vars-with-new-env (cadr e) env m lno parent-scope inarg))))
            ((global)
             `(global
                ,@(map (lambda (arg)
                        (if (assignment? arg)
-                           `(= ,(unescape-global-lhs (cadr arg) env m parent-scope inarg)
-                               ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg))
-                           (unescape-global-lhs arg env m parent-scope inarg)))
+                           `(= ,(unescape-global-lhs (cadr arg) env m lno parent-scope inarg)
+                               ,(resolve-expansion-vars-with-new-env (caddr arg) env m lno parent-scope inarg))
+                           (unescape-global-lhs arg env m lno parent-scope inarg)))
                       (cdr e))))
+           ((toplevel) ; re-wrap Expr(:toplevel) in the current hygienic-scope(s)
+            `(toplevel
+               ,@(map (lambda (arg)
+                       ;; Minor optimization: A lot of toplevel exprs have just bare line numbers in them.
+                       ;; don't bother with the full rewrapping in that case (even though
+                       ;; this would be semantically legal) - lowering won't touch them anyways.
+                       (if (just-line? arg) arg
+                        (let loop ((parent-scope parent-scope) (m m) (lno lno) (arg arg))
+                          (let ((wrapped `(hygienic-scope ,arg ,m ,@lno)))
+                            (if (null? parent-scope) wrapped
+                              (loop (cdr parent-scope) (cadar parent-scope) (caddar parent-scope) wrapped))))))
+                        (cdr e))))
            ((using import export meta line inbounds boundscheck loopinfo inline noinline purity) (map unescape e))
            ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted.
            ((symboliclabel) e)
            ((symbolicgoto) e)
            ((struct)
-            `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m parent-scope inarg)
+            `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m lno parent-scope inarg)
                      ,(map (lambda (x)
-                            (resolve-struct-field-expansion x env m parent-scope inarg))
+                            (resolve-struct-field-expansion x env m lno parent-scope inarg))
                            (cadddr e))))
 
            ((parameters)
@@ -403,17 +443,17 @@
                                 (x (if (and (not inarg) (symbol? ux))
                                        `(kw ,ux ,x)
                                        x)))
-                           (resolve-expansion-vars- x env m parent-scope #f)))
+                           (resolve-expansion-vars- x env m lno parent-scope #f)))
                        (cdr e))))
 
            ((->)
-            `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg)
-                 ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+            `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m lno parent-scope inarg)
+                 ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))
 
            ((= function)
-             `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg)
+             `(,(car e) ,(resolve-in-lhs (cadr e) env m lno parent-scope inarg)
                         ,@(map (lambda (x)
-                                   (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+                                   (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
                                  (cddr e))))
 
            ((kw)
@@ -427,55 +467,67 @@
                 `(kw (|::|
                       ,@(if argname
                             (list (if inarg
-                                      (resolve-expansion-vars- argname env m parent-scope inarg)
+                                      (resolve-expansion-vars- argname env m lno parent-scope inarg)
                                       ;; in keyword arg A=B, don't transform "A"
                                       (unescape argname)))
                             '())
-                      ,(resolve-expansion-vars- type env m parent-scope inarg))
-                     ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))))
+                      ,(resolve-expansion-vars- type env m lno parent-scope inarg))
+                     ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg))))
              (else
               `(kw ,(if inarg
-                        (resolve-expansion-vars- (cadr e) env m parent-scope inarg)
+                        (resolve-expansion-vars- (cadr e) env m lno parent-scope inarg)
                         (unescape (cadr e)))
-                   ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))))
+                   ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))))
 
            ((let)
             (let* ((newenv (new-expansion-env-for e env))
-                   (body   (resolve-expansion-vars- (caddr e) newenv m parent-scope inarg))
+                   (body   (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg))
                    (binds  (let-binds e)))
               `(let (block
                      ,@(map
                         (lambda (bind)
-                          (if (assignment? bind)
-                              (make-assignment
-                               ;; expand binds in old env with dummy RHS
-                               (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0)
-                                                              newenv m parent-scope inarg))
-                               ;; expand initial values in old env
-                               (resolve-expansion-vars- (caddr bind) env m parent-scope inarg))
-                              (resolve-expansion-vars- bind newenv m parent-scope inarg)))
+                          (resolve-letlike-assign bind env newenv m lno parent-scope inarg))
                         binds))
                  ,body)))
+           ((for)
+            (let* ((newenv (new-expansion-env-for e env))
+                   (body   (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg))
+                   (expanded-ranges (map (lambda (range)
+                      (resolve-letlike-assign range env newenv m lno parent-scope inarg)) (for-ranges-list (cadr e)))))
+              (if (length= expanded-ranges 1)
+                `(for ,@expanded-ranges ,body))
+                `(for (block ,@expanded-ranges) ,body)))
+           ((generator)
+            (let* ((newenv (new-expansion-env-for e env))
+                   (body   (resolve-expansion-vars- (cadr e) newenv m lno parent-scope inarg))
+                   (filt? (eq? (car (caddr e)) 'filter))
+                   (range-exprs (if filt? (cddr (caddr e)) (cddr e)))
+                   (filt (if filt? (resolve-expansion-vars- (cadr (caddr e)) newenv m lno parent-scope inarg)))
+                   (expanded-ranges (map (lambda (range)
+                      (resolve-letlike-assign range env newenv m lno parent-scope inarg)) range-exprs)))
+              (if filt?
+                `(generator ,body (filter ,filt ,@expanded-ranges))
+                `(generator ,body ,@expanded-ranges))))
            ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly
-             (let ((parent-scope (cons (list env m) parent-scope))
+             (let ((parent-scope (cons (list env m lno) parent-scope))
                    (body (cadr e))
                    (m (caddr e))
                    (lno  (cdddr e)))
-              (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t)))
+              (resolve-expansion-vars-with-new-env body env m lno parent-scope inarg #t)))
            ((tuple)
             (cons (car e)
                   (map (lambda (x)
                          (if (assignment? x)
                              `(= ,(unescape (cadr x))
-                                 ,(resolve-expansion-vars-with-new-env (caddr x) env m parent-scope inarg))
-                             (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))
+                                 ,(resolve-expansion-vars-with-new-env (caddr x) env m lno parent-scope inarg))
+                             (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)))
                        (cdr e))))
 
            ;; todo: trycatch
            (else
             (cons (car e)
                   (map (lambda (x)
-                         (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+                         (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
                        (cdr e))))))))
 
 ;; decl-var that also identifies f in f()=...
@@ -576,11 +628,11 @@
         (cdr v)
         '())))
 
-(define (resolve-expansion-vars e m)
+(define (resolve-expansion-vars e m lno)
   ;; expand binding form patterns
   ;; keep track of environment, rename locals to gensyms
   ;; and wrap globals in (globalref module var) for macro's home module
-  (resolve-expansion-vars-with-new-env e '() m '() #f #t))
+  (resolve-expansion-vars-with-new-env e '() m lno '() #f #t))
 
 (define (julia-expand-quotes e)
   (cond ((not (pair? e)) e)
@@ -596,11 +648,12 @@
   (cond ((not (pair? e)) e)
         ((eq? (car e) 'inert) e)
         ((eq? (car e) 'module) e)
+        ((eq? (car e) 'toplevel) e)
         ((eq? (car e) 'hygienic-scope)
          (let ((form (cadr e)) ;; form is the expression returned from expand-macros
                (modu (caddr e)) ;; m is the macro's def module
                (lno  (cdddr e))) ;; lno is (optionally) the line number node
-           (resolve-expansion-vars form modu)))
+           (resolve-expansion-vars form modu lno)))
         (else
          (map julia-expand-macroscopes- e))))
 
diff --git a/src/method.c b/src/method.c
index ab9487bbecad4..d4457b1549353 100644
--- a/src/method.c
+++ b/src/method.c
@@ -121,11 +121,13 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 } else if (!jl_is_long(oc_nargs)) {
                     jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs);
                 }
-                jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(oc_nargs), functionloc, ci, isva);
+                jl_method_t *m = jl_make_opaque_closure_method(module, name,
+                    jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva, /*isinferred*/0);
                 return (jl_value_t*)m;
             }
             if (e->head == jl_cfunction_sym) {
                 JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc)
+                jl_task_t *ct = jl_current_task;
                 jl_value_t *typ = jl_exprarg(e, 0);
                 if (!jl_is_type(typ))
                     jl_error("first parameter to :cfunction must be a type");
@@ -142,7 +144,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
                             jl_error("could not evaluate cfunction return type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -154,7 +156,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
                             jl_error("could not evaluate cfunction argument type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -168,6 +170,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
             }
             if (e->head == jl_foreigncall_sym) {
                 JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects))
+                jl_task_t *ct = jl_current_task;
                 jl_value_t *rt = jl_exprarg(e, 1);
                 jl_value_t *at = jl_exprarg(e, 2);
                 if (!jl_is_type(rt)) {
@@ -175,7 +178,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
                             jl_error("could not evaluate ccall return type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -187,7 +190,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
                             jl_error("could not evaluate ccall argument type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -209,7 +212,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1));
                 i++;
             }
-            if (e->head == jl_method_sym || e->head == jl_module_sym) {
+            if (e->head == jl_method_sym || e->head == jl_module_sym || e->head == jl_throw_undef_if_not_sym) {
                 i++;
             }
             for (; i < nargs; i++) {
@@ -234,11 +237,9 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) {
                     if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) {
                         jl_binding_t *b = jl_get_binding(me_mod, me_sym);
-                        if (b && b->constp) {
-                            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-                            if (v && jl_is_module(v))
-                                return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
-                        }
+                        jl_value_t *v = jl_get_binding_value_if_const(b);
+                        if (v && jl_is_module(v))
+                            return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
                     }
                 }
             }
@@ -251,7 +252,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (jl_binding_resolved_p(fe_mod, fe_sym)) {
                     // look at some known called functions
                     jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
-                    if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) {
+                    if (jl_get_binding_value_if_const(b) == jl_builtin_tuple) {
                         size_t j;
                         for (j = 1; j < nargs; j++) {
                             if (!jl_is_quotenode(jl_exprarg(e, j)))
@@ -263,6 +264,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                                 val = jl_interpret_toplevel_expr_in(module, (jl_value_t*)e, NULL, sparam_vals);
                             }
                             JL_CATCH {
+                                val = NULL; // To make the analyzer happy see #define JL_TRY
                             }
                             if (val)
                                 return val;
@@ -290,20 +292,144 @@ jl_value_t *expr_arg1(jl_value_t *expr) {
     return jl_array_ptr_ref(args, 0);
 }
 
+static jl_value_t *alloc_edges(arraylist_t *edges_list)
+{
+    jl_value_t *jledges = (jl_value_t*)jl_alloc_svec(edges_list->len);
+    jl_value_t *jledges2 = NULL;
+    jl_value_t *codelocs = NULL;
+    JL_GC_PUSH3(&jledges, &jledges2, &codelocs);
+    size_t i;
+    for (i = 0; i < edges_list->len; i++) {
+        arraylist_t *edge = (arraylist_t*)edges_list->items[i];
+        jl_value_t *file = (jl_value_t*)edge->items[0];
+        int32_t line = 0; // not preserved by lowering (and probably lost even before that)
+        arraylist_t *edges_list2 = (arraylist_t*)edge->items[1];
+        size_t j, nlocs = (edge->len - 2) / 3;
+        codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3);
+        for (j = 0; j < nlocs; j++) {
+            jl_array_data(codelocs,int32_t)[3 * j + 0] = (intptr_t)edge->items[3 * j + 0 + 2];
+            jl_array_data(codelocs,int32_t)[3 * j + 1] = (intptr_t)edge->items[3 * j + 1 + 2];
+            jl_array_data(codelocs,int32_t)[3 * j + 2] = (intptr_t)edge->items[3 * j + 2 + 2];
+        }
+        codelocs = (jl_value_t*)jl_compress_codelocs(line, codelocs, nlocs);
+        jledges2 = alloc_edges(edges_list2);
+        jl_value_t *debuginfo = jl_new_struct(jl_debuginfo_type, file, jl_nothing, jledges2, codelocs);
+        jledges2 = NULL;
+        jl_svecset(jledges, i, debuginfo);
+        free(edges_list2);
+        free(edge);
+    }
+    JL_GC_POP();
+    return jledges;
+}
+
+static void add_edge(arraylist_t *edges_list, arraylist_t *inlinestack, int32_t *p_to, int32_t *p_pc)
+{
+    jl_value_t *locinfo = (jl_value_t*)arraylist_pop(inlinestack);
+    jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0);
+    int32_t line = jl_unbox_int32(jl_fieldref(locinfo, 1));
+    size_t i;
+    arraylist_t *edge = NULL;
+    for (i = 0; i < edges_list->len; i++) {
+        edge = (arraylist_t*)edges_list->items[i];
+        if (edge->items[0] == filesym)
+            break;
+    }
+    if (i == edges_list->len) {
+        edge = (arraylist_t*)malloc(sizeof(arraylist_t));
+        arraylist_t *edge_list2 = (arraylist_t*)malloc(sizeof(arraylist_t));
+        arraylist_new(edge, 0);
+        arraylist_new(edge_list2, 0);
+        arraylist_push(edge, (void*)filesym);
+        arraylist_push(edge, (void*)edge_list2);
+        arraylist_push(edges_list, (void*)edge);
+    }
+    *p_to = i + 1;
+    int32_t to = 0, pc = 0;
+    if (inlinestack->len) {
+        arraylist_t *edge_list2 = (arraylist_t*)edge->items[1];
+        add_edge(edge_list2, inlinestack, &to, &pc);
+    }
+    for (i = 2; i < edge->len; i += 3) {
+        if ((intptr_t)edge->items[i + 0] == line &&
+            (intptr_t)edge->items[i + 1] == to &&
+            (intptr_t)edge->items[i + 2] == pc) {
+            break;
+        }
+    }
+    if (i == edge->len) {
+        arraylist_push(edge, (void*)(intptr_t)line);
+        arraylist_push(edge, (void*)(intptr_t)to);
+        arraylist_push(edge, (void*)(intptr_t)pc);
+    }
+    *p_pc = (i - 2) / 3 + 1;
+}
+
+jl_debuginfo_t *jl_linetable_to_debuginfo(jl_array_t *codelocs_any, jl_array_t *linetable)
+{
+    size_t nlocs = jl_array_nrows(codelocs_any);
+    jl_value_t *toplocinfo = jl_array_ptr_ref(linetable, 0);
+    jl_sym_t *topfile = (jl_sym_t*)jl_fieldref_noalloc(toplocinfo, 0);
+    int32_t topline = jl_unbox_int32(jl_fieldref(toplocinfo, 1));
+    arraylist_t inlinestack;
+    arraylist_new(&inlinestack, 0);
+    arraylist_t edges_list;
+    arraylist_new(&edges_list, 0);
+    jl_value_t *jledges = NULL;
+    jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3);
+    jl_debuginfo_t *debuginfo = NULL;
+    JL_GC_PUSH3(&jledges, &codelocs, &debuginfo);
+    int32_t *codelocs32 = jl_array_data(codelocs,int32_t);
+    size_t j;
+    for (j = 0; j < nlocs; j++) {
+        size_t lineidx = jl_unbox_long(jl_array_ptr_ref((jl_array_t*)codelocs_any, j)); // 1 indexed!
+        while (lineidx != 0) {
+            jl_value_t *locinfo = jl_array_ptr_ref(linetable, lineidx - 1);
+            lineidx = jl_unbox_int32(jl_fieldref(locinfo, 2));
+            arraylist_push(&inlinestack, locinfo);
+        }
+        int32_t line = 0, to = 0, pc = 0;
+        if (inlinestack.len) {
+            jl_value_t *locinfo = (jl_value_t*)arraylist_pop(&inlinestack);
+            jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0);
+            if (filesym == topfile)
+                line = jl_unbox_int32(jl_fieldref(locinfo, 1));
+            else
+                arraylist_push(&inlinestack, locinfo);
+            if (inlinestack.len) {
+                add_edge(&edges_list, &inlinestack, &to, &pc);
+            }
+        }
+        codelocs32[j * 3 + 0] = line;
+        codelocs32[j * 3 + 1] = to;
+        codelocs32[j * 3 + 2] = pc;
+    }
+    codelocs = (jl_value_t*)jl_compress_codelocs(topline, codelocs, nlocs);
+    jledges = alloc_edges(&edges_list);
+    debuginfo = (jl_debuginfo_t*)jl_new_struct(jl_debuginfo_type, topfile, jl_nothing, jledges, codelocs);
+    JL_GC_POP();
+    return debuginfo;
+}
+
 // copy a :lambda Expr into its CodeInfo representation,
 // including popping of known meta nodes
-static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
+jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
 {
+    jl_code_info_t *li = NULL;
+    JL_GC_PUSH1(&li);
+    li = jl_new_code_info_uninit();
+
+    jl_expr_t *arglist = (jl_expr_t*)jl_exprarg(ir, 0);
+    li->nargs = jl_array_len(arglist);
+
     assert(jl_is_expr(ir));
     jl_expr_t *bodyex = (jl_expr_t*)jl_exprarg(ir, 2);
-    jl_value_t *codelocs = jl_exprarg(ir, 3);
-    li->linetable = jl_exprarg(ir, 4);
-    size_t nlocs = jl_array_nrows(codelocs);
-    li->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs);
-    size_t j;
-    for (j = 0; j < nlocs; j++) {
-        jl_array_uint32_set((jl_array_t*)li->codelocs, j, jl_unbox_long(jl_array_ptr_ref((jl_array_t*)codelocs, j)));
-    }
+
+    jl_array_t *codelocs_any = (jl_array_t*)jl_exprarg(ir, 3);
+    jl_array_t *linetable = (jl_array_t*)jl_exprarg(ir, 4);
+    li->debuginfo = jl_linetable_to_debuginfo(codelocs_any, linetable);
+    jl_gc_wb(li, li->debuginfo);
+
     assert(jl_is_expr(bodyex));
     jl_array_t *body = bodyex->args;
     li->code = body;
@@ -318,6 +444,7 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     // last(inline_flags) == 0: callsite noinline region
     arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
     arraylist_t *purity_exprs = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
+    size_t j;
     for (j = 0; j < n; j++) {
         jl_value_t *st = bd[j];
         int is_flag_stmt = 0;
@@ -341,15 +468,29 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
                     li->constprop = 2;
                 else if (jl_is_expr(ma) && ((jl_expr_t*)ma)->head == jl_purity_sym) {
                     if (jl_expr_nargs(ma) == NUM_EFFECTS_OVERRIDES) {
-                        li->purity.overrides.ipo_consistent = jl_unbox_bool(jl_exprarg(ma, 0));
-                        li->purity.overrides.ipo_effect_free = jl_unbox_bool(jl_exprarg(ma, 1));
-                        li->purity.overrides.ipo_nothrow = jl_unbox_bool(jl_exprarg(ma, 2));
-                        li->purity.overrides.ipo_terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3));
-                        li->purity.overrides.ipo_terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4));
-                        li->purity.overrides.ipo_notaskstate = jl_unbox_bool(jl_exprarg(ma, 5));
-                        li->purity.overrides.ipo_inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6));
-                        li->purity.overrides.ipo_noub = jl_unbox_bool(jl_exprarg(ma, 7));
-                        li->purity.overrides.ipo_noub_if_noinbounds = jl_unbox_bool(jl_exprarg(ma, 8));
+                        // N.B. this code allows multiple :purity expressions to be present in a single `:meta` node
+                        int8_t consistent = jl_unbox_bool(jl_exprarg(ma, 0));
+                        if (consistent) li->purity.overrides.ipo_consistent = consistent;
+                        int8_t effect_free = jl_unbox_bool(jl_exprarg(ma, 1));
+                        if (effect_free) li->purity.overrides.ipo_effect_free = effect_free;
+                        int8_t nothrow = jl_unbox_bool(jl_exprarg(ma, 2));
+                        if (nothrow) li->purity.overrides.ipo_nothrow = nothrow;
+                        int8_t terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3));
+                        if (terminates_globally) li->purity.overrides.ipo_terminates_globally = terminates_globally;
+                        int8_t terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4));
+                        if (terminates_locally) li->purity.overrides.ipo_terminates_locally = terminates_locally;
+                        int8_t notaskstate = jl_unbox_bool(jl_exprarg(ma, 5));
+                        if (notaskstate) li->purity.overrides.ipo_notaskstate = notaskstate;
+                        int8_t inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6));
+                        if (inaccessiblememonly) li->purity.overrides.ipo_inaccessiblememonly = inaccessiblememonly;
+                        int8_t noub = jl_unbox_bool(jl_exprarg(ma, 7));
+                        if (noub) li->purity.overrides.ipo_noub = noub;
+                        int8_t noub_if_noinbounds = jl_unbox_bool(jl_exprarg(ma, 8));
+                        if (noub_if_noinbounds) li->purity.overrides.ipo_noub_if_noinbounds = noub_if_noinbounds;
+                        int8_t consistent_overlay = jl_unbox_bool(jl_exprarg(ma, 9));
+                        if (consistent_overlay) li->purity.overrides.ipo_consistent_overlay = consistent_overlay;
+                        int8_t nortcall = jl_unbox_bool(jl_exprarg(ma, 10));
+                        if (nortcall) li->purity.overrides.ipo_nortcall = nortcall;
                     }
                 }
                 else
@@ -472,6 +613,8 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
         jl_array_ptr_set(li->slotnames, i, name);
         jl_array_uint8_set(li->slotflags, i, vinfo_mask & jl_unbox_long(jl_array_ptr_ref(vi, 2)));
     }
+    JL_GC_POP();
+    return li;
 }
 
 JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
@@ -483,10 +626,8 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
     mi->def.value = NULL;
     mi->specTypes = NULL;
     mi->sparam_vals = jl_emptysvec;
-    jl_atomic_store_relaxed(&mi->uninferred, NULL);
     mi->backedges = NULL;
     jl_atomic_store_relaxed(&mi->cache, NULL);
-    mi->inInference = 0;
     mi->cache_with_orig = 0;
     jl_atomic_store_relaxed(&mi->precompiled, 0);
     return mi;
@@ -497,16 +638,16 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     jl_task_t *ct = jl_current_task;
     jl_code_info_t *src =
         (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
-                                       jl_code_info_type);
+                                     jl_code_info_type);
     src->code = NULL;
-    src->codelocs = NULL;
+    src->debuginfo = NULL;
     src->ssavaluetypes = NULL;
     src->ssaflags = NULL;
     src->method_for_inference_limit_heuristics = jl_nothing;
-    src->linetable = jl_nothing;
     src->slotflags = NULL;
     src->slotnames = NULL;
     src->slottypes = jl_nothing;
+    src->rettype = (jl_value_t*)jl_any_type;
     src->parent = (jl_method_instance_t*)jl_nothing;
     src->min_world = 1;
     src->max_world = ~(size_t)0;
@@ -517,44 +658,12 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     src->constprop = 0;
     src->inlining = 0;
     src->purity.bits = 0;
+    src->nargs = 0;
+    src->isva = 0;
     src->inlining_cost = UINT16_MAX;
     return src;
 }
 
-jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
-{
-    jl_code_info_t *src = NULL;
-    JL_GC_PUSH1(&src);
-    src = jl_new_code_info_uninit();
-    jl_code_info_set_ir(src, ir);
-    JL_GC_POP();
-    return src;
-}
-
-void jl_add_function_to_lineinfo(jl_code_info_t *ci, jl_value_t *func)
-{
-    // func may contain jl_symbol (function name), jl_method_t, or jl_method_instance_t
-    jl_array_t *li = (jl_array_t*)ci->linetable;
-    size_t i, n = jl_array_nrows(li);
-    jl_value_t *rt = NULL, *lno = NULL, *inl = NULL;
-    JL_GC_PUSH3(&rt, &lno, &inl);
-    for (i = 0; i < n; i++) {
-        jl_value_t *ln = jl_array_ptr_ref(li, i);
-        assert(jl_typetagis(ln, jl_lineinfonode_type));
-        jl_value_t *mod = jl_fieldref_noalloc(ln, 0);
-        jl_value_t *file = jl_fieldref_noalloc(ln, 2);
-        lno = jl_fieldref(ln, 3);
-        inl = jl_fieldref(ln, 4);
-        // respect a given linetable if available
-        jl_value_t *ln_func = jl_fieldref_noalloc(ln, 1);
-        if (jl_is_symbol(ln_func) && (jl_sym_t*)ln_func == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0)
-            ln_func = func;
-        rt = jl_new_struct(jl_lineinfonode_type, mod, ln_func, file, lno, inl);
-        jl_array_ptr_set(li, i, rt);
-    }
-    JL_GC_POP();
-}
-
 // invoke (compiling if necessary) the jlcall function pointer for a method template
 static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator,
         size_t world, jl_svec_t *sparam_vals, jl_value_t **args, uint32_t nargs)
@@ -592,26 +701,60 @@ JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t *
     return func;
 }
 
+JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world)
+{
+    for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) {
+        if (codeinst->owner != (void*)jl_uninferred_sym)
+            continue;
+        if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) {
+            return codeinst;
+        }
+    }
+    return NULL;
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci)
+{
+    while (!jl_mi_try_insert(mi, checked, newci)) {
+        jl_code_instance_t *new_checked = jl_atomic_load_relaxed(&mi->cache);
+        // Check if another thread inserted a CodeInstance that covers this world
+        jl_code_instance_t *other = jl_cached_uninferred(new_checked, world);
+        if (other)
+            return other;
+        checked = new_checked;
+    }
+    // Successfully inserted
+    return newci;
+}
+
 // Return a newly allocated CodeInfo for the function signature
 // effectively described by the tuple (specTypes, env, Method) inside linfo
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world)
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *mi, size_t world, jl_code_instance_t **cache)
 {
-    jl_value_t *uninferred = jl_atomic_load_relaxed(&linfo->uninferred);
-    if (uninferred) {
-        assert(jl_is_code_info(uninferred)); // make sure this did not get `nothing` put here
-        return (jl_code_info_t*)jl_copy_ast((jl_value_t*)uninferred);
+    jl_code_instance_t *cache_ci = jl_atomic_load_relaxed(&mi->cache);
+    jl_code_instance_t *uninferred_ci = jl_cached_uninferred(cache_ci, world);
+    if (uninferred_ci) {
+        // The uninferred code is in `inferred`, but that is a bit of a misnomer here.
+        // This is the cached output the generated function (or top-level thunk).
+        // This cache has a non-standard owner (indicated by `->owner === :uninferred`),
+        // so it doesn't get confused for inference results.
+        jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred_ci->inferred);
+        assert(jl_is_code_info(src)); // make sure this did not get `nothing` put here
+        return (jl_code_info_t*)jl_copy_ast((jl_value_t*)src);
     }
 
     JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION);
-    jl_value_t *tt = linfo->specTypes;
-    jl_method_t *def = linfo->def.method;
-    jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK);
+    jl_value_t *tt = mi->specTypes;
+    jl_method_t *def = mi->def.method;
+    jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK);
     jl_value_t *generator = def->generator;
     assert(generator != NULL);
     assert(jl_is_method(def));
     jl_code_info_t *func = NULL;
     jl_value_t *ex = NULL;
-    JL_GC_PUSH2(&ex, &func);
+    jl_code_info_t *uninferred = NULL;
+    jl_code_instance_t *ci = NULL;
+    JL_GC_PUSH4(&ex, &func, &uninferred, &ci);
     jl_task_t *ct = jl_current_task;
     int last_lineno = jl_lineno;
     int last_in = ct->ptls->in_pure_callback;
@@ -625,17 +768,17 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz
 
         // invoke code generator
         jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt);
-        ex = jl_call_staged(def, generator, world, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
+        ex = jl_call_staged(def, generator, world, mi->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
 
         // do some post-processing
         if (jl_is_code_info(ex)) {
             func = (jl_code_info_t*)ex;
             jl_array_t *stmts = (jl_array_t*)func->code;
-            jl_resolve_globals_in_ir(stmts, def->module, linfo->sparam_vals, 1);
+            jl_resolve_globals_in_ir(stmts, def->module, mi->sparam_vals, 1);
         }
         else {
             // Lower the user's expression and resolve references to the type parameters
-            func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals);
+            func = jl_expand_and_resolve(ex, def->module, mi->sparam_vals);
             if (!jl_is_code_info(func)) {
                 if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) {
                     ct->ptls->in_pure_callback = 0;
@@ -643,29 +786,67 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz
                 }
                 jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.");
             }
+            // TODO: This should ideally be in the lambda expression,
+            // but currently our isva determination is non-syntactic
+            func->isva = def->isva;
         }
-        jl_add_function_to_lineinfo(func, (jl_value_t*)def->name);
 
         // If this generated function has an opaque closure, cache it for
-        // correctness of method identity
+        // correctness of method identity. In particular, other methods that call
+        // this method may end up referencing it in a PartialOpaque lattice element
+        // type. If the method identity were to change (for the same world age)
+        // in between invocations of this method, that return type inference would
+        // no longer be correct.
+        int needs_cache_for_correctness = 0;
         for (int i = 0; i < jl_array_nrows(func->code); ++i) {
             jl_value_t *stmt = jl_array_ptr_ref(func->code, i);
             if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) {
+                if (jl_expr_nargs(stmt) >= 4 && jl_is_bool(jl_exprarg(stmt, 3)) && !jl_unbox_bool(jl_exprarg(stmt, 3))) {
+                    // If this new_opaque_closure is prohibited from sourcing PartialOpaque,
+                    // there is no problem
+                    continue;
+                }
                 if (jl_options.incremental && jl_generating_output())
                     jl_error("Impossible to correctly handle OpaqueClosure inside @generated returned during precompile process.");
-                jl_value_t *uninferred = jl_copy_ast((jl_value_t*)func);
-                jl_value_t *old = NULL;
-                if (jl_atomic_cmpswap(&linfo->uninferred, &old, uninferred)) {
-                    jl_gc_wb(linfo, uninferred);
-                }
-                else {
-                    assert(jl_is_code_info(old));
-                    func = (jl_code_info_t*)old;
-                }
+                needs_cache_for_correctness = 1;
                 break;
             }
         }
 
+        if (func->edges == jl_nothing && func->max_world == ~(size_t)0) {
+            if (func->min_world != 1) {
+                jl_error("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`");
+            }
+        }
+
+        if (cache || needs_cache_for_correctness) {
+            uninferred = (jl_code_info_t*)jl_copy_ast((jl_value_t*)func);
+            ci = jl_new_codeinst_for_uninferred(mi, uninferred);
+
+            if (uninferred->edges != jl_nothing) {
+                // N.B.: This needs to match `store_backedges` on the julia side
+                jl_array_t *edges = (jl_array_t*)uninferred->edges;
+                for (size_t i = 0; i < jl_array_len(edges); ++i) {
+                    jl_value_t *kind = jl_array_ptr_ref(edges, i);
+                    if (jl_is_method_instance(kind)) {
+                        jl_method_instance_add_backedge((jl_method_instance_t*)kind, jl_nothing, mi);
+                    } else if (jl_is_mtable(kind)) {
+                        jl_method_table_add_backedge((jl_methtable_t*)kind, jl_array_ptr_ref(edges, ++i), (jl_value_t*)mi);
+                    } else {
+                        jl_method_instance_add_backedge((jl_method_instance_t*)jl_array_ptr_ref(edges, ++i), kind, mi);
+                    }
+                }
+            }
+
+            jl_code_instance_t *cached_ci = jl_cache_uninferred(mi, cache_ci, world, ci);
+            if (cached_ci != ci) {
+                func = (jl_code_info_t*)jl_copy_ast(jl_atomic_load_relaxed(&cached_ci->inferred));
+                assert(jl_is_code_info(func));
+            }
+            if (cache)
+                *cache = cached_ci;
+        }
+
         ct->ptls->in_pure_callback = last_in;
         jl_lineno = last_lineno;
         ct->world_age = last_age;
@@ -723,7 +904,6 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     m->nospecializeinfer = src->nospecializeinfer;
     m->constprop = src->constprop;
     m->purity.bits = src->purity.bits;
-    jl_add_function_to_lineinfo(src, (jl_value_t*)m->name);
 
     jl_array_t *copy = NULL;
     jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig);
@@ -807,15 +987,23 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
         jl_array_ptr_set(copy, i, st);
     }
     src = jl_copy_code_info(src);
+    src->isva = m->isva; // TODO: It would be nice to reverse this
+    assert(m->nargs == src->nargs);
     src->code = copy;
     jl_gc_wb(src, copy);
     m->slot_syms = jl_compress_argnames(src->slotnames);
     jl_gc_wb(m, m->slot_syms);
-    if (gen_only)
+    if (gen_only) {
         m->source = NULL;
-    else
-        m->source = (jl_value_t*)jl_compress_ir(m, src);
-    jl_gc_wb(m, m->source);
+    }
+    else {
+        m->debuginfo = src->debuginfo;
+        jl_gc_wb(m, m->debuginfo);
+        m->source = (jl_value_t*)src;
+        jl_gc_wb(m, m->source);
+        m->source = (jl_value_t*)jl_compress_ir(m, NULL);
+        jl_gc_wb(m, m->source);
+    }
     JL_GC_POP();
 }
 
@@ -835,6 +1023,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->module = module;
     m->external_mt = NULL;
     m->source = NULL;
+    m->debuginfo = NULL;
     jl_atomic_store_relaxed(&m->unspecialized, NULL);
     m->generator = NULL;
     m->name = NULL;
@@ -904,7 +1093,7 @@ void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *ca
 // method definition ----------------------------------------------------------
 
 jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_value_t *uninferred_source, int isva)
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred)
 {
     jl_method_t *m = jl_new_method_uninit(module);
     JL_GC_PUSH1(&m);
@@ -923,35 +1112,34 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
     jl_value_t *file = jl_linenode_file(functionloc);
     m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
     m->line = jl_linenode_line(functionloc);
-    if (jl_is_code_info(uninferred_source))
-        jl_method_set_source(m, (jl_code_info_t*)uninferred_source);
+    if (isinferred) {
+        m->slot_syms = jl_compress_argnames(ci->slotnames);
+        jl_gc_wb(m, m->slot_syms);
+    } else {
+        jl_method_set_source(m, ci);
+    }
     JL_GC_POP();
     return m;
 }
 
-// empty generic function def
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd)
+JL_DLLEXPORT void jl_check_gf(jl_value_t *gf, jl_sym_t *name)
 {
-    jl_value_t *gf = NULL;
-
-    assert(name && bp);
-    if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp)
+    if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
         jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    gf = jl_atomic_load_relaxed(bp);
-    if (gf != NULL) {
-        if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
-            jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    }
-    if (bnd)
-        bnd->constp = 1; // XXX: use jl_declare_constant and jl_checked_assignment
-    if (gf == NULL) {
-        gf = (jl_value_t*)jl_new_generic_function(name, module);
-        jl_atomic_store(bp, gf); // TODO: fix constp assignment data race
-        if (bnd) jl_gc_wb(bnd, gf);
+}
+
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name)
+{
+    jl_value_t *gf = jl_get_binding_value_if_const(b);
+    if (gf) {
+        jl_check_gf(gf, b->globalref->name);
+        return gf;
     }
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))))
+        jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
+    gf = (jl_value_t*)jl_new_generic_function(name, mod);
+    jl_declare_constant_val(b, mod, name, gf);
     return gf;
 }
 
diff --git a/src/module.c b/src/module.c
index 7a12552415b11..96d94049cff13 100644
--- a/src/module.c
+++ b/src/module.c
@@ -12,6 +12,23 @@
 extern "C" {
 #endif
 
+// In this translation unit and this translation unit only emit this symbol `extern` for use by julia
+EXTERN_INLINE_DEFINE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT;
+EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT;
+extern inline enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world)
+{
+    if (!gr)
+        return NULL;
+    jl_binding_t *b = NULL;
+    if (gr)
+        b = gr->binding;
+    if (!b)
+        b = jl_get_module_binding(gr->mod, gr->name, 0);
+    return jl_get_binding_partition(b, world);
+}
+
 JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_names)
 {
     jl_task_t *ct = jl_current_task;
@@ -161,37 +178,51 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin
     jl_task_t *ct = jl_current_task;
     jl_globalref_t *g = (jl_globalref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_globalref_t), jl_globalref_type);
     g->mod = mod;
-    jl_gc_wb(g, g->mod);
+    jl_gc_wb_fresh(g, g->mod);
     g->name = name;
+    jl_gc_wb_fresh(g, g->name);
     g->binding = b;
+    jl_gc_wb_fresh(g, g->binding);
     return g;
 }
 
+static jl_binding_partition_t *new_binding_partition(void)
+{
+    jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type);
+    jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD));
+    bpart->min_world = 0;
+    jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1);
+    jl_atomic_store_relaxed(&bpart->next, NULL);
+#ifdef _P64
+    bpart->reserved = 0;
+#endif
+    return bpart;
+}
+
 static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name)
 {
     jl_task_t *ct = jl_current_task;
     assert(jl_is_module(mod) && jl_is_symbol(name));
     jl_binding_t *b = (jl_binding_t*)jl_gc_alloc(ct->ptls, sizeof(jl_binding_t), jl_binding_type);
     jl_atomic_store_relaxed(&b->value, NULL);
-    jl_atomic_store_relaxed(&b->owner, NULL);
-    jl_atomic_store_relaxed(&b->ty, NULL);
+    jl_atomic_store_relaxed(&b->partitions, NULL);
     b->globalref = NULL;
-    b->constp = 0;
     b->exportp = 0;
     b->publicp = 0;
-    b->imported = 0;
     b->deprecated = 0;
-    b->usingfailed = 0;
-    b->padding = 0;
     JL_GC_PUSH1(&b);
     b->globalref = jl_new_globalref(mod, name, b);
+    jl_gc_wb(b, b->globalref);
+    jl_binding_partition_t *bpart = new_binding_partition();
+    jl_atomic_store_relaxed(&b->partitions, bpart);
+    jl_gc_wb(b, bpart);
     JL_GC_POP();
     return b;
 }
 
 extern jl_mutex_t jl_modules_mutex;
 
-static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
 {
     if (jl_current_task->ptls->in_pure_callback)
         jl_errorf("new globals cannot be created in a generated function");
@@ -219,14 +250,24 @@ static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
 static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED;
 
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var)
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b) {
-        if (b2 == NULL)
-            check_safe_newbinding(m, var);
-        if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) {
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+retry:
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) {
+                check_safe_newbinding(m, var);
+                if (!alloc)
+                    jl_errorf("Global %s.%s does not exist and cannot be assigned. Declare it using `global` before attempting assignment.", jl_symbol_name(m->name), jl_symbol_name(var));
+            }
+            jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)jl_any_type, BINDING_KIND_GLOBAL);
+            if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+                goto retry;
+            jl_gc_wb_knownold(bpart, jl_any_type);
+        } else {
             jl_module_t *from = jl_binding_dbgmodule(b, m, var);
             if (from == m)
                 jl_errorf("cannot assign a value to imported variable %s.%s",
@@ -248,43 +289,88 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var
     return b->globalref->mod; // TODO: deprecate this?
 }
 
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return decode_restriction_value(pku);
+    return jl_atomic_load_relaxed(&b->value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_seqcst(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return decode_restriction_value(pku);
+    return jl_atomic_load(&b->value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (!jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return NULL;
+    return decode_restriction_value(pku);
+}
+
+typedef struct _modstack_t {
+    jl_module_t *m;
+    jl_sym_t *var;
+    struct _modstack_t *prev;
+} modstack_t;
+static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
+
+JL_DLLEXPORT jl_value_t *jl_reresolve_binding_value_seqcst(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)))) {
+        jl_resolve_owner(b, b->globalref->mod, b->globalref->name, NULL);
+    }
+    return jl_get_binding_value_seqcst(b);
+}
+
 // get binding for adding a method
 // like jl_get_binding_wr, but has different error paths and messages
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b) {
-        if (b2 == NULL)
-            check_safe_newbinding(m, var);
-        if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) {
-            jl_value_t *f = jl_atomic_load_relaxed(&b2->value);
-            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
-            if (f == NULL) {
-                // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from
-                jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
-                          jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
-            }
-            // TODO: we might want to require explicitly importing types to add constructors
-            //       or we might want to drop this error entirely
-            if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) {
-                jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
-                          jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) {
+                check_safe_newbinding(m, var);
             }
-            return b2;
+            return b;
+        }
+        jl_value_t *f = jl_get_binding_value_if_const(b);
+        if (f == NULL) {
+            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
+            // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from
+            jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
+                        jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
+        }
+        // TODO: we might want to require explicitly importing types to add constructors
+        //       or we might want to drop this error entirely
+        if (decode_restriction_kind(pku) != BINDING_KIND_IMPORTED && !(f && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) {
+            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
+            jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
+                        jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
         }
+        return b;
     }
     return b;
 }
 
-typedef struct _modstack_t {
-    jl_module_t *m;
-    jl_sym_t *var;
-    struct _modstack_t *prev;
-} modstack_t;
-
-static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
-
 static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 
 #ifndef __clang_gcanalyzer__
@@ -295,23 +381,28 @@ static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROO
 }
 #endif
 
-static int eq_bindings(jl_binding_t *owner, jl_binding_t *alias)
+static int eq_bindings(jl_binding_partition_t *owner, jl_binding_t *alias, size_t world)
 {
-    assert(owner == jl_atomic_load_relaxed(&owner->owner));
-    if (owner == alias)
-        return 1;
-    alias = jl_atomic_load_relaxed(&alias->owner);
-    if (owner == alias)
+    jl_ptr_kind_union_t owner_pku = jl_atomic_load_relaxed(&owner->restriction);
+    assert(decode_restriction_kind(owner_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(owner_pku) == BINDING_KIND_DECLARED ||
+           jl_bkind_is_some_constant(decode_restriction_kind(owner_pku)));
+    jl_binding_partition_t *alias_bpart = jl_get_binding_partition(alias, world);
+    if (owner == alias_bpart)
         return 1;
-    if (owner->constp && alias->constp && jl_atomic_load_relaxed(&owner->value) && jl_atomic_load_relaxed(&alias->value) == jl_atomic_load_relaxed(&owner->value))
+    jl_ptr_kind_union_t alias_pku = jl_walk_binding_inplace(&alias, &alias_bpart, world);
+    if (jl_bkind_is_some_constant(decode_restriction_kind(owner_pku)) &&
+        jl_bkind_is_some_constant(decode_restriction_kind(alias_pku)) &&
+        decode_restriction_value(owner_pku) &&
+        decode_restriction_value(alias_pku) == decode_restriction_value(owner_pku))
         return 1;
-    return 0;
+    return owner == alias_bpart;
 }
 
 // find a binding from a module's `usings` list
 static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, jl_module_t **from, modstack_t *st, int warn)
 {
     jl_binding_t *b = NULL;
+    jl_binding_partition_t *bpart = NULL;
     jl_module_t *owner = NULL;
     JL_LOCK(&m->lock);
     int i = (int)m->usings.len - 1;
@@ -326,13 +417,17 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
             if (tempb == NULL)
                 // couldn't resolve; try next using (see issue #6105)
                 continue;
-            assert(jl_atomic_load_relaxed(&tempb->owner) == tempb);
-            if (b != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempb, b)) {
+            jl_binding_partition_t *tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age);
+            jl_ptr_kind_union_t tempb_pku = jl_atomic_load_relaxed(&tempbpart->restriction);
+            assert(decode_restriction_kind(tempb_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(tempb_pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(tempb_pku)));
+            (void)tempb_pku;
+            if (bpart != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempbpart, b, jl_current_task->world_age)) {
                 if (warn) {
                     // set usingfailed=1 to avoid repeating this warning
                     // the owner will still be NULL, so it can be later imported or defined
                     tempb = jl_get_module_binding(m, var, 1);
-                    tempb->usingfailed = 1;
+                    tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age);
+                    jl_atomic_store_release(&tempbpart->restriction, encode_restriction(NULL, BINDING_KIND_FAILED));
                     jl_printf(JL_STDERR,
                               "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n",
                               jl_symbol_name(owner->name),
@@ -344,6 +439,7 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
             if (owner == NULL || !tempb->deprecated) {
                 owner = imp;
                 b = tempb;
+                bpart = tempbpart;
             }
         }
     }
@@ -355,13 +451,14 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
 // this might not be the same as the owner of the binding, since the binding itself may itself have been imported from elsewhere
 static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b && !b->imported) {
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) != BINDING_KIND_GLOBAL) {
         // for implicitly imported globals, try to re-resolve it to find the module we got it from most directly
         jl_module_t *from = NULL;
-        b = using_resolve_binding(m, var, &from, NULL, 0);
-        if (b) {
-            if (b2 == NULL || jl_atomic_load_relaxed(&b->owner) == jl_atomic_load_relaxed(&b2->owner))
+        jl_binding_t *b2 = using_resolve_binding(m, var, &from, NULL, 0);
+        if (b2) {
+            jl_binding_partition_t *b2part = jl_get_binding_partition(b2, jl_current_task->world_age);
+            if (eq_bindings(b2part, b, jl_current_task->world_age))
                 return from;
             // if we did not find it (or accidentally found a different one), ignore this
         }
@@ -376,10 +473,16 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
 {
     if (b == NULL)
         b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 == NULL) {
-        if (b->usingfailed)
-            return NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+retry:
+    if (decode_restriction_kind(pku) == BINDING_KIND_FAILED)
+        return NULL;
+    if (decode_restriction_kind(pku) == BINDING_KIND_DECLARED) {
+        return b;
+    }
+    if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) {
+        jl_binding_t *b2 = NULL;
         modstack_t top = { m, var, st };
         modstack_t *tmp = st;
         for (; tmp != NULL; tmp = tmp->prev) {
@@ -394,19 +497,17 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
             return NULL;
         assert(from);
         JL_GC_PROMISE_ROOTED(from); // gc-analysis does not understand output parameters
+        JL_GC_PROMISE_ROOTED(b2);
         if (b2->deprecated) {
-            if (jl_atomic_load_relaxed(&b2->value) == jl_nothing) {
+            if (jl_get_binding_value(b2) == jl_nothing) {
                 // silently skip importing deprecated values assigned to nothing (to allow later mutation)
                 return NULL;
             }
         }
         // do a full import to prevent the result of this lookup from
         // changing, for example if this var is assigned to later.
-        jl_binding_t *owner = NULL;
-        if (!jl_atomic_cmpswap(&b->owner, &owner, b2)) {
-            // concurrent import
-            return owner;
-        }
+        if (!jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction((jl_value_t*)b2, BINDING_KIND_IMPLICIT)))
+            goto retry;
         if (b2->deprecated) {
             b->deprecated = 1; // we will warn about this below, but we might want to warn at the use sites too
             if (m != jl_main_module && m != jl_base_module &&
@@ -421,20 +522,26 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
                 jl_binding_dep_message(from, var, b2);
             }
         }
+        return b2;
     }
-    assert(jl_atomic_load_relaxed(&b2->owner) == b2);
-    return b2;
+    jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    return b;
 }
 
 // get the current likely owner of binding when accessing m.var, without resolving the binding (it may change later)
 JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
     jl_module_t *from = m;
-    if (b == NULL || (!b->usingfailed && jl_atomic_load_relaxed(&b->owner) == NULL))
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) {
         b = using_resolve_binding(m, var, &from, NULL, 0);
-    else
-        b = jl_atomic_load_relaxed(&b->owner);
+        bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    }
+    pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return NULL;
     return b;
 }
 
@@ -442,13 +549,20 @@ JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
     if (b == NULL)
         return jl_nothing;
-    b = jl_atomic_load_relaxed(&b->owner);
-    if (b == NULL)
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
         return jl_nothing;
-    jl_value_t *ty = jl_atomic_load_relaxed(&b->ty);
-    return ty ? ty : jl_nothing;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        // TODO: We would like to return the type of the constant, but
+        // currently code relies on this returning any to bypass conversion
+        // before an attempted assignment to a constant.
+        // return jl_typeof(jl_atomic_load_relaxed(&bpart->restriction));
+        return (jl_value_t*)jl_any_type;
+    }
+    return decode_restriction_value(pku);
 }
 
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var)
@@ -479,7 +593,8 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && b->imported;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPORTED;
 }
 
 extern const char *jl_filename;
@@ -498,7 +613,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
     jl_binding_t *dep_message_binding = jl_get_binding(m, jl_symbol(dep_binding_name));
     jl_value_t *dep_message = NULL;
     if (dep_message_binding != NULL)
-        dep_message = jl_atomic_load_relaxed(&dep_message_binding->value);
+        dep_message = jl_get_binding_value(dep_message_binding);
     JL_GC_PUSH1(&dep_message);
     if (dep_message != NULL) {
         if (jl_is_string(dep_message)) {
@@ -509,7 +624,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
         }
     }
     else {
-        jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+        jl_value_t *v = jl_get_binding_value(b);
         dep_message = v; // use as gc-root
         if (v) {
             if (jl_is_type(v) || jl_is_module(v)) {
@@ -546,9 +661,12 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname,
                   jl_symbol_name(to->name));
     }
     else {
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(pku)));
+        (void)pku;
         if (b->deprecated) {
-            if (jl_atomic_load_relaxed(&b->value) == jl_nothing) {
+            if (jl_get_binding_value(b) == jl_nothing) {
                 // silently skip importing deprecated values assigned to nothing (to allow later mutation)
                 return;
             }
@@ -572,17 +690,28 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname,
             // importing a binding on top of itself. harmless.
             return;
         }
-        jl_binding_t *ownerto = NULL;
-        if (jl_atomic_cmpswap(&bto->owner, &ownerto, b)) {
-            bto->imported |= (explici != 0);
+        jl_binding_partition_t *btopart = jl_get_binding_partition(bto, jl_current_task->world_age);
+        jl_ptr_kind_union_t bto_pku = jl_atomic_load_relaxed(&btopart->restriction);
+retry:
+        if (decode_restriction_kind(bto_pku) == BINDING_KIND_GUARD ||
+            decode_restriction_kind(bto_pku) == BINDING_KIND_IMPLICIT ||
+            decode_restriction_kind(bto_pku) == BINDING_KIND_FAILED) {
+
+            jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)b, (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT);
+            if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku))
+                goto retry;
             bto->deprecated |= b->deprecated; // we already warned about this above, but we might want to warn at the use sites too
         }
         else {
-            if (eq_bindings(b, bto)) {
-                // already imported
-                bto->imported |= (explici != 0);
+            if (eq_bindings(bpart, bto, jl_current_task->world_age)) {
+                // already imported - potentially upgrade to _IMPORTED or _EXPLICIT
+                if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) {
+                    jl_ptr_kind_union_t new_pku = encode_restriction(decode_restriction_value(bto_pku), (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT);
+                    if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku))
+                        goto retry;
+                }
             }
-            else if (ownerto != bto) {
+            else if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) {
                 // already imported from somewhere else
                 jl_printf(JL_STDERR,
                           "WARNING: ignoring conflicting import of %s.%s into %s\n",
@@ -644,18 +773,24 @@ JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from)
         jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (b->exportp && (jl_atomic_load_relaxed(&b->owner) == b || b->imported)) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        if (b->exportp && (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_IMPORTED)) {
             jl_sym_t *var = b->globalref->name;
             jl_binding_t *tob = jl_get_module_binding(to, var, 0);
-            if (tob && jl_atomic_load_relaxed(&tob->owner) != NULL &&
-                // don't warn for conflicts with the module name itself.
-                // see issue #4715
-                var != to->name &&
-                !eq_bindings(jl_atomic_load_relaxed(&tob->owner), b)) {
-                jl_printf(JL_STDERR,
-                          "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(var),
-                          jl_symbol_name(to->name));
+            if (tob) {
+                jl_binding_partition_t *tobpart = jl_get_binding_partition(tob, jl_current_task->world_age);
+                jl_ptr_kind_union_t tobpku = jl_walk_binding_inplace(&tob, &tobpart, jl_current_task->world_age);
+                if (tob && decode_restriction_kind(tobpku) != BINDING_KIND_GUARD &&
+                    // don't warn for conflicts with the module name itself.
+                    // see issue #4715
+                    var != to->name &&
+                    !eq_bindings(tobpart, b, jl_current_task->world_age)) {
+                    jl_printf(JL_STDERR,
+                            "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n",
+                            jl_symbol_name(from->name), jl_symbol_name(var),
+                            jl_symbol_name(to->name));
+                }
             }
         }
         table = jl_atomic_load_relaxed(&from->bindings);
@@ -665,20 +800,38 @@ JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from)
 JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported)
 {
     jl_binding_t *b = jl_get_module_binding(from, s, 1);
+    if (b->publicp) {
+        // check for conflicting declarations
+        if (b->exportp && !exported)
+            jl_errorf("cannot declare %s.%s public; it is already declared exported",
+                      jl_symbol_name(from->name), jl_symbol_name(s));
+        if (!b->exportp && exported)
+            jl_errorf("cannot declare %s.%s exported; it is already declared public",
+                      jl_symbol_name(from->name), jl_symbol_name(s));
+    }
     b->publicp = 1;
     b->exportp |= exported;
 }
 
-JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var) // unlike most queries here, this is currently seq_cst
+JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import) // unlike most queries here, this is currently seq_cst
 {
-    jl_binding_t *b = jl_get_binding(m, var);
-    return b && (jl_atomic_load(&b->value) != NULL);
+    jl_binding_t *b = jl_get_module_binding(m, var, allow_import);
+    if (!b)
+        return 0;
+    if (!allow_import) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        if (!bpart || jl_bkind_is_some_import(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))))
+            return 0;
+        return jl_get_binding_value(b) != NULL;
+    }
+    return jl_reresolve_binding_value_seqcst(b) != NULL;
 }
 
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && (b->exportp || jl_atomic_load_relaxed(&b->owner) == b);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && (b->exportp || decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL);
 }
 
 JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var)
@@ -696,18 +849,24 @@ JL_DLLEXPORT int jl_module_public_p(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && jl_atomic_load_relaxed(&b->owner) != NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!bpart)
+        return 0;
+    enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
+    return kind == BINDING_KIND_DECLARED || !jl_bkind_is_some_guard(kind);
 }
 
 static uint_t bindingkey_hash(size_t idx, jl_value_t *data)
 {
-    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
+    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_sym_t *var = b->globalref->name;
     return var->hash;
 }
 
 static int bindingkey_eq(size_t idx, const void *var, jl_value_t *data, uint_t hv)
 {
+    if (idx >= jl_svec_len(data))
+        return 0; // We got a OOB access, probably due to a data race
     jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
     jl_sym_t *name = b->globalref->name;
     return var == name;
@@ -722,6 +881,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
         ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, (jl_value_t*)bindings, hv, 0); // acquire
         if (idx != -1) {
             jl_binding_t *b = (jl_binding_t*)jl_svecref(bindings, idx); // relaxed
+            JL_GC_PROMISE_ROOTED(b);
             if (locked)
                 JL_UNLOCK(&m->lock);
             return b;
@@ -766,7 +926,7 @@ JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr)
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
     // ignores b->deprecated
-    return b == NULL ? NULL : jl_atomic_load_relaxed(&b->value);
+    return b == NULL ? NULL : jl_get_binding_value(b);
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
@@ -777,12 +937,12 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
     // XXX: this only considers if the original is deprecated, not the binding in m
     if (b->deprecated)
         jl_binding_deprecation_warning(m, var, b);
-    return jl_atomic_load_relaxed(&b->value);
+    return jl_get_binding_value(b);
 }
 
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
 {
-    jl_binding_t *bp = jl_get_binding_wr(m, var);
+    jl_binding_t *bp = jl_get_binding_wr(m, var, 0);
     jl_checked_assignment(bp, m, var, val);
 }
 
@@ -790,43 +950,33 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var
 {
     // this function is mostly only used during initialization, so the data races here are not too important to us
     jl_binding_t *bp = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = NULL;
-    if (!jl_atomic_cmpswap(&bp->owner, &b2, bp) && b2 != bp)
-        jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var));
-    if (jl_atomic_load_relaxed(&bp->value) == NULL) {
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type);
-        uint8_t constp = 0;
-        // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
-        if (constp = bp->constp, bp->constp = 1, constp == 0) {
-            jl_value_t *old = NULL;
-            if (jl_atomic_cmpswap(&bp->value, &old, val)) {
-                jl_gc_wb(bp, val);
-                return;
-            }
-        }
-    }
-    jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bp, jl_current_task->world_age);
+    jl_atomic_store_release(&bpart->restriction, encode_restriction(val, BINDING_KIND_CONST));
+    jl_gc_wb(bpart, val);
 }
 
 JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && b->constp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!bpart)
+        return 0;
+    return jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)));
 }
 
 JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && jl_atomic_load_relaxed(&b->value) != NULL;
+    return b && jl_get_binding_value(b) != NULL;
 }
 
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_binding(m, var);
-    return b && b->constp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)));
 }
 
 // set the deprecated flag for a binding:
@@ -856,7 +1006,6 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b
     if (b->deprecated == 1 && jl_options.depwarn) {
         if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR)
             jl_printf(JL_STDERR, "WARNING: ");
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
         jl_printf(JL_STDERR, "%s.%s is deprecated",
                   jl_symbol_name(m->name), jl_symbol_name(s));
         jl_binding_dep_message(m, s, b);
@@ -875,39 +1024,29 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b
     }
 }
 
-jl_value_t *jl_check_binding_wr(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign)
+jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign)
 {
-    jl_value_t *old_ty = NULL;
-    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) {
-        if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
-            JL_GC_PUSH1(&rhs); // callee-rooted
-            if (!jl_isa(rhs, old_ty))
-                jl_errorf("cannot assign an incompatible value to the global %s.%s.",
-                          jl_symbol_name(mod->name), jl_symbol_name(var));
-            JL_GC_POP();
-        }
-    }
-    else {
-        old_ty = (jl_value_t*)jl_any_type;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku)));
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        jl_value_t *old = decode_restriction_value(pku);
+        if (jl_egal(rhs, old))
+            return NULL;
+        if (jl_typeof(rhs) == jl_typeof(old))
+            jl_errorf("invalid redefinition of constant %s.%s. This redefinition may be permitted using the `const` keyword.",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+        else
+            jl_errorf("invalid redefinition of constant %s.%s.",
+                jl_symbol_name(mod->name), jl_symbol_name(var));
     }
-    if (b->constp) {
-        if (reassign) {
-            jl_value_t *old = NULL;
-            if (jl_atomic_cmpswap(&b->value, &old, rhs)) {
-                jl_gc_wb(b, rhs);
-                return NULL;
-            }
-            if (jl_egal(rhs, old))
-                return NULL;
-            if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs))
-                reassign = 0;
-            else
-                jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n",
-                               jl_symbol_name(mod->name), jl_symbol_name(var));
-        }
-        if (!reassign)
-            jl_errorf("invalid redefinition of constant %s.%s",
-                      jl_symbol_name(mod->name), jl_symbol_name(var));
+    jl_value_t *old_ty = decode_restriction_value(pku);
+    if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
+        JL_GC_PUSH1(&rhs); // callee-rooted
+        if (!jl_isa(rhs, old_ty))
+            jl_errorf("cannot assign an incompatible value to the global %s.%s.",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+        JL_GC_POP();
     }
     return old_ty;
 }
@@ -938,12 +1077,13 @@ JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, j
 
 JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs)
 {
-    jl_value_t *ty = NULL;
-    if (jl_atomic_cmpswap_relaxed(&b->ty, &ty, (jl_value_t*)jl_any_type))
-        ty = (jl_value_t*)jl_any_type;
-    if (b->constp)
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku)));
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
         jl_errorf("invalid redefinition of constant %s.%s",
                   jl_symbol_name(mod->name), jl_symbol_name(var));
+    jl_value_t *ty = decode_restriction_value(pku);
     return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, mod, var);
 }
 
@@ -956,16 +1096,6 @@ JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod
     return old;
 }
 
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var)
-{
-    // n.b. jl_get_binding_wr should have ensured b->owner == b as mod.var
-    if (jl_atomic_load_relaxed(&b->owner) != b || (jl_atomic_load_relaxed(&b->value) != NULL && !b->constp)) {
-        jl_errorf("cannot declare %s.%s constant; it already has a value",
-                  jl_symbol_name(mod->name), jl_symbol_name(var));
-    }
-    b->constp = 1;
-}
-
 JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
 {
     JL_LOCK(&m->lock);
@@ -982,10 +1112,14 @@ JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
     return (jl_value_t*)a;
 }
 
-JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
+void _append_symbol_to_bindings_array(jl_array_t* a, jl_sym_t *name) {
+    jl_array_grow_end(a, 1);
+    //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
+    jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)name);
+}
+
+void append_module_names(jl_array_t* a, jl_module_t *m, int all, int imported, int usings)
 {
-    jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0);
-    JL_GC_PUSH1(&a);
     jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
     for (size_t i = 0; i < jl_svec_len(table); i++) {
         jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
@@ -994,15 +1128,41 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
         jl_sym_t *asname = b->globalref->name;
         int hidden = jl_symbol_name(asname)[0]=='#';
         int main_public = (m == jl_main_module && !(asname == jl_eval_sym || asname == jl_include_sym));
-        if ((b->publicp ||
-             (imported && b->imported) ||
-             (jl_atomic_load_relaxed(&b->owner) == b && !b->imported && (all || main_public))) &&
-            (all || (!b->deprecated && !hidden))) {
-            jl_array_grow_end(a, 1);
-            // n.b. change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
-            jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)asname);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
+        if (((b->publicp) ||
+             (imported && (kind == BINDING_KIND_CONST_IMPORT || kind == BINDING_KIND_IMPORTED)) ||
+             (usings && kind == BINDING_KIND_EXPLICIT) ||
+             ((kind == BINDING_KIND_GLOBAL || kind == BINDING_KIND_CONST || kind == BINDING_KIND_DECLARED) && (all || main_public))) &&
+            (all || (!b->deprecated && !hidden)))
+            _append_symbol_to_bindings_array(a, asname);
+    }
+}
+
+void append_exported_names(jl_array_t* a, jl_module_t *m, int all)
+{
+    jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
+    for (size_t i = 0; i < jl_svec_len(table); i++) {
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+        if ((void*)b == jl_nothing)
+            break;
+        if (b->exportp && (all || !b->deprecated))
+            _append_symbol_to_bindings_array(a, b->globalref->name);
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported, int usings)
+{
+    jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0);
+    JL_GC_PUSH1(&a);
+    append_module_names(a, m, all, imported, usings);
+    if (usings) {
+        // If `usings` is specified, traverse the list of `using`-ed modules and incorporate
+        // the names exported by those modules into the list.
+        for(int i=(int)m->usings.len-1; i >= 0; --i) {
+            jl_module_t *usinged = module_usings_getidx(m, i);
+            append_exported_names(a, usinged, all);
         }
-        table = jl_atomic_load_relaxed(&m->bindings);
     }
     JL_GC_POP();
     return (jl_value_t*)a;
@@ -1048,8 +1208,10 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m)
         jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (jl_atomic_load_relaxed(&b->owner) && jl_atomic_load_relaxed(&b->owner) != b && !b->imported)
-            jl_atomic_store_relaxed(&b->owner, NULL);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPLICIT) {
+            jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD));
+        }
     }
     JL_UNLOCK(&m->lock);
 }
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index 2c70a053eb970..0bf3a729cbcb1 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -50,7 +50,15 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     JL_GC_PUSH2(&sigtype, &selected_rt);
     sigtype = jl_argtype_with_function(captures, (jl_value_t*)argt);
 
-    jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
+    jl_method_instance_t *mi = NULL;
+    if (source->source) {
+        mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
+    } else {
+        mi = (jl_method_instance_t *)jl_atomic_load_relaxed(&source->specializations);
+        if (!jl_subtype(sigtype, mi->specTypes)) {
+            jl_error("sigtype mismatch in optimized opaque closure");
+        }
+    }
     jl_task_t *ct = jl_current_task;
     size_t world = ct->world_age;
     jl_code_instance_t *ci = NULL;
@@ -105,7 +113,7 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
         jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec);
 
         // OC wrapper methods are not world dependent
-        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0);
+        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0, NULL);
         if (!jl_atomic_load_acquire(&ci->invoke))
             jl_compile_codeinst(ci);
         specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
@@ -139,7 +147,7 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet
     JL_GC_PUSH3(&root, &sigtype, &inst);
     root = jl_box_long(lineno);
     root = jl_new_struct(jl_linenumbernode_type, root, file);
-    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, isinferred ? jl_nothing : (jl_value_t*)ci, isva);
+    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva, isinferred);
     root = (jl_value_t*)meth;
     size_t world = jl_current_task->world_age;
     // these are only legal in the current world since they are not in any tables
@@ -147,10 +155,11 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet
     jl_atomic_store_release(&meth->deleted_world, world);
 
     if (isinferred) {
-        sigtype = jl_argtype_with_function(env, (jl_value_t*)argt);
+        jl_value_t *argslotty = jl_array_ptr_ref(ci->slottypes, 0);
+        sigtype = jl_argtype_with_function_type(argslotty, (jl_value_t*)argt);
         jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
         inst = jl_new_codeinst(mi, jl_nothing, rt_ub, (jl_value_t*)jl_any_type, NULL, (jl_value_t*)ci,
-            0, world, world, 0, 0, jl_nothing, 0);
+            0, world, world, 0, jl_nothing, 0, ci->debuginfo);
         jl_mi_cache_insert(mi, inst);
     }
 
@@ -161,10 +170,10 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet
 
 JL_CALLABLE(jl_new_opaque_closure_jlcall)
 {
-    if (nargs < 4)
+    if (nargs < 5)
         jl_error("new_opaque_closure: Not enough arguments");
     return (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)args[0],
-        args[1], args[2], args[3], &args[4], nargs-4, 1);
+        args[1], args[2], args[4], &args[5], nargs-5, 1);
 }
 
 // check whether the specified number of arguments is compatible with the
diff --git a/src/options.h b/src/options.h
index 69b31ea918008..800be866183b0 100644
--- a/src/options.h
+++ b/src/options.h
@@ -68,15 +68,12 @@
 // GC_FINAL_STATS prints total GC stats at exit
 // #define GC_FINAL_STATS
 
-// MEMPROFILE prints pool summary statistics after every GC
-//#define MEMPROFILE
+// MEMPROFILE prints pool and large objects summary statistics after every GC
+// #define MEMPROFILE
 
 // GC_TIME prints time taken by each phase of GC
 // #define GC_TIME
 
-// OBJPROFILE counts objects by type
-// #define OBJPROFILE
-
 // pool allocator configuration options
 
 // GC_SMALL_PAGE allocates objects in 4k pages
@@ -113,7 +110,7 @@
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define JL_STACK_SIZE (64*1024*1024)
 #elif defined(_P64)
-#define JL_STACK_SIZE (4*1024*1024)
+#define JL_STACK_SIZE (8*1024*1024)
 #else
 #define JL_STACK_SIZE (2*1024*1024)
 #endif
diff --git a/src/passes.h b/src/passes.h
index 9c3b0421670b5..6557a5813063d 100644
--- a/src/passes.h
+++ b/src/passes.h
@@ -64,7 +64,7 @@ struct RemoveNIPass : PassInfoMixin<RemoveNIPass> {
 
 struct MultiVersioningPass : PassInfoMixin<MultiVersioningPass> {
     bool external_use;
-    MultiVersioningPass(bool external_use = false) : external_use(external_use) {}
+    MultiVersioningPass(bool external_use = false) JL_NOTSAFEPOINT : external_use(external_use) {}
     PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
 };
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index aafce01856634..236be179e12c9 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -19,18 +19,6 @@
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
 
@@ -40,6 +28,7 @@
 #include <llvm/Transforms/IPO/ConstantMerge.h>
 #include <llvm/Transforms/IPO/ForceFunctionAttrs.h>
 #include <llvm/Transforms/IPO/GlobalDCE.h>
+#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
 #include <llvm/Transforms/InstCombine/InstCombine.h>
 #include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
 #include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
@@ -76,6 +65,8 @@
 #include <llvm/Transforms/Scalar/SimplifyCFG.h>
 #include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
 #include <llvm/Transforms/Utils/InjectTLIMappings.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
 #include <llvm/Transforms/Vectorize/VectorCombine.h>
@@ -189,10 +180,11 @@ namespace {
         // }
     }
 
-#ifdef JL_DEBUG_BUILD
+#ifdef JL_VERIFY_PASSES
     static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
-        if (!llvm_only)
-            MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
+        if (!llvm_only){
+            MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass(true)));
+        }
         MPM.addPass(VerifierPass());
     }
 #endif
@@ -332,7 +324,7 @@ namespace {
 
 static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     MPM.addPass(BeforeEarlySimplificationMarkerPass());
-#ifdef JL_DEBUG_BUILD
+#ifdef JL_VERIFY_PASSES
     addVerificationPasses(MPM, options.llvm_only);
 #endif
     if (options.enable_early_simplifications) {
@@ -608,7 +600,8 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
         if (O.getSpeedupLevel() >= 2) {
             buildVectorPipeline(FPM, PB, O, options);
         }
-        FPM.addPass(WarnMissedTransformationsPass());
+        if (options.warn_missed_transformations)
+            FPM.addPass(WarnMissedTransformationsPass());
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
     }
     buildIntrinsicLoweringPipeline(MPM, PB, O, options);
@@ -616,63 +609,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
     MPM.addPass(AfterOptimizationMarkerPass());
 }
 
-struct PipelineConfig {
-    int Speedup;
-    int Size;
-    int lower_intrinsics;
-    int dump_native;
-    int external_use;
-    int llvm_only;
-    int always_inline;
-    int enable_early_simplifications;
-    int enable_early_optimizations;
-    int enable_scalar_optimizations;
-    int enable_loop_optimizations;
-    int enable_vector_pipeline;
-    int remove_ni;
-    int cleanup;
-};
-
-extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, PipelineConfig* config) JL_NOTSAFEPOINT
-{
-    OptimizationLevel O;
-    switch (config->Size) {
-        case 1:
-            O = OptimizationLevel::Os;
-            break;
-        default:
-            O = OptimizationLevel::Oz;
-            break;
-        case 0:
-            switch (config->Speedup) {
-                case 0:
-                    O = OptimizationLevel::O0;
-                    break;
-                case 1:
-                    O = OptimizationLevel::O1;
-                    break;
-                case 2:
-                    O = OptimizationLevel::O2;
-                    break;
-                default:
-                    O = OptimizationLevel::O3;
-                    break;
-            }
-    }
-    buildPipeline(*reinterpret_cast<ModulePassManager*>(MPM), reinterpret_cast<PassBuilder*>(PB), O,
-                    OptimizationOptions{!!config->lower_intrinsics,
-                                        !!config->dump_native,
-                                        !!config->external_use,
-                                        !!config->llvm_only,
-                                        !!config->always_inline,
-                                        !!config->enable_early_simplifications,
-                                        !!config->enable_early_optimizations,
-                                        !!config->enable_scalar_optimizations,
-                                        !!config->enable_loop_optimizations,
-                                        !!config->enable_vector_pipeline,
-                                        !!config->remove_ni,
-                                        !!config->cleanup});
-}
 
 #undef JULIA_PASS
 
@@ -805,6 +741,17 @@ void NewPM::run(Module &M) {
 #else
     StandardInstrumentations SI(false);
 #endif
+#if JL_LLVM_VERSION >= 170000
+    PassInstrumentationCallbacks PIC;
+    adjustPIC(PIC);
+    TimePasses.registerCallbacks(PIC);
+    FunctionAnalysisManager FAM(createFAM(O, *TM.get()));
+    LoopAnalysisManager LAM;
+    CGSCCAnalysisManager CGAM;
+    ModuleAnalysisManager MAM;
+    SI.registerCallbacks(PIC, &MAM);
+    SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this
+#else
     FunctionAnalysisManager FAM(createFAM(O, *TM.get()));
     PassInstrumentationCallbacks PIC;
     adjustPIC(PIC);
@@ -814,6 +761,7 @@ void NewPM::run(Module &M) {
     LoopAnalysisManager LAM;
     CGSCCAnalysisManager CGAM;
     ModuleAnalysisManager MAM;
+#endif
     PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC);
     PB.registerLoopAnalyses(LAM);
     PB.registerFunctionAnalyses(FAM);
@@ -857,7 +805,16 @@ static Optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJuliaPip
             OPTION(lower_intrinsics),
             OPTION(dump_native),
             OPTION(external_use),
-            OPTION(llvm_only)
+            OPTION(llvm_only),
+            OPTION(always_inline),
+            OPTION(enable_early_simplifications),
+            OPTION(enable_early_optimizations),
+            OPTION(enable_scalar_optimizations),
+            OPTION(enable_loop_optimizations),
+            OPTION(enable_vector_pipeline),
+            OPTION(remove_ni),
+            OPTION(cleanup),
+            OPTION(warn_missed_transformations)
 #undef OPTION
         };
         while (!name.empty()) {
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index af9d93d612249..5a4f599d1f0eb 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -189,7 +189,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
             jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
             if (inferred &&
                 inferred != jl_nothing &&
-                (jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
+                (jl_options.compile_enabled != JL_OPTIONS_COMPILE_ALL && jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
                 do_compile = 1;
             }
             else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
diff --git a/src/processor.cpp b/src/processor.cpp
index fc56a93be9787..025043ac362d4 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -7,7 +7,11 @@
 #include <llvm/ADT/ArrayRef.h>
 #include <llvm/ADT/SmallVector.h>
 #include <llvm/ADT/StringMap.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Host.h>
+#else
 #include <llvm/Support/Host.h>
+#endif
 #include <llvm/Support/MathExtras.h>
 #include <llvm/Support/raw_ostream.h>
 
@@ -158,7 +162,11 @@ struct FeatureList {
     {
         int cnt = 0;
         for (size_t i = 0; i < n; i++)
+            #if JL_LLVM_VERSION >= 170000
+            cnt += llvm::popcount(eles[i]);
+            #else
             cnt += llvm::countPopulation(eles[i]);
+            #endif
         return cnt;
     }
     inline bool empty() const
@@ -389,7 +397,7 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n
             return feature.bit;
         }
     }
-    return (uint32_t)-1;
+    return UINT32_MAX;
 }
 
 // This is how we save the target identification.
@@ -642,7 +650,7 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
     jl_value_t* rejection_reason = nullptr;
     JL_GC_PUSH1(&rejection_reason);
     uint32_t target_idx = callback(ids, &rejection_reason);
-    if (target_idx == (uint32_t)-1) {
+    if (target_idx == UINT32_MAX) {
         jl_error(jl_string_ptr(rejection_reason));
     }
     JL_GC_POP();
@@ -856,7 +864,7 @@ static inline void check_cmdline(T &&cmdline, bool imaging)
 }
 
 struct SysimgMatch {
-    uint32_t best_idx{(uint32_t)-1};
+    uint32_t best_idx{UINT32_MAX};
     int vreg_size{0};
 };
 
@@ -911,7 +919,7 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v
         feature_size = new_feature_size;
         rejection_reasons.push_back("Updating best match to this target\n");
     }
-    if (match.best_idx == (uint32_t)-1) {
+    if (match.best_idx == UINT32_MAX) {
         // Construct a nice error message for debugging purposes
         std::string error_msg = "Unable to find compatible target in cached code image.\n";
         for (size_t i = 0; i < rejection_reasons.size(); i++) {
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 0d069b085ac47..d28e527ed44e8 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -11,7 +11,7 @@
 
 // This nesting is required to allow compilation on musl
 #define USE_DYN_GETAUXVAL
-#if defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
+#if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_)) && defined(_CPU_AARCH64_)
 #  undef USE_DYN_GETAUXVAL
 #  include <sys/auxv.h>
 #elif defined(__GLIBC_PREREQ)
@@ -207,7 +207,7 @@ static constexpr auto feature_masks = get_feature_masks(
 #undef JL_FEATURE_DEF
     -1);
 static const auto real_feature_masks =
-    feature_masks & FeatureList<feature_sz>{{(uint32_t)-1, (uint32_t)-1, 0}};
+    feature_masks & FeatureList<feature_sz>{{UINT32_MAX, UINT32_MAX, 0}};
 
 namespace Feature {
 enum : uint32_t {
@@ -473,7 +473,7 @@ static constexpr auto feature_masks = get_feature_masks(
 #undef JL_FEATURE_DEF
     -1);
 static const auto real_feature_masks =
-    feature_masks & FeatureList<feature_sz>{{(uint32_t)-1, (uint32_t)-1, 0}};
+    feature_masks & FeatureList<feature_sz>{{UINT32_MAX, UINT32_MAX, 0}};
 
 namespace Feature {
 enum : uint32_t {
@@ -737,7 +737,16 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
 #  define AT_HWCAP2 26
 #endif
 
-#if defined(USE_DYN_GETAUXVAL)
+#if defined(_OS_FREEBSD_)
+static inline unsigned long jl_getauxval(unsigned long type)
+{
+    unsigned long val;
+    if (elf_aux_info((int)type, &val, sizeof(val)) != 0) {
+        return 0;
+    }
+    return val;
+}
+#elif defined(USE_DYN_GETAUXVAL)
 static unsigned long getauxval_procfs(unsigned long type)
 {
     int fd = open("/proc/self/auxv", O_RDONLY);
@@ -830,7 +839,7 @@ template<typename T, typename F>
 static inline bool try_read_procfs_line(llvm::StringRef line, const char *prefix, T &out,
                                         bool &flag, F &&reset)
 {
-    if (!line.startswith(prefix))
+    if (!line.starts_with(prefix))
         return false;
     if (flag)
         reset();
@@ -1522,7 +1531,7 @@ static const llvm::SmallVector<TargetData<feature_sz>, 0> &get_cmdline_targets(v
         }
 #endif
         auto fbit = find_feature_bit(feature_names, nfeature_names, str, len);
-        if (fbit == (uint32_t)-1)
+        if (fbit == UINT32_MAX)
             return false;
         set_bit(list, fbit, true);
         return true;
@@ -1603,7 +1612,7 @@ static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason)
         }
     }
     auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
-    if (match.best_idx == -1)
+    if (match.best_idx == UINT32_MAX)
         return match.best_idx;
     // Now we've decided on which sysimg version to use.
     // Make sure the JIT target is compatible with it and save the JIT target.
@@ -1865,7 +1874,7 @@ JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
     JL_GC_PUSH1(&rejection_reason);
     uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason);
     JL_GC_POP();
-    if (match_idx == (uint32_t)-1)
+    if (match_idx == UINT32_MAX)
         return rejection_reason;
     return jl_nothing;
 }
@@ -1890,12 +1899,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     return res;
 }
 
+#ifndef __clang_gcanalyzer__
 llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
+
+    auto &cmdline = get_cmdline_targets();
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+    auto ntargets = image_targets.size();
+    if (image_targets.empty())
+        jl_error("No targets specified");
     llvm::SmallVector<jl_target_spec_t, 0> res;
-    for (auto &target: jit_targets) {
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        if (t.en.flags & JL_TARGET_CLONE_ALL)
+            continue;
+        auto &features0 = image_targets[t.base].en.features;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
+        static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
+        for (auto fe: clone_fp16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_FLOAT16;
+                break;
+            }
+        }
+        // The most useful one in general...
+        t.en.flags |= JL_TARGET_CLONE_LOOP;
+#ifdef _CPU_ARM_
+        static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
+        for (auto fe: clone_math) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_MATH;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_simd[] = {Feature::neon};
+        for (auto fe: clone_simd) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_SIMD;
+                break;
+            }
+        }
+#endif
+    }
+    for (auto &target: image_targets) {
         auto features_en = target.en.features;
         auto features_dis = target.dis.features;
         for (auto &fename: feature_names) {
@@ -1916,6 +1969,8 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
     return res;
 }
 
+#endif
+
 extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 {
     if (feature >= 32 * feature_sz)
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index 1c266e29e9dd7..f8d9eb9fd9e73 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -144,13 +144,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
                 jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
     return res;
 }
-
+#ifndef __clang_gcanalyzer__
 llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
+
+    auto &cmdline = get_cmdline_targets();
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<1>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+    auto ntargets = image_targets.size();
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        t.en.flags |= JL_TARGET_CLONE_ALL;
+    }
+    if (image_targets.empty())
+        jl_error("No image targets found");
     llvm::SmallVector<jl_target_spec_t, 0> res;
-    for (auto &target: jit_targets) {
+    for (auto &target: image_targets) {
         jl_target_spec_t ele;
         std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
         ele.data = serialize_target_data(target.name, target.en.features,
@@ -161,6 +175,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
     }
     return res;
 }
+#endif
 
 JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
 {
@@ -179,7 +194,7 @@ JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
     JL_GC_PUSH1(&rejection_reason);
     uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason);
     JL_GC_POP();
-    if (match_idx == (uint32_t)-1)
+    if (match_idx == UINT32_MAX)
         return rejection_reason;
     return jl_nothing;
 }
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index 24363d2d9d28f..db954680289ea 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -786,7 +786,7 @@ static const llvm::SmallVector<TargetData<feature_sz>, 0> &get_cmdline_targets(v
 {
     auto feature_cb = [] (const char *str, size_t len, FeatureList<feature_sz> &list) {
         auto fbit = find_feature_bit(feature_names, nfeature_names, str, len);
-        if (fbit == (uint32_t)-1)
+        if (fbit == UINT32_MAX)
             return false;
         set_bit(list, fbit, true);
         return true;
@@ -880,7 +880,7 @@ static uint32_t sysimg_init_cb(const void *id, jl_value_t** rejection_reason)
                  "https://docs.julialang.org/en/v1/devdocs/sysimg/ for more.");
     }
     auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
-    if (match.best_idx == (uint32_t)-1)
+    if (match.best_idx == UINT32_MAX)
         return match.best_idx;
     // Now we've decided on which sysimg version to use.
     // Make sure the JIT target is compatible with it and save the JIT target.
@@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
     return match.best_idx;
 }
 
+//This function serves as a fallback during bootstrapping, at that point we don't have a sysimage with native code
+// so we won't call sysimg_init_cb, else this function shouldn't do anything.
 static void ensure_jit_target(bool imaging)
 {
     auto &cmdline = get_cmdline_targets();
@@ -1058,7 +1060,7 @@ JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
     JL_GC_PUSH1(&rejection_reason);
     uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason);
     JL_GC_POP();
-    if (match_idx == (uint32_t)-1)
+    if (match_idx == UINT32_MAX)
         return rejection_reason;
     return jl_nothing;
 }
@@ -1102,13 +1104,82 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
             {feature_masks, 0}, {{}, 0}, 0});
     return res;
 }
-
+//This function parses the -C command line to figure out which targets to multiversion to.
+#ifndef __clang_gcanalyzer__
 llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
+    auto &cmdline = get_cmdline_targets();
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+
+    auto ntargets = image_targets.size();
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        if (t.en.flags & JL_TARGET_CLONE_ALL)
+            continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
+        // The most useful one in general...
+        t.en.flags |= JL_TARGET_CLONE_LOOP;
+        auto &features0 = image_targets[t.base].en.features;
+        // Special case for KNL/KNM since they're so different
+        if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
+            if ((t.name == "knl" || t.name == "knm") &&
+                image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
+                t.en.flags |= JL_TARGET_CLONE_ALL;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
+        static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
+                                                  Feature::sse41, Feature::sse42,
+                                                  Feature::avx, Feature::avx2,
+                                                  Feature::vaes, Feature::vpclmulqdq,
+                                                  Feature::sse4a, Feature::avx512f,
+                                                  Feature::avx512dq, Feature::avx512ifma,
+                                                  Feature::avx512pf, Feature::avx512er,
+                                                  Feature::avx512cd, Feature::avx512bw,
+                                                  Feature::avx512vl, Feature::avx512vbmi,
+                                                  Feature::avx512vpopcntdq, Feature::avxvnni,
+                                                  Feature::avx512vbmi2, Feature::avx512vnni,
+                                                  Feature::avx512bitalg, Feature::avx512bf16,
+                                                  Feature::avx512vp2intersect, Feature::avx512fp16};
+        for (auto fe: clone_math) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_MATH;
+                break;
+            }
+        }
+        for (auto fe: clone_simd) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_SIMD;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
+        for (auto fe: clone_fp16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_FLOAT16;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
+        for (auto fe: clone_bf16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
+                break;
+            }
+        }
+    }
+    if (image_targets.empty())
+        jl_error("No targets specified");
     llvm::SmallVector<jl_target_spec_t, 0> res;
-    for (auto &target: jit_targets) {
+    for (auto &target: image_targets) {
         auto features_en = target.en.features;
         auto features_dis = target.dis.features;
         for (auto &fename: feature_names) {
@@ -1128,6 +1199,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
     }
     return res;
 }
+#endif
 
 extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 {
diff --git a/src/rtutils.c b/src/rtutils.c
index 1ab881e613564..85a9be5e0b1da 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -152,9 +152,9 @@ JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *
     jl_throw(jl_new_struct(jl_undefvarerror_type, var, scope));
 }
 
-JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var)
+JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var)
 {
-    jl_errorf("type %s has no field %s", jl_symbol_name(type_name), jl_symbol_name(var));
+    jl_throw(jl_new_struct(jl_fielderror_type, t, var));
 }
 
 JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str)
@@ -239,9 +239,8 @@ JL_DLLEXPORT void __stack_chk_fail(void)
 
 // exceptions -----------------------------------------------------------------
 
-JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
+JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh)
 {
-    jl_task_t *ct = jl_current_task;
     // Must have no safepoint
     eh->prev = ct->eh;
     eh->gcstack = ct->gcstack;
@@ -249,7 +248,6 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
     eh->locks_len = ct->ptls->locks.len;
     eh->defer_signal = ct->ptls->defer_signal;
     eh->world_age = ct->world_age;
-    ct->eh = eh;
 #ifdef ENABLE_TIMINGS
     eh->timing_stack = ct->ptls->timing_stack;
 #endif
@@ -260,9 +258,8 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 // * We leave a try block through normal control flow
 // * An exception causes a nonlocal jump to the catch block. In this case
 //   there's additional cleanup required, eg pushing the exception stack.
-JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
+JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh)
 {
-    jl_task_t *ct = jl_current_task;
 #ifdef _OS_WINDOWS_
     if (ct->ptls->needs_resetstkoflw) {
         _resetstkoflw();
@@ -272,10 +269,11 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
     // `eh` may be not equal to `ct->eh`. See `jl_pop_handler`
     // This function should **NOT** have any safepoint before the ones at the
     // end.
-    sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
+    jl_ptls_t ptls = ct->ptls;
+    sig_atomic_t old_defer_signal = ptls->defer_signal;
     ct->eh = eh->prev;
     ct->gcstack = eh->gcstack;
-    small_arraylist_t *locks = &ct->ptls->locks;
+    small_arraylist_t *locks = &ptls->locks;
     int unlocks = locks->len > eh->locks_len;
     if (unlocks) {
         for (size_t i = locks->len; i > eh->locks_len; i--)
@@ -283,41 +281,67 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
         locks->len = eh->locks_len;
     }
     ct->world_age = eh->world_age;
-    ct->ptls->defer_signal = eh->defer_signal;
-    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    ptls->defer_signal = eh->defer_signal;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ptls->gc_state);
     if (old_gc_state != eh->gc_state)
-        jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
+        jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
     if (!old_gc_state || !eh->gc_state) // it was or is unsafe now
-        jl_gc_safepoint_(ct->ptls);
+        jl_gc_safepoint_(ptls);
+    jl_value_t *exception = ptls->sig_exception;
+    if (exception) {
+        int8_t oldstate = jl_gc_unsafe_enter(ptls);
+        /* The temporary ptls->bt_data is rooted by special purpose code in the
+        GC. This exists only for the purpose of preserving bt_data until we
+        set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+        ptls->sig_exception = NULL;
+        jl_gc_unsafe_leave(ptls, oldstate);
+    }
     if (old_defer_signal && !eh->defer_signal)
-        jl_sigint_safepoint(ct->ptls);
+        jl_sigint_safepoint(ptls);
     if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) &&
             unlocks && eh->locks_len == 0) {
         jl_gc_run_pending_finalizers(ct);
     }
 }
 
-JL_DLLEXPORT void jl_pop_handler(int n)
+JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh)
+{
+    assert(ct->gcstack == eh->gcstack && "Incorrect GC usage under try catch");
+    ct->eh = eh->prev;
+    ct->ptls->defer_signal = eh->defer_signal; // optional, but certain try-finally (in stream.jl) may be slightly harder to write without this
+}
+
+JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n)
+{
+    if (__unlikely(n <= 0))
+        return;
+    jl_handler_t *eh = ct->eh;
+    while (--n > 0)
+        eh = eh->prev;
+    jl_eh_restore_state(ct, eh);
+}
+
+JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n)
 {
-    jl_task_t *ct = jl_current_task;
     if (__unlikely(n <= 0))
         return;
     jl_handler_t *eh = ct->eh;
     while (--n > 0)
         eh = eh->prev;
-    jl_eh_restore_state(eh);
+    jl_eh_restore_state_noexcept(ct, eh);
 }
 
-JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT
+JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *s = ct->excstack;
     return s ? s->top : 0;
 }
 
-JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *s = ct->excstack;
     if (s) {
         assert(s->top >= state);
@@ -332,28 +356,27 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE
     dest->top = src->top;
 }
 
-static void jl_reserve_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
+static void jl_reserve_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
                                 size_t reserved_size)
 {
     jl_excstack_t *s = *stack;
     if (s && s->reserved_size >= reserved_size)
         return;
     size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size;
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz);
     new_s->top = 0;
     new_s->reserved_size = reserved_size;
     if (s)
         jl_copy_excstack(new_s, s);
     *stack = new_s;
-    jl_gc_wb(task, new_s);
+    jl_gc_wb(ct, new_s);
 }
 
-void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
+void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
                       jl_value_t *exception JL_ROOTED_ARGUMENT,
                       jl_bt_element_t *bt_data, size_t bt_size)
 {
-    jl_reserve_excstack(task, stack, (*stack ? (*stack)->top : 0) + bt_size + 2);
+    jl_reserve_excstack(ct, stack, (*stack ? (*stack)->top : 0) + bt_size + 2);
     jl_excstack_t *s = *stack;
     jl_bt_element_t *rawstack = jl_excstack_raw(s);
     memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size);
@@ -543,7 +566,7 @@ JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
     if (jl_base_module == NULL)
         return NULL;
     jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"), 0);
-    return stderr_obj ? jl_atomic_load_relaxed(&stderr_obj->value) : NULL;
+    return stderr_obj ? jl_get_binding_value(stderr_obj) : NULL;
 }
 
 // toys for debugging ---------------------------------------------------------
@@ -638,12 +661,10 @@ static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT
     jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
     if (globname && dv->name->module) {
         jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0);
-        if (b && jl_atomic_load_relaxed(&b->owner) && b->constp) {
-            jl_value_t *bv = jl_atomic_load_relaxed(&b->value);
-            // The `||` makes this function work for both function instances and function types.
-            if (bv == v || jl_typeof(bv) == v)
-                return 1;
-        }
+        jl_value_t *bv = jl_get_binding_value_if_const(b);
+        // The `||` makes this function work for both function instances and function types.
+        if (bv && (bv == v || jl_typeof(bv) == v))
+            return 1;
     }
     return 0;
 }
@@ -801,7 +822,8 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         else {
             n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth, ctx);
             n += jl_printf(out, ".<toplevel thunk> -> ");
-            n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth, ctx);
+            n += jl_static_show_x(out, jl_atomic_load_relaxed(&jl_cached_uninferred(
+                jl_atomic_load_relaxed(&li->cache), 1)->inferred), depth, ctx);
         }
     }
     else if (vt == jl_typename_type) {
@@ -864,6 +886,17 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             }
             return n;
         }
+        if (jl_genericmemory_type && dv->name == jl_genericmemory_typename) {
+            jl_value_t *isatomic = jl_tparam0(dv);
+            jl_value_t *el_type = jl_tparam1(dv);
+            jl_value_t *addrspace = jl_tparam2(dv);
+            if (isatomic == (jl_value_t*)jl_not_atomic_sym && addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) {
+                n += jl_printf(out, "Memory{");
+                n += jl_static_show_x(out, el_type, depth, ctx);
+                n += jl_printf(out, "}");
+                return n;
+            }
+        }
         if (ctx.quiet) {
             return jl_static_show_symbol(out, dv->name->name);
         }
@@ -1104,7 +1137,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (jl_genericmemoryref_type && jl_is_genericmemoryref_type(vt)) {
         jl_genericmemoryref_t *ref = (jl_genericmemoryref_t*)v;
-        n += jl_printf(out, "MemoryRef(offset=");
+        n += jl_printf(out, "GenericMemoryRef(offset=");
         size_t offset = (size_t)ref->ptr_or_offset;
         if (ref->mem) {
             const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typeof(ref->mem))->layout;
@@ -1117,30 +1150,19 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (jl_genericmemory_type && jl_is_genericmemory_type(vt)) {
         jl_genericmemory_t *m = (jl_genericmemory_t*)v;
-        jl_value_t *isatomic = jl_tparam0(vt);
-        jl_value_t *addrspace = jl_tparam2(vt);
-        if (isatomic == (jl_value_t*)jl_not_atomic_sym && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) {
-            n += jl_printf(out, "Memory{");
-        }
-        else {
-            n += jl_printf(out, "GenericMemory{");
-            n += jl_static_show_x(out, isatomic, depth, ctx);
-            n += jl_printf(out, ", ");
-            n += jl_static_show_x(out, addrspace, depth, ctx);
-            n += jl_printf(out, ", ");
-        }
+        //jl_value_t *isatomic = jl_tparam0(vt);
         jl_value_t *el_type = jl_tparam1(vt);
-        n += jl_static_show_x(out, el_type, depth, ctx);
+        jl_value_t *addrspace = jl_tparam2(vt);
+        n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx);
         size_t j, tlen = m->length;
-        n += jl_printf(out, "}(%" PRIdPTR ", %p)[", tlen, m->ptr);
-//#ifdef _P64
-//        n += jl_printf(out, "0x%016" PRIx64, tlen);
-//#else
-//        n += jl_printf(out, "0x%08" PRIx32, tlen);
-//#endif
+        n += jl_printf(out, "(%" PRIdPTR ", %p)[", tlen, m->ptr);
+        if (!(addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0)) {
+            n += jl_printf(out, "...]");
+            return n;
+        }
+        const char *typetagdata = NULL;
         const jl_datatype_layout_t *layout = vt->layout;
         int nlsep = 0;
-        const char *typetagdata = NULL;
         if (layout->flags.arrayelem_isboxed) {
             // print arrays with newlines, unless the elements are probably small
             for (j = 0; j < tlen; j++) {
@@ -1392,6 +1414,7 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
         return n;
     }
     if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) &&
+            ((jl_datatype_t*)ftype)->name->mt &&
             ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt &&
             ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) {
         n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->mt->name);
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index 25be49d6d2f3f..56f8487d8fb73 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -4,6 +4,11 @@
 #include <map>
 #include <string>
 #include <llvm/ADT/StringMap.h>
+#if JL_LLVM_VERSION >= 170000
+#include <llvm/TargetParser/Host.h>
+#else
+#include <llvm/Support/Host.h>
+#endif
 #include <llvm/Support/raw_ostream.h>
 
 #include "julia.h"
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index ff479a4d30959..db4007d32035e 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -9,6 +9,7 @@
 #include "APInt-C.h"
 #include "julia.h"
 #include "julia_internal.h"
+#include "llvm-version.h"
 
 const unsigned int host_char_bit = 8;
 
@@ -673,8 +674,7 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
 
     void *ptr;
     jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1);
-    jl_value_t *jv = jl_gc_alloc_1w();
-    jl_set_typeof(jv, rt);
+    jl_value_t *jv = jl_gc_alloc(jl_current_task->ptls, sizeof(void*), rt);
     *(void**)jl_data_ptr(jv) = ptr;
     JL_GC_POP();
     return jv;
@@ -1384,10 +1384,8 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c)
 un_iintrinsic_fast(LLVMNeg, neg, neg_int, u)
 #define add(a,b) a + b
 bi_iintrinsic_fast(LLVMAdd, add, add_int, u)
-bi_iintrinsic_fast(LLVMAdd, add, add_ptr, u)
 #define sub(a,b) a - b
 bi_iintrinsic_fast(LLVMSub, sub, sub_int, u)
-bi_iintrinsic_fast(LLVMSub, sub, sub_ptr, u)
 #define mul(a,b) a * b
 bi_iintrinsic_fast(LLVMMul, mul, mul_int, u)
 #define div(a,b) a / b
@@ -1576,6 +1574,16 @@ bi_iintrinsic_cnvtb_fast(LLVMAShr, ashr_op, ashr_int, , 1)
 //un_iintrinsic_fast(LLVMByteSwap, bswap_op, bswap_int, u)
 un_iintrinsic_slow(LLVMByteSwap, bswap_int, u)
 //#define ctpop_op(a) __builtin_ctpop(a)
+#if JL_LLVM_VERSION >= 170000
+//uu_iintrinsic_fast(LLVMPopcount, ctpop_op, ctpop_int, u)
+uu_iintrinsic_slow(LLVMPopcount, ctpop_int, u)
+//#define ctlz_op(a) __builtin_ctlz(a)
+//uu_iintrinsic_fast(LLVMCountl_zero, ctlz_op, ctlz_int, u)
+uu_iintrinsic_slow(LLVMCountl_zero, ctlz_int, u)
+//#define cttz_op(a) __builtin_cttz(a)
+//uu_iintrinsic_fast(LLVMCountr_zero, cttz_op, cttz_int, u)
+uu_iintrinsic_slow(LLVMCountr_zero, cttz_int, u)
+#else
 //uu_iintrinsic_fast(LLVMCountPopulation, ctpop_op, ctpop_int, u)
 uu_iintrinsic_slow(LLVMCountPopulation, ctpop_int, u)
 //#define ctlz_op(a) __builtin_ctlz(a)
@@ -1584,6 +1592,7 @@ uu_iintrinsic_slow(LLVMCountLeadingZeros, ctlz_int, u)
 //#define cttz_op(a) __builtin_cttz(a)
 //uu_iintrinsic_fast(LLVMCountTrailingZeros, cttz_op, cttz_int, u)
 uu_iintrinsic_slow(LLVMCountTrailingZeros, cttz_int, u)
+#endif
 #define not_op(a) ~a
 un_iintrinsic_fast(LLVMFlipAllBits, not_op, not_int, u)
 
@@ -1696,3 +1705,19 @@ JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
     else
         return jl_false;
 }
+
+JL_DLLEXPORT jl_value_t *jl_add_ptr(jl_value_t *ptr, jl_value_t *offset)
+{
+    JL_TYPECHK(add_ptr, pointer, ptr);
+    JL_TYPECHK(add_ptr, ulong, offset);
+    char *ptrval = (char*)jl_unbox_long(ptr) + jl_unbox_ulong(offset);
+    return jl_new_bits(jl_typeof(ptr), &ptrval);
+}
+
+JL_DLLEXPORT jl_value_t *jl_sub_ptr(jl_value_t *ptr, jl_value_t *offset)
+{
+    JL_TYPECHK(sub_ptr, pointer, ptr);
+    JL_TYPECHK(sub_ptr, ulong, offset);
+    char *ptrval = (char*)jl_unbox_long(ptr) - jl_unbox_ulong(offset);
+    return jl_new_bits(jl_typeof(ptr), &ptrval);
+}
diff --git a/src/safepoint.c b/src/safepoint.c
index 22cda0a89444d..2e324078897a6 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -267,12 +267,12 @@ void jl_safepoint_wait_thread_resume(void)
         uv_cond_broadcast(&safepoint_cond_begin);
         uv_mutex_unlock(&safepoint_lock);
         uv_mutex_lock(&ct->ptls->sleep_lock);
+        while (jl_atomic_load_relaxed(&ct->ptls->suspend_count))
+            uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock);
     }
-    while (jl_atomic_load_relaxed(&ct->ptls->suspend_count))
-        uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock);
-    // must while still holding the mutex_unlock, so we know other threads in
-    // jl_safepoint_suspend_thread will observe this thread in the correct GC
-    // state, and not still stuck in JL_GC_STATE_WAITING
+    // must exit gc while still holding the mutex_unlock, so we know other
+    // threads in jl_safepoint_suspend_thread will observe this thread in the
+    // correct GC state, and not still stuck in JL_GC_STATE_WAITING
     jl_atomic_store_release(&ct->ptls->gc_state, state);
     uv_mutex_unlock(&ct->ptls->sleep_lock);
 }
@@ -290,12 +290,20 @@ int jl_safepoint_suspend_thread(int tid, int waitstate)
     if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads))
         return 0;
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
+    if (ct2 == NULL) {
+        // this thread is not alive yet or already dead
+        return 0;
+    }
+    uv_mutex_lock(&safepoint_lock);
     uv_mutex_lock(&ptls2->sleep_lock);
     int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count) + 1;
     jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count);
     if (suspend_count == 1) { // first to suspend
         jl_safepoint_enable(3);
         jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 3 + sizeof(void*)));
+        if (jl_atomic_load(&_threadedregion) != 0 || tid == jl_atomic_load_relaxed(&io_loop_tid))
+            jl_wake_libuv(); // our integration with libuv right now doesn't handle except by waking it
     }
     uv_mutex_unlock(&ptls2->sleep_lock);
     if (waitstate) {
@@ -305,7 +313,9 @@ int jl_safepoint_suspend_thread(int tid, int waitstate)
             // not, so assume it is running GC and wait for GC to finish first.
             // It will be unable to reenter helping with GC because we have
             // changed its safepoint page.
+            uv_mutex_unlock(&safepoint_lock);
             jl_set_gc_and_wait();
+            uv_mutex_lock(&safepoint_lock);
         }
         while (jl_atomic_load_acquire(&ptls2->suspend_count) != 0) {
             int8_t state2 = jl_atomic_load_acquire(&ptls2->gc_state);
@@ -313,9 +323,10 @@ int jl_safepoint_suspend_thread(int tid, int waitstate)
                 break;
             if (waitstate == 3 && state2 == JL_GC_STATE_WAITING)
                 break;
-            jl_cpu_pause(); // yield (wait for safepoint_cond_begin, for example)?
+            uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
         }
     }
+    uv_mutex_unlock(&safepoint_lock);
     return suspend_count;
 }
 
@@ -326,6 +337,11 @@ int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT
     if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads))
         return 0;
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
+    if (ct2 == NULL) {
+        // this thread is not alive yet or already dead
+        return 0;
+    }
     uv_mutex_lock(&safepoint_lock);
     uv_mutex_lock(&ptls2->sleep_lock);
     int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count);
@@ -338,6 +354,7 @@ int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT
 #ifdef _OS_DARWIN_
         jl_safepoint_resume_thread_mach(ptls2, tid);
 #endif
+        uv_cond_broadcast(&safepoint_cond_begin);
     }
     if (suspend_count != 0) {
         jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count - 1);
diff --git a/src/scheduler.c b/src/scheduler.c
index 81cf03bc38c5b..bb2f85b52283f 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -7,7 +7,6 @@
 
 #include "julia.h"
 #include "julia_internal.h"
-#include "gc.h"
 #include "threading.h"
 
 #ifdef __cplusplus
@@ -32,7 +31,7 @@ static const int16_t sleeping_like_the_dead JL_UNUSED = 2;
 // a running count of how many threads are currently not_sleeping
 // plus a running count of the number of in-flight wake-ups
 // n.b. this may temporarily exceed jl_n_threads
-static _Atomic(int) nrunning = 0;
+_Atomic(int) n_threads_running = 0;
 
 // invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending).
 // invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping.
@@ -85,15 +84,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA
 extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
                                          jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT;
 
-// parallel task runtime
-// ---
-
-JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return cong(max, &ptls->rngseed);
-}
-
 // initialize the threading infrastructure
 // (called only by the main thread)
 void jl_init_threadinginfra(void)
@@ -110,67 +100,7 @@ void jl_init_threadinginfra(void)
 }
 
 
-void JL_NORETURN jl_finish_task(jl_task_t *t);
-
-static inline int may_mark(void) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&gc_n_threads_marking) > 0);
-}
-
-static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&ptls->gc_sweeps_requested) > 0);
-}
-
-// parallel gc thread function
-void jl_parallel_gc_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, JL_GC_STATE_UNSAFE);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (!may_mark() && !may_sweep(ptls)) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_loop_parallel(ptls, 0);
-        if (may_sweep(ptls)) { // not an else!
-            gc_sweep_pool_parallel(ptls);
-            jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
-        }
-    }
-}
-
-// concurrent gc thread function
-void jl_concurrent_gc_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, JL_GC_STATE_UNSAFE);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        uv_sem_wait(&gc_sweep_assists_needed);
-        gc_free_pages();
-    }
-}
+void JL_NORETURN jl_finish_task(jl_task_t *ct);
 
 // thread function: used by all mutator threads except the main thread
 void jl_threadfun(void *arg)
@@ -183,8 +113,6 @@ void jl_threadfun(void *arg)
     jl_init_stack_limits(0, &stack_lo, &stack_hi);
     // warning: this changes `jl_current_task`, so be careful not to call that from this function
     jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
-    int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
-    assert(wasrunning); (void)wasrunning;
     JL_GC_PROMISE_ROOTED(ct);
 
     // wait for all threads
@@ -207,9 +135,9 @@ void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT
     // record that there is now another thread that may be used to schedule work
     // we will decrement this again in scheduler_delete_thread, only slightly
     // in advance of pthread_join (which hopefully itself also had been
-    // adopted by now and is included in nrunning too)
-    (void)jl_atomic_fetch_add_relaxed(&nrunning, 1);
-    // n.b. this is the only point in the code where we ignore the invariants on the ordering of nrunning
+    // adopted by now and is included in n_threads_running too)
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+    // n.b. this is the only point in the code where we ignore the invariants on the ordering of n_threads_running
     // since we are being initialized from foreign code, we could not necessarily have expected or predicted that to happen
 }
 
@@ -262,6 +190,21 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT
     return 0;
 }
 
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // equivalent to wake_thread, without the assert on wasrunning
+    int8_t state = jl_atomic_load_relaxed(&ptls->sleep_check_state);
+    if (state == sleeping) {
+        if (jl_atomic_cmpswap_relaxed(&ptls->sleep_check_state, &state, not_sleeping)) {
+            // this notification will never be consumed, so we may have now
+            // introduced some inaccuracy into the count, but that is
+            // unavoidable with any asynchronous interruption
+            jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+        }
+    }
+}
+
+
 static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
@@ -269,7 +212,7 @@ static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
             return 1;
         }
     }
-    int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1); // consume in-flight wakeup
+    int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1); // consume in-flight wakeup
     assert(wasrunning > 1); (void)wasrunning;
     return 0;
 }
@@ -281,7 +224,7 @@ static int wake_thread(int16_t tid) JL_NOTSAFEPOINT
     if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) {
         int8_t state = sleeping;
         if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) {
-            int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); // increment in-flight wakeup count
+            int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); // increment in-flight wakeup count
             assert(wasrunning); (void)wasrunning;
             JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state);
             uv_mutex_lock(&ptls2->sleep_lock);
@@ -301,10 +244,7 @@ static void wake_libuv(void) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() );
 }
 
-/* ensure thread tid is awake if necessary */
-JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
-{
-    jl_task_t *ct = jl_current_task;
+void wakeup_thread(jl_task_t *ct, int16_t tid) JL_NOTSAFEPOINT { // Pass in ptls when we have it already available to save a lookup
     int16_t self = jl_atomic_load_relaxed(&ct->tid);
     if (tid != self)
         jl_fence(); // [^store_buffering_1]
@@ -312,11 +252,11 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
     if (tid == self || tid == -1) {
         // we're already awake, but make sure we'll exit uv_run
-        // and that nrunning is updated if this is now considered in-flight
+        // and that n_threads_running is updated if this is now considered in-flight
         jl_ptls_t ptls = ct->ptls;
         if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
             if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
-                int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
                 assert(wasrunning); (void)wasrunning;
                 JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
             }
@@ -359,6 +299,12 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
 }
 
+/* ensure thread tid is awake if necessary */
+JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    wakeup_thread(ct, tid);
+}
 
 // get the next runnable task
 static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
@@ -438,6 +384,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
         jl_ptls_t ptls = ct->ptls;
         if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid) && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) {
             // acquire sleep-check lock
+            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
             jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping);
             jl_fence(); // [^store_buffering_1]
             JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls);
@@ -447,140 +394,156 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                 }
                 continue;
             }
-            task = get_next_task(trypoptask, q); // note: this should not yield
-            if (ptls != ct->ptls) {
-                // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
-                ptls = ct->ptls;
-                if (set_not_sleeping(ptls)) {
-                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+            volatile int isrunning = 1;
+            JL_TRY {
+                task = get_next_task(trypoptask, q); // note: this should not yield
+                if (ptls != ct->ptls) {
+                    // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
+                    ptls = ct->ptls;
+                    if (set_not_sleeping(ptls)) {
+                        JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                    }
+                    continue; // jump to JL_CATCH
                 }
-                if (task)
-                    return task;
-                continue;
-            }
-            if (task) {
-                if (set_not_sleeping(ptls)) {
-                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                if (task) {
+                    if (set_not_sleeping(ptls)) {
+                        JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                    }
+                    continue; // jump to JL_CATCH
                 }
-                return task;
-            }
 
-            // IO is always permitted, but outside a threaded region, only
-            // thread 0 will process messages.
-            // Inside a threaded region, any thread can listen for IO messages,
-            // and one thread should win this race and watch the event loop,
-            // but we bias away from idle threads getting parked here.
-            //
-            // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]:
-            //  - After decrementing _threadedregion, the thread is required to
-            //    call jl_wakeup_thread(0), that will kick out any thread who is
-            //    already there, and then eventually thread 0 will get here.
-            //  - Inside a _threadedregion, there must exist at least one
-            //    thread that has a happens-before relationship on the libuv lock
-            //    before reaching this decision point in the code who will see
-            //    the lock as unlocked and thus must win this race here.
-            int uvlock = 0;
-            if (jl_atomic_load_relaxed(&_threadedregion)) {
-                uvlock = jl_mutex_trylock(&jl_uv_mutex);
-            }
-            else if (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
-                uvlock = 1;
-                JL_UV_LOCK();
-            }
-            else {
-                // Since we might have started some IO work, we might need
-                // to ensure tid = 0 will go watch that new event source.
-                // If trylock would have succeeded, that may have been our
-                // responsibility, so need to make sure thread 0 will take care
-                // of us.
-                if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock
-                    jl_wakeup_thread(0);
-            }
-            if (uvlock) {
-                int enter_eventloop = may_sleep(ptls);
-                int active = 0;
-                if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0)
-                    // if we won the race against someone who actually needs
-                    // the lock to do real work, we need to let them have it instead
-                    enter_eventloop = 0;
-                if (enter_eventloop) {
-                    uv_loop_t *loop = jl_global_event_loop();
-                    loop->stop_flag = 0;
-                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
-                    active = uv_run(loop, UV_RUN_ONCE);
-                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
-                    jl_gc_safepoint();
+                // IO is always permitted, but outside a threaded region, only
+                // thread 0 will process messages.
+                // Inside a threaded region, any thread can listen for IO messages,
+                // and one thread should win this race and watch the event loop,
+                // but we bias away from idle threads getting parked here.
+                //
+                // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]:
+                //  - After decrementing _threadedregion, the thread is required to
+                //    call jl_wakeup_thread(0), that will kick out any thread who is
+                //    already there, and then eventually thread 0 will get here.
+                //  - Inside a _threadedregion, there must exist at least one
+                //    thread that has a happens-before relationship on the libuv lock
+                //    before reaching this decision point in the code who will see
+                //    the lock as unlocked and thus must win this race here.
+                int uvlock = 0;
+                if (jl_atomic_load_relaxed(&_threadedregion)) {
+                    uvlock = jl_mutex_trylock(&jl_uv_mutex);
                 }
-                JL_UV_UNLOCK();
-                // optimization: check again first if we may have work to do.
-                // Otherwise we got a spurious wakeup since some other thread
-                // that just wanted to steal libuv from us. We will just go
-                // right back to sleep on the individual wake signal to let
-                // them take it from us without conflict.
-                if (active || !may_sleep(ptls)) {
-                    if (set_not_sleeping(ptls)) {
-                        JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                else if (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
+                    uvlock = 1;
+                    JL_UV_LOCK();
+                }
+                else {
+                    // Since we might have started some IO work, we might need
+                    // to ensure tid = 0 will go watch that new event source.
+                    // If trylock would have succeeded, that may have been our
+                    // responsibility, so need to make sure thread 0 will take care
+                    // of us.
+                    if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock
+                        wakeup_thread(ct, 0);
+                }
+                if (uvlock) {
+                    int enter_eventloop = may_sleep(ptls);
+                    int active = 0;
+                    if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0)
+                        // if we won the race against someone who actually needs
+                        // the lock to do real work, we need to let them have it instead
+                        enter_eventloop = 0;
+                    if (enter_eventloop) {
+                        uv_loop_t *loop = jl_global_event_loop();
+                        loop->stop_flag = 0;
+                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
+                        active = uv_run(loop, UV_RUN_ONCE);
+                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
+                        jl_gc_safepoint();
+                    }
+                    JL_UV_UNLOCK();
+                    // optimization: check again first if we may have work to do.
+                    // Otherwise we got a spurious wakeup since some other thread
+                    // that just wanted to steal libuv from us. We will just go
+                    // right back to sleep on the individual wake signal to let
+                    // them take it from us without conflict.
+                    if (active || !may_sleep(ptls)) {
+                        if (set_not_sleeping(ptls)) {
+                            JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                        }
+                        start_cycles = 0;
+                        continue; // jump to JL_CATCH
+                    }
+                    if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
+                        // thread 0 is the only thread permitted to run the event loop
+                        // so it needs to stay alive, just spin-looping if necessary
+                        if (set_not_sleeping(ptls)) {
+                            JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                        }
+                        start_cycles = 0;
+                        continue; // jump to JL_CATCH
                     }
-                    start_cycles = 0;
-                    continue;
                 }
-                if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
-                    // thread 0 is the only thread permitted to run the event loop
-                    // so it needs to stay alive, just spin-looping if necessary
-                    if (set_not_sleeping(ptls)) {
-                        JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+
+                // any thread which wants us running again will have to observe
+                // sleep_check_state==sleeping and increment n_threads_running for us
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+                assert(wasrunning);
+                isrunning = 0;
+                if (wasrunning == 1) {
+                    // This was the last running thread, and there is no thread with !may_sleep
+                    // so make sure io_loop_tid is notified to check wait_empty
+                    // TODO: this also might be a good time to check again that
+                    // libuv's queue is truly empty, instead of during delete_thread
+                    int16_t tid2 = 0;
+                    if (ptls->tid != tid2) {
+                        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid2];
+                        uv_mutex_lock(&ptls2->sleep_lock);
+                        uv_cond_signal(&ptls2->wake_signal);
+                        uv_mutex_unlock(&ptls2->sleep_lock);
                     }
-                    start_cycles = 0;
-                    continue;
                 }
-            }
 
-            // any thread which wants us running again will have to observe
-            // sleep_check_state==sleeping and increment nrunning for us
-            int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1);
-            assert(wasrunning);
-            if (wasrunning == 1) {
-                // This was the last running thread, and there is no thread with !may_sleep
-                // so make sure io_loop_tid is notified to check wait_empty
-                // TODO: this also might be a good time to check again that
-                // libuv's queue is truly empty, instead of during delete_thread
-                if (ptls->tid != jl_atomic_load_relaxed(&io_loop_tid)) {
-                    uv_mutex_lock(&ptls->sleep_lock);
+                // the other threads will just wait for an individual wake signal to resume
+                JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
+                int8_t gc_state = jl_gc_safe_enter(ptls);
+                uv_mutex_lock(&ptls->sleep_lock);
+                while (may_sleep(ptls)) {
+                    if (ptls->tid == 0) {
+                        task = wait_empty;
+                        if (task && jl_atomic_load_relaxed(&n_threads_running) == 0) {
+                            wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+                            assert(!wasrunning);
+                            wasrunning = !set_not_sleeping(ptls);
+                            assert(!wasrunning);
+                            JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                            if (!ptls->finalizers_inhibited)
+                                ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty)
+                            break;
+                        }
+                        task = NULL;
+                    }
+                    // else should we warn the user of certain deadlock here if tid == 0 && n_threads_running == 0?
                     uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
-                    uv_mutex_unlock(&ptls->sleep_lock);
                 }
-            }
-
-            // the other threads will just wait for an individual wake signal to resume
-            JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
-            int8_t gc_state = jl_gc_safe_enter(ptls);
-            uv_mutex_lock(&ptls->sleep_lock);
-            while (may_sleep(ptls)) {
-                task = wait_empty;
-                if (ptls->tid == 0 && task && jl_atomic_load_relaxed(&nrunning) == 0) {
-                    wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
-                    assert(!wasrunning);
-                    wasrunning = !set_not_sleeping(ptls);
-                    assert(!wasrunning);
-                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
-                    if (!ptls->finalizers_inhibited)
-                        ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty)
-                    break;
+                assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
+                assert(jl_atomic_load_relaxed(&n_threads_running));
+                start_cycles = 0;
+                uv_mutex_unlock(&ptls->sleep_lock);
+                JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
+                jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
+                if (task) {
+                    assert(task == wait_empty);
+                    wait_empty = NULL;
+                    continue;
                 }
-                // else should we warn the user of certain deadlock here if tid == 0 && nrunning == 0?
-                uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
             }
-            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
-            assert(jl_atomic_load_relaxed(&nrunning));
-            start_cycles = 0;
-            uv_mutex_unlock(&ptls->sleep_lock);
-            JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
-            jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
-            if (task) {
-                assert(task == wait_empty);
-                wait_empty = NULL;
-                return task;
+            JL_CATCH {
+                // probably SIGINT, but possibly a user mistake in trypoptask
+                if (!isrunning)
+                    jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+                set_not_sleeping(ptls);
+                jl_rethrow();
             }
+            if (task)
+                return task;
         }
         else {
             // maybe check the kernel for new messages too
@@ -594,7 +557,7 @@ void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
     int notsleeping = jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) == not_sleeping;
     jl_fence();
     if (notsleeping) {
-        if (jl_atomic_load_relaxed(&nrunning) == 1) {
+        if (jl_atomic_load_relaxed(&n_threads_running) == 1) {
             jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[jl_atomic_load_relaxed(&io_loop_tid)];
             // This was the last running thread, and there is no thread with !may_sleep
             // so make sure tid 0 is notified to check wait_empty
@@ -604,10 +567,10 @@ void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
         }
     }
     else {
-        jl_atomic_fetch_add_relaxed(&nrunning, 1);
+        jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
     }
-    jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead)
-    jl_atomic_fetch_add_relaxed(&nrunning, -1);
+    wakeup_thread(jl_atomic_load_relaxed(&ptls->current_task), 0); // force thread 0 to see that we do not have the IO lock (and am dead)
+    jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
 }
 
 #ifdef __cplusplus
diff --git a/src/serialize.h b/src/serialize.h
index 2a91189dce739..3d3eb4df5e862 100644
--- a/src/serialize.h
+++ b/src/serialize.h
@@ -52,22 +52,21 @@ extern "C" {
 #define TAG_SHORT_INT32        44
 #define TAG_CALL1              45
 #define TAG_CALL2              46
-#define TAG_LINEINFO           47
-#define TAG_SHORT_BACKREF      48
-#define TAG_BACKREF            49
-#define TAG_UNIONALL           50
-#define TAG_GOTONODE           51
-#define TAG_QUOTENODE          52
-#define TAG_GENERAL            53
-#define TAG_GOTOIFNOT          54
-#define TAG_RETURNNODE         55
-#define TAG_ARGUMENT           56
-#define TAG_RELOC_METHODROOT   57
-#define TAG_BINDING            58
-#define TAG_MEMORYT            59
-#define TAG_ENTERNODE          60
-
-#define LAST_TAG 60
+#define TAG_SHORT_BACKREF      47
+#define TAG_BACKREF            48
+#define TAG_UNIONALL           49
+#define TAG_GOTONODE           50
+#define TAG_QUOTENODE          51
+#define TAG_GENERAL            52
+#define TAG_GOTOIFNOT          53
+#define TAG_RETURNNODE         54
+#define TAG_ARGUMENT           55
+#define TAG_RELOC_METHODROOT   56
+#define TAG_BINDING            57
+#define TAG_MEMORYT            58
+#define TAG_ENTERNODE          59
+
+#define LAST_TAG 59
 
 #define write_uint8(s, n) ios_putc((n), (s))
 #define read_uint8(s) ((uint8_t)ios_getc((s)))
diff --git a/src/signal-handling.c b/src/signal-handling.c
index 2ddbf2ad1cc8e..d7f4697a3c4f0 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -155,7 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed)
     // The "modern Fisher–Yates shuffle" - O(n) algorithm
     // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
     for (int i = size; i-- > 1; ) {
-        size_t j = cong(i, seed);
+        size_t j = cong(i + 1, seed); // cong is an open interval so we add 1
         uint64_t tmp = carray[j];
         carray[j] = carray[i];
         carray[i] = tmp;
@@ -309,6 +309,11 @@ static void jl_check_profile_autostop(void)
     }
 }
 
+static void stack_overflow_warning(void)
+{
+    jl_safe_printf("Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable.\n");
+}
+
 #if defined(_WIN32)
 #include "signals-win.c"
 #else
@@ -335,6 +340,8 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx)
     return ((CONTEXT*)_ctx)->Rip;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     return ((ucontext_t*)_ctx)->uc_mcontext.pc;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    return ((ucontext_t*)_ctx)->uc_mcontext.mc_gpregs.gp_elr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     return ((ucontext_t*)_ctx)->uc_mcontext.arm_pc;
 #else
@@ -420,6 +427,8 @@ void jl_show_sigill(void *_ctx)
 #endif
 }
 
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT;
+
 // make it invalid for a task to return from this point to its stack
 // this is generally quite an foolish operation, but does free you up to do
 // arbitrary things on this stack now without worrying about corrupt state that
@@ -432,15 +441,17 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
         ct->eh = NULL;
         ct->world_age = 1;
         // Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing.
-        small_arraylist_t *locks = &ct->ptls->locks;
+        jl_ptls_t ptls = ct->ptls;
+        small_arraylist_t *locks = &ptls->locks;
         for (size_t i = locks->len; i > 0; i--)
             jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
         locks->len = 0;
-        ct->ptls->in_pure_callback = 0;
-        ct->ptls->in_finalizer = 0;
-        ct->ptls->defer_signal = 0;
+        ptls->in_pure_callback = 0;
+        ptls->in_finalizer = 0;
+        ptls->defer_signal = 0;
         // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
-        jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_UNSAFE);
+        jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_UNSAFE);
+        surprise_wakeup(ptls);
         // allow continuing to use a Task that should have already died--unsafe necromancy!
         jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
     }
@@ -454,6 +465,7 @@ void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *c
     size_t i, n = ct ? *bt_size : 0;
     if (sig) {
         // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
+        // and also resets the state of ct and ptls so that some code can run on this task again
         jl_task_frame_noreturn(ct);
 #ifndef _OS_WINDOWS_
         sigset_t sset;
diff --git a/src/signals-mach.c b/src/signals-mach.c
index 5063aa3e8e0d2..a939e4df71ae0 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -9,6 +9,7 @@
 #include <mach/task.h>
 #include <mach/mig_errors.h>
 #include <AvailabilityMacros.h>
+#include <stdint.h>
 #include "mach_excServer.c"
 
 #ifdef MAC_OS_X_VERSION_10_9
@@ -44,17 +45,33 @@ static void attach_exception_port(thread_port_t thread, int segv_only);
 
 // low 16 bits are the thread id, the next 8 bits are the original gc_state
 static arraylist_t suspended_threads;
-extern uv_mutex_t safepoint_lock;
 extern uv_cond_t safepoint_cond_begin;
 
+#define GC_STATE_SHIFT 8*sizeof(int16_t)
+static inline int8_t decode_gc_state(uintptr_t item)
+{
+    return (int8_t)(item >> GC_STATE_SHIFT);
+}
+
+static inline int16_t decode_tid(uintptr_t item)
+{
+    return (int16_t)item;
+}
+
+static inline uintptr_t encode_item(int16_t tid, int8_t gc_state)
+{
+    return (uintptr_t)tid | ((uintptr_t)gc_state << GC_STATE_SHIFT);
+}
+
 // see jl_safepoint_wait_thread_resume
 void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2)
 {
     // must be called with uv_mutex_lock(&safepoint_lock) and uv_mutex_lock(&ptls2->sleep_lock) held (in that order)
     for (size_t i = 0; i < suspended_threads.len; i++) {
         uintptr_t item = (uintptr_t)suspended_threads.items[i];
-        int16_t tid = (int16_t)item;
-        int8_t gc_state = (int8_t)(item >> 8);
+
+        int16_t tid = decode_tid(item);
+        int8_t gc_state = decode_gc_state(item);
         if (tid != tid2)
             continue;
         jl_atomic_store_release(&ptls2->gc_state, gc_state);
@@ -71,8 +88,8 @@ void jl_mach_gc_end(void)
     size_t j = 0;
     for (size_t i = 0; i < suspended_threads.len; i++) {
         uintptr_t item = (uintptr_t)suspended_threads.items[i];
-        int16_t tid = (int16_t)item;
-        int8_t gc_state = (int8_t)(item >> 8);
+        int16_t tid = decode_tid(item);
+        int8_t gc_state = decode_gc_state(item);
         jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
         uv_mutex_lock(&ptls2->sleep_lock);
         if (jl_atomic_load_relaxed(&ptls2->suspend_count) == 0) {
@@ -115,9 +132,9 @@ static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid)
         // Eventually, we should probably release this signal to the original
         // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it
         // triggers a SIGSEGV and gets handled by the usual codepath for unix.
-        int8_t gc_state = ptls2->gc_state;
+        int8_t gc_state = jl_atomic_load_acquire(&ptls2->gc_state);
         jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING);
-        uintptr_t item = tid | (((uintptr_t)gc_state) << 16);
+        uintptr_t item = encode_item(tid, gc_state);
         arraylist_push(&suspended_threads, (void*)item);
         thread_suspend(thread);
     }
@@ -205,38 +222,92 @@ typedef arm_exception_state64_t host_exception_state_t;
 #define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT
 #endif
 
-static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state,
-                             void (*fptr)(void))
+// create a fake function that describes the variable manipulations in jl_call_in_state
+__attribute__((naked)) static void fake_stack_pop(void)
 {
 #ifdef _CPU_X86_64_
-    uintptr_t rsp = state->__rsp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa %rsp, 0\n" // CFA here uses %rsp directly
+        "  .cfi_offset %rip, 0\n" // previous value of %rip at CFA
+        "  .cfi_offset %rsp, 8\n" // previous value of %rsp at CFA
+        "  nop\n"
+    );
 #elif defined(_CPU_AARCH64_)
-    uintptr_t rsp = state->__sp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa sp, 0\n" // use sp as fp here
+        "  .cfi_offset lr, 0\n"
+        "  .cfi_offset sp, 8\n"
+        // Anything else got smashed, since we didn't explicitly copy all of the
+        // state object to the stack (to build a real sigreturn frame).
+        // This is also not quite valid, since the AArch64 DWARF spec lacks the ability to define how to restore the LR register correctly,
+        // so normally libunwind implementations on linux detect this function specially and hack around the invalid info:
+        // https://github.com/llvm/llvm-project/commit/c82deed6764cbc63966374baf9721331901ca958
+        " nop\n"
+    );
 #else
-#error "julia: throw-in-context not supported on this platform"
+CFI_NORETURN
 #endif
-    if (ptls2 == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    }
-    else {
-        rsp = (uintptr_t)ptls2->signal_stack + (ptls2->signal_stack_size ? ptls2->signal_stack_size : sig_stack_size);
-    }
-    assert(rsp % 16 == 0);
-    rsp -= 16;
+}
 
+static void jl_call_in_state(host_thread_state_t *state, void (*fptr)(void))
+{
+#ifdef _CPU_X86_64_
+    uintptr_t sp = state->__rsp;
+#elif defined(_CPU_AARCH64_)
+    uintptr_t sp = state->__sp;
+#endif
+    sp = (sp - 256) & ~(uintptr_t)15; // redzone and re-alignment
+    assert(sp % 16 == 0);
+    sp -= 16;
 #ifdef _CPU_X86_64_
-    rsp -= sizeof(void*);
-    state->__rsp = rsp; // set stack pointer
+    // set return address to NULL
+    *(uintptr_t*)sp = 0;
+    // pushq %sp
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rsp;
+    // pushq %rip
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rip;
+    // pushq .fake_stack_pop + 1; aka call from fake_stack_pop
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)&fake_stack_pop + 1;
+    state->__rsp = sp; // set stack pointer
     state->__rip = (uint64_t)fptr; // "call" the function
 #elif defined(_CPU_AARCH64_)
-    state->__sp = rsp;
-    state->__pc = (uint64_t)fptr;
-    state->__lr = 0;
+    // push {%sp, %pc + 4}
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__sp;
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)state->__pc;
+    state->__sp = sp; // x31
+    state->__pc = (uint64_t)fptr; // pc
+    state->__lr = (uintptr_t)&fake_stack_pop + 4; // x30
 #else
 #error "julia: throw-in-context not supported on this platform"
 #endif
 }
 
+static void jl_longjmp_in_state(host_thread_state_t *state, jl_jmp_buf jmpbuf)
+{
+
+    if (!jl_simulate_longjmp(jmpbuf, (bt_context_t*)state)) {
+        // for sanitizer builds, fallback to calling longjmp on the original stack
+        // (this will fail for stack overflow, but that is hardly sanitizer-legal anyways)
+#ifdef _CPU_X86_64_
+    state->__rdi = (uintptr_t)jmpbuf;
+    state->__rsi = 1;
+#elif defined(_CPU_AARCH64_)
+    state->__x[0] = (uintptr_t)jmpbuf;
+    state->__x[1] = 1;
+#else
+#error "julia: jl_longjmp_in_state not supported on this platform"
+#endif
+        jl_call_in_state(state, (void (*)(void))longjmp);
+    }
+}
+
 #ifdef _CPU_X86_64_
 int is_write_fault(host_exception_state_t exc_state) {
     return exc_reg_is_write_fault(exc_state.__err);
@@ -258,14 +329,26 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
     host_thread_state_t state;
     kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
-    if (1) { // XXX: !jl_has_safe_restore(ptls2)
+    if (ptls2->safe_restore) {
+        jl_longjmp_in_state(&state, *ptls2->safe_restore);
+    }
+    else {
         assert(exception);
         ptls2->bt_size =
             rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state,
-                              NULL /*current_task?*/);
+                            NULL /*current_task?*/);
         ptls2->sig_exception = exception;
+        ptls2->io_wait = 0;
+        jl_task_t *ct = ptls2->current_task;
+        jl_handler_t *eh = ct->eh;
+        if (eh != NULL) {
+            asan_unpoison_task_stack(ct, &eh->eh_ctx);
+            jl_longjmp_in_state(&state, eh->eh_ctx);
+        }
+        else {
+            jl_no_exc_handler(exception, ct);
+        }
     }
-    jl_call_in_state(ptls2, &state, &jl_sig_throw);
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 }
@@ -273,14 +356,18 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn
-        jl_task_t *ct = jl_get_current_task();
-        jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls;
-        jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or jl_unwind_stepn
+        jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(context), *saferestore);
+        return;
     }
-    else {
-        sigdie_handler(sig, info, context);
+    jl_task_t *ct = jl_get_current_task();
+    if ((sig != SIGBUS || info->si_code == BUS_ADRERR) &&
+    !(ct == NULL || ct->ptls == NULL || jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING || ct->eh == NULL)
+    && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error)
+        stack_overflow_warning();
     }
+    sigdie_handler(sig, info, context);
 }
 
 // n.b. mach_exc_server expects us to define this symbol locally
@@ -332,13 +419,11 @@ kern_return_t catch_mach_exception_raise(
         jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n");
         return KERN_INVALID_ARGUMENT;
     }
-    // XXX: jl_throw_in_thread or segv_handler will eventually check this, but
-    //      we would like to avoid some of this work if we could detect this earlier
-    // if (jl_has_safe_restore(ptls2)) {
-    //     jl_throw_in_thread(ptls2, thread, jl_stackovf_exception);
-    //     return KERN_SUCCESS;
-    // }
-    if (ptls2->gc_state == JL_GC_STATE_WAITING)
+    if (ptls2->safe_restore) {
+        jl_throw_in_thread(ptls2, thread, NULL);
+        return KERN_SUCCESS;
+    }
+    if (jl_atomic_load_acquire(&ptls2->gc_state) == JL_GC_STATE_WAITING)
         return KERN_FAILURE;
     if (exception == EXC_ARITHMETIC) {
         jl_throw_in_thread(ptls2, thread, jl_diverror_exception);
@@ -363,10 +448,11 @@ kern_return_t catch_mach_exception_raise(
         }
         return KERN_SUCCESS;
     }
-    if (ptls2->current_task->eh == NULL)
+    if (jl_atomic_load_relaxed(&ptls2->current_task)->eh == NULL)
         return KERN_FAILURE;
     jl_value_t *excpt;
     if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) {
+        stack_overflow_warning();
         excpt = jl_stackovf_exception;
     }
     else if (is_write_fault(exc_state)) // false for alignment errors
@@ -495,7 +581,6 @@ static void jl_try_deliver_sigint(void)
 
 static void JL_NORETURN jl_exit_thread0_cb(int signo)
 {
-CFI_NORETURN
     jl_critical_error(signo, 0, NULL, jl_current_task);
     jl_atexit_hook(128);
     jl_raise(signo);
@@ -527,7 +612,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 #else
 #error Fill in first integer argument here
 #endif
-    jl_call_in_state(ptls2, &state, (void (*)(void))&jl_exit_thread0_cb);
+    jl_call_in_state(&state, (void (*)(void))&jl_exit_thread0_cb);
     unsigned int count = MACH_THREAD_STATE_COUNT;
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
@@ -717,16 +802,16 @@ void *mach_profile_listener(void *arg)
 #endif
                 jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
 
-                // store threadid but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
 
-                // store task id (never null)
+                // META_OFFSET_TASKID store task id (never null)
                 bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
 
-                // store cpu cycle clock
+                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                 bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
 
                 // Mark the end of this block with two 0's
diff --git a/src/signals-unix.c b/src/signals-unix.c
index eb51a5fccfaba..f99eca31730b6 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -24,10 +24,12 @@
 #endif
 
 // Figure out the best signals/timers to use for this platform
-#ifdef __APPLE__ // Darwin's mach ports allow signal-free thread management
+#if defined(__APPLE__) // Darwin's mach ports allow signal-free thread management
 #define HAVE_MACH
 #define HAVE_KEVENT
-#else // generic Linux or BSD
+#elif defined(__OpenBSD__)
+#define HAVE_KEVENT
+#else // generic Linux or FreeBSD
 #define HAVE_TIMER
 #endif
 
@@ -42,7 +44,7 @@ static const size_t sig_stack_size = 8 * 1024 * 1024;
 
 // helper function for returning the unw_context_t inside a ucontext_t
 // (also used by stackwalk.c)
-bt_context_t *jl_to_bt_context(void *sigctx)
+bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT
 {
 #ifdef __APPLE__
     return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss;
@@ -60,7 +62,11 @@ bt_context_t *jl_to_bt_context(void *sigctx)
 static int thread0_exit_count = 0;
 static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size);
 
-static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT;
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf);
+
+#if !defined(_OS_DARWIN_)
+static inline uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
 {
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
@@ -74,22 +80,22 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.arm_sp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__rsp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__sp;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.mc_rsp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_gpregs.gp_sp;
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+    const struct sigcontext *ctx = (const struct sigcontext *)_ctx;
+    return ctx->sc_rsp;
 #else
     // TODO Add support for PowerPC(64)?
     return 0;
 #endif
 }
 
-static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
+static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) JL_NOTSAFEPOINT
 {
     // One guard page for signal_stack.
     return ptls->signal_stack == NULL ||
@@ -97,10 +103,8 @@ static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
             (char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size));
 }
 
-// Modify signal context `_ctx` so that `fptr` will execute when the signal
-// returns. `fptr` will execute on the signal stack, and must not return.
-// jl_call_in_ctx is also currently executing on that signal stack,
-// so be careful not to smash it
+// Modify signal context `_ctx` so that `fptr` will execute when the signal returns
+// The function `fptr` itself must not return.
 JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx)
 {
     // Modifying the ucontext should work but there is concern that
@@ -110,46 +114,48 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     // checks that the syscall is made in the signal handler and that
     // the ucontext address is valid. Hopefully the value of the ucontext
     // will not be part of the validation...
-    if (!ptls) {
-        sigset_t sset;
-        sigemptyset(&sset);
-        sigaddset(&sset, sig);
-        pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
-        fptr();
-        return;
-    }
     uintptr_t rsp = jl_get_rsp_from_ctx(_ctx);
-    if (is_addr_on_sigstack(ptls, (void*)rsp))
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    else
-        rsp = (uintptr_t)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size);
-    assert(rsp % 16 == 0);
-    rsp -= 16;
+    rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_RSP] = rsp;
     ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_rsp = rsp;
     ctx->uc_mcontext.mc_rip = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_ESP] = rsp;
     ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_esp = rsp;
     ctx->uc_mcontext.mc_eip = (uintptr_t)fptr;
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+    struct sigcontext *ctx = (struct sigcontext *)_ctx;
+    rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
+    ctx->sc_rsp = rsp;
+    ctx->sc_rip = fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     ctx->uc_mcontext.sp = rsp;
     ctx->uc_mcontext.regs[29] = 0; // Clear link register (x29)
     ctx->uc_mcontext.pc = (uintptr_t)fptr;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    ucontext_t *ctx = (ucontext_t*)_ctx;
+    ctx->uc_mcontext.mc_gpregs.gp_sp = rsp;
+    ctx->uc_mcontext.mc_gpregs.gp_x[29] = 0; // Clear link register (x29)
+    ctx->uc_mcontext.mc_gpregs.gp_elr = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     uintptr_t target = (uintptr_t)fptr;
@@ -169,22 +175,6 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     ctx->uc_mcontext.arm_sp = rsp;
     ctx->uc_mcontext.arm_lr = 0; // Clear link register
     ctx->uc_mcontext.arm_pc = target;
-#elif defined(_OS_DARWIN_) && (defined(_CPU_X86_64_) || defined(_CPU_AARCH64_))
-    // Only used for SIGFPE.
-    // This doesn't seems to be reliable when the SIGFPE is generated
-    // from a divide-by-zero exception, which is now handled by
-    // `catch_exception_raise`. It works fine when a signal is received
-    // due to `kill`/`raise` though.
-    ucontext64_t *ctx = (ucontext64_t*)_ctx;
-#if defined(_CPU_X86_64_)
-    rsp -= sizeof(void*);
-    ctx->uc_mcontext64->__ss.__rsp = rsp;
-    ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr;
-#else
-    ctx->uc_mcontext64->__ss.__sp = rsp;
-    ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr;
-    ctx->uc_mcontext64->__ss.__lr = 0;
-#endif
 #else
 #pragma message("julia: throw-in-context not supported on this platform")
     // TODO Add support for PowerPC(64)?
@@ -195,30 +185,38 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     fptr();
 #endif
 }
+#endif
 
 static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx)
 {
     jl_ptls_t ptls = ct->ptls;
-    if (!jl_get_safe_restore()) {
-        ptls->bt_size =
-            rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
-                              ct->gcstack);
-        ptls->sig_exception = e;
+    assert(!jl_get_safe_restore());
+    ptls->bt_size =
+        rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
+                            ct->gcstack);
+    ptls->sig_exception = e;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp_in_ctx(sig, sigctx, eh->eh_ctx);
+    }
+    else {
+        jl_no_exc_handler(e, ct);
     }
-    jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx);
 }
 
 static pthread_t signals_thread;
 
-static int is_addr_on_stack(jl_task_t *ct, void *addr)
+static int is_addr_on_stack(jl_task_t *ct, void *addr) JL_NOTSAFEPOINT
 {
-    if (ct->copy_stack) {
+    if (ct->ctx.copy_stack) {
         jl_ptls_t ptls = ct->ptls;
         return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
                 (char*)addr < (char*)ptls->stackbase);
     }
-    return ((char*)addr > (char*)ct->stkbuf &&
-            (char*)addr < (char*)ct->stkbuf + ct->bufsz);
+    return ((char*)addr > (char*)ct->ctx.stkbuf &&
+            (char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz);
 }
 
 static void sigdie_handler(int sig, siginfo_t *info, void *context)
@@ -237,8 +235,12 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
         info->si_code == SI_KERNEL ||
 #endif
         info->si_code == SI_QUEUE ||
+#ifdef SI_MESGQ
         info->si_code == SI_MESGQ ||
+#endif
+#ifdef SI_ASYNCIO
         info->si_code == SI_ASYNCIO ||
+#endif
 #ifdef SI_SIGIO
         info->si_code == SI_SIGIO ||
 #endif
@@ -292,6 +294,8 @@ int exc_reg_is_write_fault(uintptr_t esr) {
 #if defined(HAVE_MACH)
 #include "signals-mach.c"
 #else
+#include <poll.h>
+#include <sys/eventfd.h>
 
 int jl_lock_stackwalk(void)
 {
@@ -336,6 +340,18 @@ int is_write_fault(void *context) {
     ucontext_t *ctx = (ucontext_t*)context;
     return exc_reg_is_write_fault(ctx->uc_mcontext.mc_err);
 }
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+// FreeBSD seems not to expose a means of accessing ESR via `ucontext_t` on AArch64.
+// TODO: Is there an alternative approach that can be taken? ESR may become accessible
+// in a future release though.
+int is_write_fault(void *context) {
+    return 0;
+}
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+int is_write_fault(void *context) {
+    struct sigcontext *ctx = (struct sigcontext *)context;
+    return exc_reg_is_write_fault(ctx->sc_err);
+}
 #else
 #pragma message("Implement this query for consistent PROT_NONE handling")
 int is_write_fault(void *context) {
@@ -343,7 +359,7 @@ int is_write_fault(void *context) {
 }
 #endif
 
-static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
+static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) JL_NOTSAFEPOINT
 {
     return (ptls->signal_stack != NULL &&
             is_addr_on_sigstack(ptls, ptr) &&
@@ -353,8 +369,9 @@ static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
 JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -385,6 +402,7 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
     if (ct->eh == NULL)
         sigdie_handler(sig, info, context);
     if ((sig != SIGBUS || info->si_code == BUS_ADRERR) && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error)
+        stack_overflow_warning();
         jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context);
     }
     else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) {
@@ -404,17 +422,15 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
     }
 }
 
-#if !defined(JL_DISABLE_LIBUNWIND)
-static bt_context_t *signal_context;
-pthread_mutex_t in_signal_lock;
-static pthread_cond_t exit_signal_cond;
-static pthread_cond_t signal_caught_cond;
+pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
+static bt_context_t *signal_context; // protected by in_signal_lock
+static int exit_signal_cond = -1;
+static int signal_caught_cond = -1;
+static int signals_inflight = 0;
 
 int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
 {
-    struct timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    ts.tv_sec += timeout;
+    int err;
     pthread_mutex_lock(&in_signal_lock);
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
     jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
@@ -423,48 +439,74 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
         pthread_mutex_unlock(&in_signal_lock);
         return 0;
     }
-    jl_atomic_store_release(&ptls2->signal_request, 1);
-    pthread_kill(ptls2->system_id, SIGUSR2);
-    // wait for thread to acknowledge
-    int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts);
-    if (err == ETIMEDOUT) {
-        sig_atomic_t request = 1;
-        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+    while (signals_inflight) {
+        // something is wrong, or there is already a usr2 in flight elsewhere
+        // try to wait for it to finish or wait for timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err == -1 || (event.revents & POLLIN) == 0) {
+            // not ready after timeout: cancel this request
             pthread_mutex_unlock(&in_signal_lock);
             return 0;
         }
-        // Request is either now 0 (meaning the other thread is waiting for
-        //   exit_signal_cond already),
-        // Or it is now -1 (meaning the other thread
-        //   is waiting for in_signal_lock, and we need to release that lock
-        //   here for a bit, until the other thread has a chance to get to the
-        //   exit_signal_cond)
-        if (request == -1) {
-            err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock);
-            assert(!err);
+        // consume it before continuing
+        eventfd_t got;
+        do {
+            err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(signals_inflight >= got);
+        signals_inflight -= got;
+    }
+    signals_inflight++;
+    sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1);
+    assert(request == 0 || request == -1);
+    request = 1;
+    err = pthread_kill(ptls2->system_id, SIGUSR2);
+    if (err == 0) {
+        // wait for thread to acknowledge or timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err != 1 || (event.revents & POLLIN) == 0)
+            err = -1;
+    }
+    if (err == -1) {
+        // not ready after timeout: try to cancel this request
+        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+            signals_inflight--;
+            pthread_mutex_unlock(&in_signal_lock);
+            return 0;
         }
     }
+    eventfd_t got;
+    do {
+        err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+    } while (err == -1 && errno == EINTR);
+    if (err != sizeof(eventfd_t)) abort();
+    assert(signals_inflight >= got);
+    signals_inflight -= got;
+    signals_inflight++;
     // Now the other thread is waiting on exit_signal_cond (verify that here by
     // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
-    jl_atomic_store_release(&ptls2->signal_request, 1); // prepare to resume normally
+    request = jl_atomic_load_acquire(&ptls2->signal_request);
+    assert(request == 0 || request == -1); (void) request;
+    jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this
     *ctx = *signal_context;
     return 1;
 }
 
 void jl_thread_resume(int tid)
 {
-    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
-    pthread_cond_broadcast(&exit_signal_cond);
-    pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge (so that signal_request doesn't get mixed up)
-    // The other thread is waiting to leave exit_signal_cond (verify that here by
-    // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
+    int err;
+    eventfd_t got = 1;
+    err = write(exit_signal_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
     pthread_mutex_unlock(&in_signal_lock);
 }
-#endif
 
 // Throw jl_interrupt_exception if the master thread is in a signal async region
 // or if SIGINT happens too often.
@@ -473,9 +515,12 @@ static void jl_try_deliver_sigint(void)
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
     jl_safepoint_enable_sigint();
     jl_wake_libuv();
+    pthread_mutex_lock(&in_signal_lock);
+    signals_inflight++;
     jl_atomic_store_release(&ptls2->signal_request, 2);
     // This also makes sure `sleep` is aborted.
     pthread_kill(ptls2->system_id, SIGUSR2);
+    pthread_mutex_unlock(&in_signal_lock);
 }
 
 // Write only by signal handling thread, read only by main thread
@@ -508,12 +553,13 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 }
 
 // request:
-// -1: beginning processing [invalid outside here]
+// -1: processing
 //  0: nothing [not from here]
-//  1: get state
+//  1: get state & wait for request
 //  2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
 //     is reached
 //  3: raise `thread0_exit_signo` and try to exit
+//  4: no-op
 void usr2_handler(int sig, siginfo_t *info, void *ctx)
 {
     jl_task_t *ct = jl_get_current_task();
@@ -523,26 +569,36 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
     if (ptls == NULL)
         return;
     int errno_save = errno;
-    // acknowledge that we saw the signal_request
-    sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, -1);
-#if !defined(JL_DISABLE_LIBUNWIND)
+    sig_atomic_t request = jl_atomic_load(&ptls->signal_request);
+    if (request == 0)
+        return;
+    if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1))
+        return;
     if (request == 1) {
-        pthread_mutex_lock(&in_signal_lock);
         signal_context = jl_to_bt_context(ctx);
-        // acknowledge that we set the signal_caught_cond broadcast
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == -1); (void) request;
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_cond_wait(&exit_signal_cond, &in_signal_lock);
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == 1 || request == 3);
-        // acknowledge that we got the resume signal
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_mutex_unlock(&in_signal_lock);
+        // acknowledge that we saw the signal_request and set signal_context
+        int err;
+        eventfd_t got = 1;
+        err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+        if (err != sizeof(eventfd_t)) abort();
+        sig_atomic_t processing = -1;
+        jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
+        // wait for exit signal
+        do {
+            err = read(exit_signal_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(got == 1);
+        request = jl_atomic_exchange(&ptls->signal_request, -1);
+        signal_context = NULL;
+        assert(request == 2 || request == 3 || request == 4);
     }
-    else
-#endif
-    jl_atomic_exchange(&ptls->signal_request, 0); // returns -1
+    int err;
+    eventfd_t got = 1;
+    err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
+    sig_atomic_t processing = -1;
+    jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
     if (request == 2) {
         int force = jl_check_force_sigint();
         if (force || (!ptls->defer_signal && ptls->io_wait)) {
@@ -551,7 +607,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
                 jl_safe_printf("WARNING: Force throwing a SIGINT\n");
             // Force a throw
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) // restarting jl_ or profile
+                jl_longjmp_in_ctx(sig, ctx, *saferestore);
+            else
+                jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
         }
     }
     else if (request == 3) {
@@ -615,6 +675,17 @@ JL_DLLEXPORT void jl_profile_stop_timer(void)
     }
 }
 
+#elif defined(__OpenBSD__)
+
+JL_DLLEXPORT int jl_profile_start_timer(void)
+{
+    return -1;
+}
+
+JL_DLLEXPORT void jl_profile_stop_timer(void)
+{
+}
+
 #else
 
 #error no profile tools available
@@ -653,6 +724,9 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls)
     ss.ss_flags = 0;
     ss.ss_size = ssize;
     assert(ssize != 0);
+
+#ifndef _OS_OPENBSD_
+    /* fallback to malloc(), but it isn't possible on OpenBSD */
     if (signal_stack == NULL) {
         signal_stack = malloc(ssize);
         ssize = 0;
@@ -661,6 +735,8 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls)
         else
             jl_safe_printf("\nwarning: julia signal stack allocated without guard page (launch foreign threads earlier to avoid this warning).\n");
     }
+#endif
+
     if (signal_stack != NULL) {
         ss.ss_sp = signal_stack;
         if (sigaltstack(&ss, NULL) < 0)
@@ -914,16 +990,16 @@ static void *signal_listener(void *arg)
 
                         jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
 
-                        // store threadid but add 1 as 0 is preserved to indicate end of block
+                        // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                         bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1;
 
-                        // store task id (never null)
+                        // META_OFFSET_TASKID store task id (never null)
                         bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
 
-                        // store cpu cycle clock
+                        // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                         bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                        // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                        // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                         bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1;
 
                         // Mark the end of this block with two 0's
@@ -960,12 +1036,12 @@ static void *signal_listener(void *arg)
         else if (critical) {
             // critical in this case actually means SIGINFO request
 #ifndef SIGINFO // SIGINFO already prints something similar automatically
-            int nrunning = 0;
+            int n_threads_running = 0;
             for (int idx = nthreads; idx-- > 0; ) {
                 jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx];
-                nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
+                n_threads_running += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
             }
-            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads);
+            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), n_threads_running, nthreads);
 #endif
 
             jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
@@ -987,10 +1063,12 @@ void restore_signals(void)
     jl_sigsetset(&sset);
     pthread_sigmask(SIG_SETMASK, &sset, 0);
 
-#if !defined(HAVE_MACH) && !defined(JL_DISABLE_LIBUNWIND)
+#if !defined(HAVE_MACH)
+    exit_signal_cond = eventfd(0, EFD_CLOEXEC);
+    signal_caught_cond = eventfd(0, EFD_CLOEXEC);
     if (pthread_mutex_init(&in_signal_lock, NULL) != 0 ||
-        pthread_cond_init(&exit_signal_cond, NULL) != 0 ||
-        pthread_cond_init(&signal_caught_cond, NULL) != 0) {
+            exit_signal_cond == -1 ||
+            signal_caught_cond == -1) {
         jl_error("SIGUSR pthread init failed");
     }
 #endif
@@ -1003,8 +1081,9 @@ void restore_signals(void)
 static void fpe_handler(int sig, siginfo_t *info, void *context)
 {
     (void)info;
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -1014,6 +1093,21 @@ static void fpe_handler(int sig, siginfo_t *info, void *context)
         jl_throw_in_ctx(ct, jl_diverror_exception, sig, context);
 }
 
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf)
+{
+#if defined(_OS_DARWIN_)
+    jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(_ctx), jmpbuf);
+#else
+    if (jl_simulate_longjmp(jmpbuf, jl_to_bt_context(_ctx)))
+        return;
+    sigset_t sset;
+    sigemptyset(&sset);
+    sigaddset(&sset, sig);
+    pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
+    jl_longjmp(jmpbuf, 1);
+#endif
+}
+
 static void sigint_handler(int sig)
 {
     jl_sigint_passed = 1;
diff --git a/src/signals-win.c b/src/signals-win.c
index f763b71e1cf32..b5f8dd8bd79d9 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -86,9 +86,13 @@ void __cdecl crt_sig_handler(int sig, int num)
         }
         break;
     default: // SIGSEGV, SIGTERM, SIGILL, SIGABRT
-        if (sig == SIGSEGV && jl_get_safe_restore()) {
-            signal(sig, (void (__cdecl *)(int))crt_sig_handler);
-            jl_sig_throw();
+        if (sig == SIGSEGV) { // restarting jl_ or profile
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) {
+                signal(sig, (void (__cdecl *)(int))crt_sig_handler);
+                jl_longjmp(*saferestore, 1);
+                return;
+            }
         }
         memset(&Context, 0, sizeof(Context));
         RtlCaptureContext(&Context);
@@ -109,6 +113,8 @@ static jl_ptls_t stkerror_ptls;
 static int have_backtrace_fiber;
 static void JL_NORETURN start_backtrace_fiber(void)
 {
+    // print the warning (this mysteriously needs a lot of stack for the WriteFile syscall)
+    stack_overflow_warning();
     // collect the backtrace
     stkerror_ptls->bt_size =
         rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx,
@@ -124,41 +130,41 @@ void restore_signals(void)
     SetConsoleCtrlHandler(NULL, 0);
 }
 
-void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c);
+
+static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread)
 {
-#if defined(_CPU_X86_64_)
-    DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8;
-#elif defined(_CPU_X86_)
-    DWORD32 Esp = (ctxThread->Esp & (DWORD32)-16) - 4;
-#else
-#error WIN16 not supported :P
-#endif
-    if (ct && !jl_get_safe_restore()) {
-        assert(excpt != NULL);
-        jl_ptls_t ptls = ct->ptls;
-        ptls->bt_size = 0;
-        if (excpt != jl_stackovf_exception) {
-            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
-                                              ct->gcstack);
-        }
-        else if (have_backtrace_fiber) {
-            uv_mutex_lock(&backtrace_lock);
-            stkerror_ctx = ctxThread;
-            stkerror_ptls = ptls;
-            jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
-            uv_mutex_unlock(&backtrace_lock);
-        }
-        ptls->sig_exception = excpt;
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        if (!jl_simulate_longjmp(*saferestore, ctxThread))
+            abort();
+        return;
+    }
+    assert(ct && excpt);
+    jl_ptls_t ptls = ct->ptls;
+    ptls->bt_size = 0;
+    if (excpt != jl_stackovf_exception) {
+        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
+                                          ct->gcstack);
+    }
+    else if (have_backtrace_fiber) {
+        uv_mutex_lock(&backtrace_lock);
+        stkerror_ctx = ctxThread;
+        stkerror_ptls = ptls;
+        jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
+        uv_mutex_unlock(&backtrace_lock);
+    }
+    ptls->sig_exception = excpt;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        if (!jl_simulate_longjmp(eh->eh_ctx, ctxThread))
+            abort();
+    }
+    else {
+        jl_no_exc_handler(excpt, ct);
     }
-#if defined(_CPU_X86_64_)
-    *(DWORD64*)Rsp = 0;
-    ctxThread->Rsp = Rsp;
-    ctxThread->Rip = (DWORD64)&jl_sig_throw;
-#elif defined(_CPU_X86_)
-    *(DWORD32*)Esp = 0;
-    ctxThread->Esp = Esp;
-    ctxThread->Eip = (DWORD)&jl_sig_throw;
-#endif
 }
 
 HANDLE hMainThread = INVALID_HANDLE_VALUE;
@@ -326,7 +332,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
     default:
         jl_safe_printf("UNKNOWN"); break;
     }
-    jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
+    jl_safe_printf(" at 0x%zx -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
     jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
 
     jl_critical_error(0, 0, ExceptionInfo->ContextRecord, ct);
@@ -420,16 +426,16 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
 
                 jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread
 
-                // store threadid but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
 
-                // store task id (never null)
+                // META_OFFSET_TASKID store task id (never null)
                 bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
 
-                // store cpu cycle clock
+                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                 bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
 
                 // Mark the end of this block with two 0's
diff --git a/src/smallintset.c b/src/smallintset.c
index df67239f79fb5..a80a18009c9db 100644
--- a/src/smallintset.c
+++ b/src/smallintset.c
@@ -36,7 +36,7 @@ static inline size_t jl_intref(const jl_genericmemory_t *arr, size_t idx) JL_NOT
     else if (el == jl_memory_uint16_type)
         return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1);
     else if (el == jl_memory_uint32_type)
-        return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)arr->ptr)[idx]), (uint32_t)-1);
+        return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX);
     else
         abort();
 }
@@ -53,7 +53,7 @@ static inline size_t jl_intref_acquire(const jl_genericmemory_t *arr, size_t idx
     else if (el == jl_memory_uint16_type)
         return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1);
     else if (el == jl_memory_uint32_type)
-        return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint32_t)*)arr->ptr)[idx]), (uint32_t)-1);
+        return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX);
     else
         abort();
 }
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 37f239609504e..6aa36fa8b499c 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -5,6 +5,7 @@
   utilities for walking the stack and looking up information about code addresses
 */
 #include <inttypes.h>
+#include "gc-stock.h"
 #include "julia.h"
 #include "julia_internal.h"
 #include "threading.h"
@@ -82,7 +83,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
         skip--;
     }
 #endif
-#if !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_) // no point on windows, since RtlVirtualUnwind won't give us a second chance if the segfault happens in ntdll
     jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     jl_set_safe_restore(&buf);
@@ -652,6 +653,80 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT
     free(frames);
 }
 
+const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo)
+{
+    jl_value_t *def = debuginfo->def;
+    if (jl_is_method_instance(def))
+        def = ((jl_method_instance_t*)def)->def.value;
+    if (jl_is_method(def))
+        def = (jl_value_t*)((jl_method_t*)def)->file;
+    if (jl_is_symbol(def))
+        return jl_symbol_name((jl_sym_t*)def);
+    return "<unknown>";
+}
+
+const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo)
+{
+    jl_debuginfo_t *linetable = debuginfo->linetable;
+    while ((jl_value_t*)linetable != jl_nothing) {
+        debuginfo = linetable;
+        linetable = debuginfo->linetable;
+    }
+    return jl_debuginfo_file1(debuginfo);
+}
+
+jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def)
+{
+    if (jl_is_method_instance(debuginfo_def))
+        debuginfo_def = ((jl_method_instance_t*)debuginfo_def)->def.value;
+    if (jl_is_method(debuginfo_def))
+        debuginfo_def = (jl_value_t*)((jl_method_t*)debuginfo_def)->module;
+    if (jl_is_module(debuginfo_def))
+        return (jl_module_t*)debuginfo_def;
+    return NULL;
+}
+
+const char *jl_debuginfo_name(jl_value_t *func)
+{
+    if (func == NULL)
+        return "macro expansion";
+    if (jl_is_method_instance(func))
+        func = ((jl_method_instance_t*)func)->def.value;
+    if (jl_is_method(func))
+        func = (jl_value_t*)((jl_method_t*)func)->name;
+    if (jl_is_symbol(func))
+        return jl_symbol_name((jl_sym_t*)func);
+    if (jl_is_module(func))
+        return "top-level scope";
+    return "<unknown>";
+}
+
+// func == module : top-level
+// func == NULL : macro expansion
+static void jl_print_debugloc(jl_debuginfo_t *debuginfo, jl_value_t *func, size_t ip, int inlined) JL_NOTSAFEPOINT
+{
+    if (!jl_is_symbol(debuginfo->def)) // this is a path or
+        func = debuginfo->def; // this is inlined code
+    struct jl_codeloc_t stmt = jl_uncompress1_codeloc(debuginfo->codelocs, ip);
+    intptr_t edges_idx = stmt.to;
+    if (edges_idx) {
+        jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, edges_idx - 1);
+        assert(jl_typetagis(edge, jl_debuginfo_type));
+        jl_print_debugloc(edge, NULL, stmt.pc, 1);
+    }
+    intptr_t ip2 = stmt.line;
+    if (ip2 >= 0 && ip > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) {
+        jl_print_debugloc(debuginfo->linetable, func, ip2, 0);
+    }
+    else {
+        if (ip2 < 0) // set broken debug info to ignored
+            ip2 = 0;
+        const char *func_name = jl_debuginfo_name(func);
+        const char *file = jl_debuginfo_file(debuginfo);
+        jl_safe_print_codeloc(func_name, file, ip2, inlined);
+    }
+}
+
 // Print code location for backtrace buffer entry at *bt_entry
 void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
 {
@@ -659,33 +734,23 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
         jl_print_native_codeloc(bt_entry[0].uintptr);
     }
     else if (jl_bt_entry_tag(bt_entry) == JL_BT_INTERP_FRAME_TAG) {
-        size_t ip = jl_bt_entry_header(bt_entry);
+        size_t ip = jl_bt_entry_header(bt_entry); // zero-indexed
         jl_value_t *code = jl_bt_entry_jlvalue(bt_entry, 0);
-        if (jl_is_method_instance(code)) {
+        jl_value_t *def = (jl_value_t*)jl_core_module; // just used as a token here that isa Module
+        if (jl_is_code_instance(code)) {
+            jl_code_instance_t *ci = (jl_code_instance_t*)code;
+            def = (jl_value_t*)ci->def;
+            code = jl_atomic_load_relaxed(&ci->inferred);
+        } else if (jl_is_method_instance(code)) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)code;
+            def = code;
             // When interpreting a method instance, need to unwrap to find the code info
-            code = jl_atomic_load_relaxed(&((jl_method_instance_t*)code)->uninferred);
+            code = mi->def.method->source;
         }
         if (jl_is_code_info(code)) {
             jl_code_info_t *src = (jl_code_info_t*)code;
             // See also the debug info handling in codegen.cpp.
-            // NB: debuginfoloc is 1-based!
-            intptr_t debuginfoloc = jl_array_data(src->codelocs, int32_t)[ip];
-            while (debuginfoloc != 0) {
-                jl_line_info_node_t *locinfo = (jl_line_info_node_t*)
-                    jl_array_ptr_ref(src->linetable, debuginfoloc - 1);
-                assert(jl_typetagis(locinfo, jl_lineinfonode_type));
-                const char *func_name = "Unknown";
-                jl_value_t *method = locinfo->method;
-                if (jl_is_method_instance(method))
-                    method = ((jl_method_instance_t*)method)->def.value;
-                if (jl_is_method(method))
-                    method = (jl_value_t*)((jl_method_t*)method)->name;
-                if (jl_is_symbol(method))
-                    func_name = jl_symbol_name((jl_sym_t*)method);
-                jl_safe_print_codeloc(func_name, jl_symbol_name(locinfo->file),
-                                      locinfo->line, locinfo->inlined_at);
-                debuginfoloc = locinfo->inlined_at;
-            }
+            jl_print_debugloc(src->debuginfo, def, ip + 1, 0);
         }
         else {
             // If we're using this function something bad has already happened;
@@ -854,16 +919,280 @@ _os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT
 #endif
 
 
-extern bt_context_t *jl_to_bt_context(void *sigctx);
+extern bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT;
+
+// Some notes: this simulates a longjmp call occurring in context `c`, as if the
+// user was to set the PC in `c` to call longjmp and the PC in the longjmp to
+// return here. This helps work around many cases where siglongjmp out of a
+// signal handler is not supported (e.g. missing a _sigunaltstack call).
+// Additionally note that this doesn't restore the MXCSR or FP control word
+// (which some, but not most longjmp implementations do).  It also doesn't
+// support shadow stacks, so if those are in use, you might need to use a direct
+// jl_longjmp instead to leave the signal frame instead of relying on simulating
+// it and attempting to return normally.
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT
+{
+#if (defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
+    https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+    return 0;
+#elif defined(_OS_WINDOWS_)
+    _JUMP_BUFFER* _ctx = (_JUMP_BUFFER*)mctx;
+    #if defined(_CPU_X86_64_)
+    c->Rbx = _ctx->Rbx;
+    c->Rsp = _ctx->Rsp;
+    c->Rbp = _ctx->Rbp;
+    c->Rsi = _ctx->Rsi;
+    c->Rdi = _ctx->Rdi;
+    c->R12 = _ctx->R12;
+    c->R13 = _ctx->R13;
+    c->R14 = _ctx->R14;
+    c->R15 = _ctx->R15;
+    c->Rip = _ctx->Rip;
+    memcpy(&c->Xmm6, &_ctx->Xmm6, 10 * sizeof(_ctx->Xmm6)); // Xmm6-Xmm15
+    // c->MxCsr = _ctx->MxCsr;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    // c->SegGS[0] = _ctx->Frame;
+    c->Rax = 1;
+    c->Rsp += sizeof(void*);
+    assert(c->Rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_)
+    c->Ebp = _ctx->Ebp;
+    c->Ebx = _ctx->Ebx;
+    c->Edi = _ctx->Edi;
+    c->Esi = _ctx->Esi;
+    c->Esp = _ctx->Esp;
+    c->Eip = _ctx->Eip;
+    // c->SegFS[0] = _ctx->Registration;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    c->Eax = 1;
+    c->Esp += sizeof(void*);
+    assert(c->Esp % 16 == 0);
+    return 1;
+    #else
+    #error Windows is currently only supported on x86 and x86_64
+    #endif
+#elif defined(_OS_LINUX_) && defined(__GLIBC__)
+    __jmp_buf *_ctx = &mctx->__jmpbuf;
+    mcontext_t *mc = &c->uc_mcontext;
+    #if defined(_CPU_X86_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
+    mc->gregs[REG_EBX] = (*_ctx)[0];
+    mc->gregs[REG_ESI] = (*_ctx)[1];
+    mc->gregs[REG_EDI] = (*_ctx)[2];
+    mc->gregs[REG_EBP] = (*_ctx)[3];
+    mc->gregs[REG_ESP] = (*_ctx)[4];
+    mc->gregs[REG_EIP] = (*_ctx)[5];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
+    mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
+    mc->gregs[REG_EAX] = 1;
+    assert(mc->gregs[REG_ESP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
+    mc->gregs[REG_RBX] = (*_ctx)[0];
+    mc->gregs[REG_RBP] = (*_ctx)[1];
+    mc->gregs[REG_R12] = (*_ctx)[2];
+    mc->gregs[REG_R13] = (*_ctx)[3];
+    mc->gregs[REG_R14] = (*_ctx)[4];
+    mc->gregs[REG_R15] = (*_ctx)[5];
+    mc->gregs[REG_RSP] = (*_ctx)[6];
+    mc->gregs[REG_RIP] = (*_ctx)[7];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
+    mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
+    mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
+    mc->gregs[REG_RAX] = 1;
+    assert(mc->gregs[REG_RSP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_ARM_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
+    mc->arm_sp = (*_ctx)[0];
+    mc->arm_lr = (*_ctx)[1];
+    mc->arm_r4 = (*_ctx)[2]; // aka v1
+    mc->arm_r5 = (*_ctx)[3]; // aka v2
+    mc->arm_r6 = (*_ctx)[4]; // aka v3
+    mc->arm_r7 = (*_ctx)[5]; // aka v4
+    mc->arm_r8 = (*_ctx)[6]; // aka v5
+    mc->arm_r9 = (*_ctx)[7]; // aka v6 aka sb
+    mc->arm_r10 = (*_ctx)[8]; // aka v7 aka sl
+    mc->arm_fp = (*_ctx)[10]; // aka v8 aka r11
+    // ifdef PTR_DEMANGLE ?
+    mc->arm_sp = ptr_demangle(mc->arm_sp);
+    mc->arm_lr = ptr_demangle(mc->arm_lr);
+    mc->arm_pc = mc->arm_lr;
+    mc->arm_r0 = 1;
+    assert(mc->arm_sp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
+    // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
+    unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
+    mc->regs[19] = (*_ctx)[0];
+    mc->regs[20] = (*_ctx)[1];
+    mc->regs[21] = (*_ctx)[2];
+    mc->regs[22] = (*_ctx)[3];
+    mc->regs[23] = (*_ctx)[4];
+    mc->regs[24] = (*_ctx)[5];
+    mc->regs[25] = (*_ctx)[6];
+    mc->regs[26] = (*_ctx)[7];
+    mc->regs[27] = (*_ctx)[8];
+    mc->regs[28] = (*_ctx)[9];
+    mc->regs[29] = (*_ctx)[10]; // aka fp
+    mc->regs[30] = (*_ctx)[11]; // aka lr
+    // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
+    mc->sp = (*_ctx)[13];
+    mcfp->vregs[7] = (*_ctx)[14]; // aka d8
+    mcfp->vregs[8] = (*_ctx)[15]; // aka d9
+    mcfp->vregs[9] = (*_ctx)[16]; // aka d10
+    mcfp->vregs[10] = (*_ctx)[17]; // aka d11
+    mcfp->vregs[11] = (*_ctx)[18]; // aka d12
+    mcfp->vregs[12] = (*_ctx)[19]; // aka d13
+    mcfp->vregs[13] = (*_ctx)[20]; // aka d14
+    mcfp->vregs[14] = (*_ctx)[21]; // aka d15
+    // ifdef PTR_DEMANGLE ?
+    mc->sp = ptr_demangle(mc->sp);
+    mc->regs[30] = ptr_demangle(mc->regs[30]);
+    mc->pc = mc->regs[30];
+    mc->regs[0] = 1;
+    assert(mc->sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux")
+    (void)mc;
+    (void)mctx;
+    return 0;
+    #endif
+#elif defined(_OS_DARWIN_)
+    #if defined(_CPU_X86_64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
+    x86_thread_state64_t *mc = (x86_thread_state64_t*)c;
+    mc->__rbx = ((uint64_t*)mctx)[0];
+    mc->__rbp = ((uint64_t*)mctx)[1];
+    mc->__rsp = ((uint64_t*)mctx)[2];
+    mc->__r12 = ((uint64_t*)mctx)[3];
+    mc->__r13 = ((uint64_t*)mctx)[4];
+    mc->__r14 = ((uint64_t*)mctx)[5];
+    mc->__r15 = ((uint64_t*)mctx)[6];
+    mc->__rip = ((uint64_t*)mctx)[7];
+    // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3)
+    // prior to that _os_ptr_munge_token was (hopefully) typically 0,
+    // so x ^ 0 == x and this is a no-op
+    mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
+    mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
+    mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
+    mc->__rax = 1;
+    assert(mc->__rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
+    // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
+    // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
+    arm_thread_state64_t *mc = (arm_thread_state64_t*)c;
+    mc->__x[19] = ((uint64_t*)mctx)[0];
+    mc->__x[20] = ((uint64_t*)mctx)[1];
+    mc->__x[21] = ((uint64_t*)mctx)[2];
+    mc->__x[22] = ((uint64_t*)mctx)[3];
+    mc->__x[23] = ((uint64_t*)mctx)[4];
+    mc->__x[24] = ((uint64_t*)mctx)[5];
+    mc->__x[25] = ((uint64_t*)mctx)[6];
+    mc->__x[26] = ((uint64_t*)mctx)[7];
+    mc->__x[27] = ((uint64_t*)mctx)[8];
+    mc->__x[28] = ((uint64_t*)mctx)[9];
+    mc->__x[10] = ((uint64_t*)mctx)[10];
+    mc->__x[11] = ((uint64_t*)mctx)[11];
+    mc->__x[12] = ((uint64_t*)mctx)[12];
+    // 13 is reserved/unused
+    double *mcfp = (double*)&mc[1];
+    mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8
+    mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9
+    mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10
+    mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11
+    mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12
+    mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13
+    mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14
+    mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15
+    mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]);
+    mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]);
+    mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]);
+    mc->__sp = mc->__x[12];
+    // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
+    mc->__pc = ptrauth_strip(mc->__lr, 0);
+    mc->__pad = 0; // aka __ra_sign_state = not signed
+    mc->__x[0] = 1;
+    assert(mc->__sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin")
+    (void)mctx;
+    return 0;
+#endif
+#elif defined(_OS_FREEBSD_)
+    mcontext_t *mc = &c->uc_mcontext;
+    #if defined(_CPU_X86_64_)
+    // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
+    mc->mc_rip = ((long*)mctx)[0];
+    mc->mc_rbx = ((long*)mctx)[1];
+    mc->mc_rsp = ((long*)mctx)[2];
+    mc->mc_rbp = ((long*)mctx)[3];
+    mc->mc_r12 = ((long*)mctx)[4];
+    mc->mc_r13 = ((long*)mctx)[5];
+    mc->mc_r14 = ((long*)mctx)[6];
+    mc->mc_r15 = ((long*)mctx)[7];
+    mc->mc_rax = 1;
+    mc->mc_rsp += sizeof(void*);
+    assert(mc->mc_rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    mc->mc_gpregs.gp_x[19] = ((long*)mctx)[0];
+    mc->mc_gpregs.gp_x[20] = ((long*)mctx)[1];
+    mc->mc_gpregs.gp_x[21] = ((long*)mctx)[2];
+    mc->mc_gpregs.gp_x[22] = ((long*)mctx)[3];
+    mc->mc_gpregs.gp_x[23] = ((long*)mctx)[4];
+    mc->mc_gpregs.gp_x[24] = ((long*)mctx)[5];
+    mc->mc_gpregs.gp_x[25] = ((long*)mctx)[6];
+    mc->mc_gpregs.gp_x[26] = ((long*)mctx)[7];
+    mc->mc_gpregs.gp_x[27] = ((long*)mctx)[8];
+    mc->mc_gpregs.gp_x[28] = ((long*)mctx)[9];
+    mc->mc_gpregs.gp_x[29] = ((long*)mctx)[10];
+    mc->mc_gpregs.gp_lr = ((long*)mctx)[11];
+    mc->mc_gpregs.gp_sp = ((long*)mctx)[12];
+    mc->mc_fpregs.fp_q[7] = ((long*)mctx)[13];
+    mc->mc_fpregs.fp_q[8] = ((long*)mctx)[14];
+    mc->mc_fpregs.fp_q[9] = ((long*)mctx)[15];
+    mc->mc_fpregs.fp_q[10] = ((long*)mctx)[16];
+    mc->mc_fpregs.fp_q[11] = ((long*)mctx)[17];
+    mc->mc_fpregs.fp_q[12] = ((long*)mctx)[18];
+    mc->mc_fpregs.fp_q[13] = ((long*)mctx)[19];
+    mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20];
+    mc->mc_gpregs.gp_x[0] = 1;
+    assert(mc->mc_gpregs.gp_sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd")
+    (void)mctx;
+    return 0;
+    #endif
+#else
+return 0;
+#endif
+}
 
-static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
+JL_DLLEXPORT size_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    ptls->bt_size = 0;
     if (t == ct) {
-        ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        return;
+        return rec_backtrace(bt_data, max_bt_size, 0);
     }
     bt_context_t *context = NULL;
     bt_context_t c;
@@ -871,9 +1200,11 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
     while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) {
         int lockret = jl_lock_stackwalk();
         // if this task is already running somewhere, we need to stop the thread it is running on and query its state
-        if (!jl_thread_suspend_and_get_state(old, 0, &c)) {
+        if (!jl_thread_suspend_and_get_state(old, 1, &c)) {
             jl_unlock_stackwalk(lockret);
-            return;
+            if (jl_atomic_load_relaxed(&t->tid) != old)
+                continue;
+            return 0;
         }
         jl_unlock_stackwalk(lockret);
         if (jl_atomic_load_relaxed(&t->tid) == old) {
@@ -888,217 +1219,33 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
         // got the wrong thread stopped, try again
         jl_thread_resume(old);
     }
-    if (context == NULL && (!t->copy_stack && t->started && t->stkbuf != NULL)) {
+    if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) {
         // need to read the context from the task stored state
+        jl_jmp_buf *mctx = &t->ctx.ctx->uc_mcontext;
 #if defined(_OS_WINDOWS_)
         memset(&c, 0, sizeof(c));
-        _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
-#if defined(_CPU_X86_64_)
-        c.Rbx = mctx->Rbx;
-        c.Rsp = mctx->Rsp;
-        c.Rbp = mctx->Rbp;
-        c.Rsi = mctx->Rsi;
-        c.Rdi = mctx->Rdi;
-        c.R12 = mctx->R12;
-        c.R13 = mctx->R13;
-        c.R14 = mctx->R14;
-        c.R15 = mctx->R15;
-        c.Rip = mctx->Rip;
-        memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
-#else
-        c.Eip = mctx->Eip;
-        c.Esp = mctx->Esp;
-        c.Ebp = mctx->Ebp;
-#endif
-        context = &c;
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
 #elif defined(JL_HAVE_UNW_CONTEXT)
-        context = &t->ctx.ctx;
+        context = t->ctx.ctx;
 #elif defined(JL_HAVE_UCONTEXT)
-        context = jl_to_bt_context(&t->ctx.ctx);
+        context = jl_to_bt_context(t->ctx.ctx);
 #elif defined(JL_HAVE_ASM)
         memset(&c, 0, sizeof(c));
-     #if defined(_OS_LINUX_) && defined(__GLIBC__)
-        __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf;
-        mcontext_t *mc = &c.uc_mcontext;
-      #if defined(_CPU_X86_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
-        mc->gregs[REG_EBX] = (*mctx)[0];
-        mc->gregs[REG_ESI] = (*mctx)[1];
-        mc->gregs[REG_EDI] = (*mctx)[2];
-        mc->gregs[REG_EBP] = (*mctx)[3];
-        mc->gregs[REG_ESP] = (*mctx)[4];
-        mc->gregs[REG_EIP] = (*mctx)[5];
-        // ifdef PTR_DEMANGLE ?
-        mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
-        mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
-        context = &c;
-      #elif defined(_CPU_X86_64_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
-        mc->gregs[REG_RBX] = (*mctx)[0];
-        mc->gregs[REG_RBP] = (*mctx)[1];
-        mc->gregs[REG_R12] = (*mctx)[2];
-        mc->gregs[REG_R13] = (*mctx)[3];
-        mc->gregs[REG_R14] = (*mctx)[4];
-        mc->gregs[REG_R15] = (*mctx)[5];
-        mc->gregs[REG_RSP] = (*mctx)[6];
-        mc->gregs[REG_RIP] = (*mctx)[7];
-        // ifdef PTR_DEMANGLE ?
-        mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
-        mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
-        mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
-        context = &c;
-      #elif defined(_CPU_ARM_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
-        mc->arm_sp = (*mctx)[0];
-        mc->arm_lr = (*mctx)[1];
-        mc->arm_r4 = (*mctx)[2]; // aka v1
-        mc->arm_r5 = (*mctx)[3]; // aka v2
-        mc->arm_r6 = (*mctx)[4]; // aka v3
-        mc->arm_r7 = (*mctx)[5]; // aka v4
-        mc->arm_r8 = (*mctx)[6]; // aka v5
-        mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb
-        mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl
-        mc->arm_fp = (*mctx)[10]; // aka v8 aka r11
-        // ifdef PTR_DEMANGLE ?
-        mc->arm_sp = ptr_demangle(mc->arm_sp);
-        mc->arm_lr = ptr_demangle(mc->arm_lr);
-        mc->arm_pc = mc->arm_lr;
-        context = &c;
-      #elif defined(_CPU_AARCH64_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
-        // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
-        unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
-        mc->regs[19] = (*mctx)[0];
-        mc->regs[20] = (*mctx)[1];
-        mc->regs[21] = (*mctx)[2];
-        mc->regs[22] = (*mctx)[3];
-        mc->regs[23] = (*mctx)[4];
-        mc->regs[24] = (*mctx)[5];
-        mc->regs[25] = (*mctx)[6];
-        mc->regs[26] = (*mctx)[7];
-        mc->regs[27] = (*mctx)[8];
-        mc->regs[28] = (*mctx)[9];
-        mc->regs[29] = (*mctx)[10]; // aka fp
-        mc->regs[30] = (*mctx)[11]; // aka lr
-        // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
-        mc->sp = (*mctx)[13];
-        mcfp->vregs[7] = (*mctx)[14]; // aka d8
-        mcfp->vregs[8] = (*mctx)[15]; // aka d9
-        mcfp->vregs[9] = (*mctx)[16]; // aka d10
-        mcfp->vregs[10] = (*mctx)[17]; // aka d11
-        mcfp->vregs[11] = (*mctx)[18]; // aka d12
-        mcfp->vregs[12] = (*mctx)[19]; // aka d13
-        mcfp->vregs[13] = (*mctx)[20]; // aka d14
-        mcfp->vregs[14] = (*mctx)[21]; // aka d15
-        // ifdef PTR_DEMANGLE ?
-        mc->sp = ptr_demangle(mc->sp);
-        mc->regs[30] = ptr_demangle(mc->regs[30]);
-        mc->pc = mc->regs[30];
-        context = &c;
-      #else
-       #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux")
-       (void)mc;
-       (void)c;
-       (void)mctx;
-      #endif
-     #elif defined(_OS_DARWIN_)
-        sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-      #if defined(_CPU_X86_64_)
-        // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
-        x86_thread_state64_t *mc = (x86_thread_state64_t*)&c;
-        mc->__rbx = ((uint64_t*)mctx)[0];
-        mc->__rbp = ((uint64_t*)mctx)[1];
-        mc->__rsp = ((uint64_t*)mctx)[2];
-        mc->__r12 = ((uint64_t*)mctx)[3];
-        mc->__r13 = ((uint64_t*)mctx)[4];
-        mc->__r14 = ((uint64_t*)mctx)[5];
-        mc->__r15 = ((uint64_t*)mctx)[6];
-        mc->__rip = ((uint64_t*)mctx)[7];
-        // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3)
-        // prior to that _os_ptr_munge_token was (hopefully) typically 0,
-        // so x ^ 0 == x and this is a no-op
-        mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
-        mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
-        mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
-        context = &c;
-      #elif defined(_CPU_AARCH64_)
-        // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
-        // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
-        // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
-        arm_thread_state64_t *mc = (arm_thread_state64_t*)&c;
-        mc->__x[19] = ((uint64_t*)mctx)[0];
-        mc->__x[20] = ((uint64_t*)mctx)[1];
-        mc->__x[21] = ((uint64_t*)mctx)[2];
-        mc->__x[22] = ((uint64_t*)mctx)[3];
-        mc->__x[23] = ((uint64_t*)mctx)[4];
-        mc->__x[24] = ((uint64_t*)mctx)[5];
-        mc->__x[25] = ((uint64_t*)mctx)[6];
-        mc->__x[26] = ((uint64_t*)mctx)[7];
-        mc->__x[27] = ((uint64_t*)mctx)[8];
-        mc->__x[28] = ((uint64_t*)mctx)[9];
-        mc->__x[10] = ((uint64_t*)mctx)[10];
-        mc->__x[11] = ((uint64_t*)mctx)[11];
-        mc->__x[12] = ((uint64_t*)mctx)[12];
-        // 13 is reserved/unused
-        double *mcfp = (double*)&mc[1];
-        mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8
-        mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9
-        mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10
-        mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11
-        mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12
-        mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13
-        mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14
-        mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15
-        mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]);
-        mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]);
-        mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]);
-        mc->__sp = mc->__x[12];
-        // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
-        mc->__pc = ptrauth_strip(mc->__lr, 0);
-        mc->__pad = 0; // aka __ra_sign_state = not signed
-        context = &c;
-      #else
-       #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin")
-        (void)mctx;
-        (void)c;
-      #endif
-     #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
-        sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-        mcontext_t *mc = &c.uc_mcontext;
-        // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
-        mc->mc_rip = ((long*)mctx)[0];
-        mc->mc_rbx = ((long*)mctx)[1];
-        mc->mc_rsp = ((long*)mctx)[2];
-        mc->mc_rbp = ((long*)mctx)[3];
-        mc->mc_r12 = ((long*)mctx)[4];
-        mc->mc_r13 = ((long*)mctx)[5];
-        mc->mc_r14 = ((long*)mctx)[6];
-        mc->mc_r15 = ((long*)mctx)[7];
-        context = &c;
-     #else
-      #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system")
-      (void)c;
-     #endif
-#elif defined(JL_HAVE_SIGALTSTACK)
-     #pragma message("jl_rec_backtrace not defined for SIGALTSTACK")
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
 #else
-     #pragma message("jl_rec_backtrace not defined for unknown task system")
+     #pragma message("jl_record_backtrace not defined for unknown task system")
 #endif
     }
+    size_t bt_size = 0;
     if (context)
-        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context,  t->gcstack);
+        bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, t->gcstack);
     if (old == -1)
         jl_atomic_store_relaxed(&t->tid, old);
     else if (old != ptls->tid)
         jl_thread_resume(old);
+    return bt_size;
 }
 
 //--------------------------------------------------
@@ -1130,12 +1277,15 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    jl_rec_backtrace(t);
-    size_t i, bt_size = ptls->bt_size;
+    ptls->bt_size = 0;
     jl_bt_element_t *bt_data = ptls->bt_data;
+    size_t bt_size = jl_record_backtrace(t, bt_data, JL_MAX_BT_SIZE);
+    size_t i;
     for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
         jl_print_bt_entry_codeloc(bt_data + i);
     }
+    if (bt_size == 0)
+        jl_safe_printf("      no backtrace recorded\n");
 }
 
 JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
@@ -1151,15 +1301,19 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
     size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
     jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
+        jl_ptls_t ptls2 = allstates[i];
+        if (gc_is_parallel_collector_thread(i)) {
+            jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1);
+            continue;
+        }
+        if (gc_is_concurrent_collector_thread(i)) {
+            jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1);
             continue;
         }
-        jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL) {
             continue;
         }
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         int t_state = JL_TASK_STATE_DONE;
         jl_task_t *t = ptls2->root_task;
@@ -1171,14 +1325,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
             jl_safe_printf("     ---- Root task (%p)\n", ptls2->root_task);
             if (t != NULL) {
                 jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                        t->sticky, t->started, t_state,
+                        t->sticky, t->ctx.started, t_state,
                         jl_atomic_load_relaxed(&t->tid) + 1);
-                if (t->stkbuf != NULL) {
-                    jlbacktracet(t);
-                }
-                else {
-                    jl_safe_printf("      no stack\n");
-                }
+                jlbacktracet(t);
             }
             jl_safe_printf("     ---- End root task\n");
         }
@@ -1193,12 +1342,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
             jl_safe_printf("     ---- Task %zu (%p)\n", j + 1, t);
             // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc.
             jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                    t->sticky, t->started, t_state,
+                    t->sticky, t->ctx.started, t_state,
                     jl_atomic_load_relaxed(&t->tid) + 1);
-            if (t->stkbuf != NULL)
-                jlbacktracet(t);
-            else
-                jl_safe_printf("      no stack\n");
+            jlbacktracet(t);
             jl_safe_printf("     ---- End task %zu\n", j + 1);
         }
         jl_safe_printf("==== End thread %d\n", ptls2->tid + 1);
diff --git a/src/staticdata.c b/src/staticdata.c
index 261042b775c14..b991dfe8f37f3 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -100,7 +100,7 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    188
+#define NUM_TAGS    192
 
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
@@ -122,6 +122,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_array_type);
         INSERT_TAG(jl_expr_type);
         INSERT_TAG(jl_binding_type);
+        INSERT_TAG(jl_binding_partition_type);
         INSERT_TAG(jl_globalref_type);
         INSERT_TAG(jl_string_type);
         INSERT_TAG(jl_module_type);
@@ -213,6 +214,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_addrspace_type);
         INSERT_TAG(jl_addrspace_typename);
         INSERT_TAG(jl_addrspacecore_type);
+        INSERT_TAG(jl_debuginfo_type);
 
         // special typenames
         INSERT_TAG(jl_tuple_typename);
@@ -234,6 +236,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_loaderror_type);
         INSERT_TAG(jl_initerror_type);
         INSERT_TAG(jl_undefvarerror_type);
+        INSERT_TAG(jl_fielderror_type);
         INSERT_TAG(jl_stackovf_exception);
         INSERT_TAG(jl_diverror_exception);
         INSERT_TAG(jl_interrupt_exception);
@@ -264,6 +267,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_kwcall_mt);
         INSERT_TAG(jl_kwcall_func);
         INSERT_TAG(jl_opaque_closure_method);
+        INSERT_TAG(jl_nulldebuginfo);
 
         // some Core.Builtin Functions that we want to be able to reference:
         INSERT_TAG(jl_builtin_throw);
@@ -336,6 +340,8 @@ static arraylist_t object_worklist;  // used to mimic recursion by jl_serialize_
 // jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i]   (0-offset indexing)
 arraylist_t jl_linkage_blobs;
 arraylist_t jl_image_relocs;
+// Keep track of which image corresponds to which top module.
+arraylist_t jl_top_mods;
 
 // Eytzinger tree of images. Used for very fast jl_object_in_image queries
 // See https://algorithmica.org/en/eytzinger
@@ -344,6 +350,18 @@ arraylist_t eytzinger_idxs;
 static uintptr_t img_min;
 static uintptr_t img_max;
 
+// HT_NOTFOUND is a valid integer ID, so we store the integer ids mangled.
+// This pair of functions mangles/demanges
+static size_t from_seroder_entry(void *entry)
+{
+    return (size_t)((char*)entry - (char*)HT_NOTFOUND - 1);
+}
+
+static void *to_seroder_entry(size_t idx)
+{
+    return (void*)((char*)HT_NOTFOUND + 1 + idx);
+}
+
 static int ptr_cmp(const void *l, const void *r)
 {
     uintptr_t left = *(const uintptr_t*)l;
@@ -450,23 +468,36 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
     return idx;
 }
 
-uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
+JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
 {
     return eyt_obj_in_img(obj);
 }
 
+// Map an object to it's "owning" top module
+JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT
+{
+    size_t idx = external_blob_index(v);
+    size_t lbids = n_linkage_blobs();
+    if (idx < lbids) {
+        return (jl_value_t*)jl_top_mods.items[idx];
+    }
+    // The object is runtime allocated
+    return (jl_value_t*)jl_nothing;
+}
+
 // hash of definitions for predefined function pointers
 static htable_t fptr_to_id;
 void *native_functions;   // opaque jl_native_code_desc_t blob used for fetching data from LLVM
 
 // table of struct field addresses to rewrite during saving
 static htable_t field_replace;
+static htable_t relocatable_ext_cis;
 
 // array of definitions for the predefined function pointers
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
 static const jl_fptr_args_t id_to_fptrs[] = {
-    &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
+    &jl_f_throw, &jl_f_throw_methoderror, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
     &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure,
     &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined,
     &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call,
@@ -477,7 +508,7 @@ static const jl_fptr_args_t id_to_fptrs[] = {
     &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar,
     &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype,
     &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type,
-    &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier,
+    &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier,
     &jl_f_getglobal, &jl_f_setglobal, &jl_f_swapglobal, &jl_f_modifyglobal, &jl_f_replaceglobal, &jl_f_setglobalonce,
     &jl_f_finalizer, &jl_f__compute_sparams, &jl_f__svec_ref,
     &jl_f_current_scope,
@@ -545,6 +576,8 @@ enum RefTags {
     ExternalLinkage     // reference to some other pkgimage
 };
 
+#define SYS_EXTERNAL_LINK_UNIT sizeof(void*)
+
 // calling conventions for internal entry points.
 // this is used to set the method-instance->invoke field
 typedef enum {
@@ -666,6 +699,14 @@ static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT
         if (jl_is_method(m) && jl_object_in_image(m))
             return 1 + type_in_worklist(mi->specTypes);
     }
+    if (jl_is_binding(v)) {
+        jl_globalref_t *gr = ((jl_binding_t*)v)->globalref;
+        if (!gr)
+            return 0;
+        if (!jl_object_in_image((jl_value_t*)gr->mod))
+            return 0;
+        return 1;
+    }
     if (jl_is_datatype(v)) {
         jl_datatype_t *dt = (jl_datatype_t*)v;
         if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars)
@@ -693,7 +734,8 @@ static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT
 
 static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT
 {
-    ptrhash_put(&field_replace, (void*)addr, newval);
+    if (*addr != newval)
+        ptrhash_put(&field_replace, (void*)addr, newval);
 }
 
 static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED
@@ -741,7 +783,7 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_
             if ((void*)b == jl_nothing)
                 break;
             jl_sym_t *name = b->globalref->name;
-            if (name == jl_docmeta_sym && jl_atomic_load_relaxed(&b->value))
+            if (name == jl_docmeta_sym && jl_get_binding_value(b))
                 record_field_change((jl_value_t**)&b->value, jl_nothing);
         }
     }
@@ -794,7 +836,6 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             // we only need 3 specific fields of this (the rest are restored afterward, if valid)
             // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
             // so must not be present here
-            record_field_change((jl_value_t**)&mi->uninferred, NULL);
             record_field_change((jl_value_t**)&mi->backedges, NULL);
             record_field_change((jl_value_t**)&mi->cache, NULL);
         }
@@ -808,6 +849,14 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
         // prevent this from happening, so we do not need to detect that user
         // error now.
     }
+    if (s->incremental && jl_is_binding(v)) {
+        if (needs_uniquing(v)) {
+            jl_binding_t *b = (jl_binding_t*)v;
+            jl_queue_for_serialization(s, b->globalref->mod);
+            jl_queue_for_serialization(s, b->globalref->name);
+            goto done_fields;
+        }
+    }
     if (s->incremental && jl_is_globalref(v)) {
         jl_globalref_t *gr = (jl_globalref_t*)v;
         if (jl_object_in_image((jl_value_t*)gr->mod)) {
@@ -836,6 +885,8 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             // TODO: if (ci in ci->defs->cache)
             record_field_change((jl_value_t**)&ci->next, NULL);
         }
+        if (jl_atomic_load_relaxed(&ci->inferred) && !is_relocatable_ci(&relocatable_ext_cis, ci))
+            record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
     }
 
     if (immediate) // must be things that can be recursively handled, and valid as type parameters
@@ -886,14 +937,17 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
     else if (jl_typetagis(v, jl_module_tag << 4)) {
         jl_queue_module_for_serialization(s, (jl_module_t*)v);
     }
+    else if (jl_is_binding_partition(v)) {
+        jl_binding_partition_t *bpart = (jl_binding_partition_t*)v;
+        jl_queue_for_serialization_(s, decode_restriction_value(jl_atomic_load_relaxed(&bpart->restriction)), 1, immediate);
+        jl_queue_for_serialization_(s, get_replaceable_field((jl_value_t**)&bpart->next, 0), 1, immediate);
+    }
     else if (layout->nfields > 0) {
         char *data = (char*)jl_data_ptr(v);
         size_t i, np = layout->npointers;
         for (i = 0; i < np; i++) {
             uint32_t ptr = jl_ptr_offset(t, i);
             int mutabl = t->name->mutabl;
-            if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                mutabl = 0;
             jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
             jl_queue_for_serialization_(s, fld, 1, immediate);
         }
@@ -907,7 +961,7 @@ done_fields: ;
     arraylist_push(&serialization_queue, (void*) v);
     size_t idx = serialization_queue.len - 1;
     assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
-    *bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
+    *bp = to_seroder_entry(idx);
 
     // DataType is very unusual, in that some of the fields need to be pre-order, and some
     // (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
@@ -1028,8 +1082,8 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
         // We found the sysimg/pkg that this item links against
         // Compute the relocation code
         size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
-        offset /= sizeof(void*);
-        assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");
+        assert((offset % SYS_EXTERNAL_LINK_UNIT) == 0);
+        offset /= SYS_EXTERNAL_LINK_UNIT;
         assert(n_linkage_blobs() == jl_array_nrows(s->buildid_depmods_idxs));
         size_t depsidx = jl_array_data(s->buildid_depmods_idxs, uint32_t)[i]; // map from build_id_idx -> deps_idx
         assert(depsidx < INT32_MAX);
@@ -1041,6 +1095,7 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
         jl_array_grow_end(link_ids, 1);
         uint32_t *link_id_data  = jl_array_data(link_ids, uint32_t);  // wait until after the `grow`
         link_id_data[jl_array_nrows(link_ids) - 1] = depsidx;
+        assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
         return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
     }
     return 0;
@@ -1053,19 +1108,19 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
 static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT
 {
     assert(v != NULL && "cannot get backref to NULL object");
-    void *idx = HT_NOTFOUND;
     if (jl_is_symbol(v)) {
         void **pidx = ptrhash_bp(&symbol_table, v);
-        idx = *pidx;
+        void *idx = *pidx;
         if (idx == HT_NOTFOUND) {
             size_t l = strlen(jl_symbol_name((jl_sym_t*)v));
             write_uint32(s->symbols, l);
             ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1);
             size_t offset = ++nsym_tag;
             assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols");
-            idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset);
+            idx = to_seroder_entry(offset - 1);
             *pidx = idx;
         }
+        return ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
     }
     else if (v == (jl_value_t*)s->ptls->root_task) {
         return (uintptr_t)TagRef << RELOC_TAG_OFFSET;
@@ -1093,17 +1148,15 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
         assert(item && "no external linkage identified");
         return item;
     }
+    void *idx = ptrhash_get(&serialization_order, v);
     if (idx == HT_NOTFOUND) {
-        idx = ptrhash_get(&serialization_order, v);
-        if (idx == HT_NOTFOUND) {
-            jl_(jl_typeof(v));
-            jl_(v);
-        }
-        assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
-        assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
-        assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+        jl_(jl_typeof(v));
+        jl_(v);
     }
-    return (char*)idx - 1 - (char*)HT_NOTFOUND;
+    assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
+    assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
+    assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+    return ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
 }
 
 
@@ -1112,7 +1165,7 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o
     if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) {
         if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld)))
             arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset);
-        else if (jl_is_method_instance(fld))
+        else if (jl_is_method_instance(fld) || jl_is_binding(fld))
             arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset);
         else
             assert(0 && "unknown object type with needs_uniquing set");
@@ -1305,7 +1358,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
 
         if (s->incremental) {
             if (needs_uniquing(v)) {
-                if (jl_is_method_instance(v)) {
+                if (jl_typetagis(v, jl_binding_type)) {
+                    jl_binding_t *b = (jl_binding_t*)v;
+                    if (b->globalref == NULL)
+                        jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
+                    write_pointerfield(s, (jl_value_t*)b->globalref->mod);
+                    write_pointerfield(s, (jl_value_t*)b->globalref->name);
+                    continue;
+                }
+                else if (jl_is_method_instance(v)) {
                     assert(f == s->s);
                     jl_method_instance_t *mi = (jl_method_instance_t*)v;
                     write_pointerfield(s, mi->def.value);
@@ -1328,17 +1389,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             else if (needs_recaching(v)) {
                 arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset);
             }
-            else if (jl_typetagis(v, jl_binding_type)) {
-                jl_binding_t *b = (jl_binding_t*)v;
-                if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod))
-                    jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
-                // Assign type Any to any owned bindings that don't have a type.
-                // We don't want these accidentally managing to diverge later in different compilation units.
-                if (jl_atomic_load_relaxed(&b->owner) == b) {
-                    jl_value_t *old_ty = NULL;
-                    jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-                }
-            }
         }
 
         // write data
@@ -1524,6 +1574,26 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             ios_write(s->const_data, (char*)pdata, nb);
             write_pointer(f);
         }
+        else if (jl_is_binding_partition(v)) {
+            jl_binding_partition_t *bpart = (jl_binding_partition_t*)v;
+            jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+            jl_value_t *restriction_val = decode_restriction_value(pku);
+            static_assert(offsetof(jl_binding_partition_t, restriction) == 0, "BindingPartition layout mismatch");
+            write_pointerfield(s, restriction_val);
+#ifndef _P64
+            write_uint(f, decode_restriction_kind(pku));
+#endif
+            write_uint(f, bpart->min_world);
+            write_uint(f, jl_atomic_load_relaxed(&bpart->max_world));
+            write_pointerfield(s, (jl_value_t*)jl_atomic_load_relaxed(&bpart->next));
+#ifdef _P64
+            write_uint(f, decode_restriction_kind(pku)); // This will be moved back into place during deserialization (if necessary)
+            static_assert(sizeof(jl_binding_partition_t) == 5*sizeof(void*), "BindingPartition layout mismatch");
+#else
+            write_uint(f, 0);
+            static_assert(sizeof(jl_binding_partition_t) == 6*sizeof(void*), "BindingPartition layout mismatch");
+#endif
+        }
         else {
             // Generic object::DataType serialization by field
             const char *data = (const char*)v;
@@ -1550,8 +1620,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
                 int mutabl = t->name->mutabl;
-                if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                    mutabl = 0;
                 jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], mutabl);
                 size_t fld_pos = offset + reloc_offset;
                 if (fld != NULL) {
@@ -1631,6 +1699,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                         jl_atomic_store_release(&newci->min_world, 1);
                         jl_atomic_store_release(&newci->max_world, 0);
                     }
+                    newci->relocatability = 0;
                 }
                 jl_atomic_store_relaxed(&newci->invoke, NULL);
                 jl_atomic_store_relaxed(&newci->specsigflags, 0);
@@ -1726,7 +1795,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     }
                 }
                 void *superidx = ptrhash_get(&serialization_order, dt->super);
-                if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super))
+                if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super))
                     arraylist_push(&s->uniquing_super, dt->super);
             }
             else if (jl_is_typename(v)) {
@@ -1920,7 +1989,7 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs));
         size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     case ExternalLinkage: {
         assert(link_ids);
@@ -1931,7 +2000,7 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         assert(depsidx < jl_array_len(s->buildid_depmods_idxs));
         size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     }
     abort();
@@ -2315,11 +2384,11 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED
 
     void *idx = ptrhash_get(&serialization_order, cache);
     assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
-    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache);
+    assert(serialization_queue.items[from_seroder_entry(idx)] == cache);
     cache = cache_rehash_set(cache, sz);
     // redirect all references to the old cache to relocate to the new cache object
     ptrhash_put(&serialization_order, cache, idx);
-    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache;
+    serialization_queue.items[from_seroder_entry(idx)] = cache;
     return cache;
 }
 
@@ -2337,35 +2406,33 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
         jl_svecset(cache, ins++, jl_nothing);
 }
 
-static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig)
+static void strip_slotnames(jl_array_t *slotnames)
+{
+    // replace slot names with `?`, except unused_sym since the compiler looks at it
+    jl_sym_t *questionsym = jl_symbol("?");
+    int i, l = jl_array_len(slotnames);
+    for (i = 0; i < l; i++) {
+        jl_value_t *s = jl_array_ptr_ref(slotnames, i);
+        if (s != (jl_value_t*)jl_unused_sym)
+            jl_array_ptr_set(slotnames, i, questionsym);
+    }
+}
+
+static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, jl_code_instance_t *codeinst)
 {
     jl_code_info_t *ci = NULL;
     JL_GC_PUSH1(&ci);
     int compressed = 0;
     if (!jl_is_code_info(ci_)) {
         compressed = 1;
-        ci = jl_uncompress_ir(m, NULL, (jl_value_t*)ci_);
+        ci = jl_uncompress_ir(m, codeinst, (jl_value_t*)ci_);
     }
     else {
         ci = (jl_code_info_t*)ci_;
     }
-    // leave codelocs length the same so the compiler can assume that; just zero it
-    memset(jl_array_data(ci->codelocs, int32_t), 0, jl_array_len(ci->codelocs)*sizeof(int32_t));
-    // empty linetable
-    if (jl_is_array(ci->linetable))
-        jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable));
-    // replace slot names with `?`, except unused_sym since the compiler looks at it
-    jl_sym_t *questionsym = jl_symbol("?");
-    int i, l = jl_array_len(ci->slotnames);
-    for (i = 0; i < l; i++) {
-        jl_value_t *s = jl_array_ptr_ref(ci->slotnames, i);
-        if (s != (jl_value_t*)jl_unused_sym)
-            jl_array_ptr_set(ci->slotnames, i, questionsym);
-    }
-    if (orig) {
-        m->slot_syms = jl_compress_argnames(ci->slotnames);
-        jl_gc_wb(m, m->slot_syms);
-    }
+    strip_slotnames(ci->slotnames);
+    ci->debuginfo = jl_nulldebuginfo;
+    jl_gc_wb(ci, ci->debuginfo);
     jl_value_t *ret = (jl_value_t*)ci;
     if (compressed)
         ret = (jl_value_t*)jl_compress_ir(m, ci);
@@ -2384,16 +2451,17 @@ static void strip_specializations_(jl_method_instance_t *mi)
                 record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing);
             }
             else if (jl_options.strip_metadata) {
-                jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, 0);
+                jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, codeinst);
                 if (jl_atomic_cmpswap_relaxed(&codeinst->inferred, &inferred, stripped)) {
                     jl_gc_wb(codeinst, stripped);
                 }
             }
         }
+        if (jl_options.strip_metadata)
+            record_field_change((jl_value_t**)&codeinst->debuginfo, (jl_value_t*)jl_nulldebuginfo);
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     if (jl_options.strip_ir) {
-        record_field_change((jl_value_t**)&mi->uninferred, NULL);
         record_field_change((jl_value_t**)&mi->backedges, NULL);
     }
 }
@@ -2404,31 +2472,46 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
     if (m->source) {
         int stripped_ir = 0;
         if (jl_options.strip_ir) {
-            if (jl_atomic_load_relaxed(&m->unspecialized)) {
-                jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache);
-                if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
-                    // we have a generic compiled version, so can remove the IR
-                    record_field_change(&m->source, jl_nothing);
-                    stripped_ir = 1;
+            int should_strip_ir = 0;
+            if (!should_strip_ir) {
+                if (jl_atomic_load_relaxed(&m->unspecialized)) {
+                    jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache);
+                    if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
+                        // we have a generic compiled version, so can remove the IR
+                        should_strip_ir = 1;
+                    }
                 }
             }
-            if (!stripped_ir) {
+            if (!should_strip_ir) {
                 int mod_setting = jl_get_module_compile(m->module);
-                // if the method is declared not to be compiled, keep IR for interpreter
                 if (!(mod_setting == JL_OPTIONS_COMPILE_OFF || mod_setting == JL_OPTIONS_COMPILE_MIN)) {
-                    record_field_change(&m->source, jl_nothing);
-                    stripped_ir = 1;
+                    // if the method is declared not to be compiled, keep IR for interpreter
+                    should_strip_ir = 1;
                 }
             }
+            if (should_strip_ir) {
+                record_field_change(&m->source, jl_nothing);
+                record_field_change((jl_value_t**)&m->roots, NULL);
+                stripped_ir = 1;
+            }
         }
-        if (jl_options.strip_metadata && !stripped_ir) {
-            m->source = strip_codeinfo_meta(m, m->source, 1);
-            jl_gc_wb(m, m->source);
+        if (jl_options.strip_metadata) {
+            if (!stripped_ir) {
+                m->source = strip_codeinfo_meta(m, m->source, NULL);
+                jl_gc_wb(m, m->source);
+            }
+            jl_array_t *slotnames = jl_uncompress_argnames(m->slot_syms);
+            JL_GC_PUSH1(&slotnames);
+            strip_slotnames(slotnames);
+            m->slot_syms = jl_compress_argnames(slotnames);
+            jl_gc_wb(m, m->slot_syms);
+            JL_GC_POP();
         }
     }
     if (jl_options.strip_metadata) {
         record_field_change((jl_value_t**)&m->file, (jl_value_t*)jl_empty_sym);
         m->line = 0;
+        record_field_change((jl_value_t**)&m->debuginfo, (jl_value_t*)jl_nulldebuginfo);
     }
     jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations);
     if (!jl_is_svec(specializations)) {
@@ -2573,7 +2656,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
         *edges = jl_alloc_vec_any(0);
         *method_roots_list = jl_alloc_vec_any(0);
         // Collect the new method roots for external specializations
-        jl_collect_new_roots(*method_roots_list, *new_ext_cis, worklist_key);
+        jl_collect_new_roots(&relocatable_ext_cis, *method_roots_list, *new_ext_cis, worklist_key);
         jl_collect_edges(*edges, *ext_targets, *new_ext_cis, world);
     }
     assert(edges_map == NULL); // jl_collect_edges clears this when done
@@ -2974,6 +3057,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
     assert((ct->reentrant_timing & 0b1110) == 0);
     ct->reentrant_timing |= 0b1000;
     if (worklist) {
+        htable_new(&relocatable_ext_cis, 0);
         jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
                                       &extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges);
         if (!emit_split) {
@@ -2990,6 +3074,8 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
     jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, method_roots_list, ext_targets, edges);
     if (_native_data != NULL)
         native_functions = NULL;
+    if (worklist)
+        htable_free(&relocatable_ext_cis);
     // make sure we don't run any Julia code concurrently before this point
     // Re-enable running julia code for postoutput hooks, atexit, etc.
     jl_gc_enable_finalizers(ct, 1);
@@ -3042,10 +3128,10 @@ JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname)
 // Allow passing in a module handle directly, rather than a path
 JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
 {
-    void* *jl_RTLD_DEFAULT_handle_pointer;
+    void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL;
     if (handle != jl_RTLD_DEFAULT_handle) {
-        int symbol_found = jl_dlsym(handle, "jl_RTLD_DEFAULT_handle_pointer", (void **)&jl_RTLD_DEFAULT_handle_pointer, 0);
-        if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != *jl_RTLD_DEFAULT_handle_pointer)
+        int symbol_found = jl_dlsym(handle, "get_jl_RTLD_DEFAULT_handle_addr", (void **)&get_jl_RTLD_DEFAULT_handle_addr, 0);
+        if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != (get_jl_RTLD_DEFAULT_handle_addr()))
             jl_error("System image file failed consistency check: maybe opened the wrong version?");
     }
     if (jl_options.cpu_target == NULL)
@@ -3078,6 +3164,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                                                  jl_array_t **ext_targets, jl_array_t **edges,
                                                  char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
 {
+    jl_task_t *ct = jl_current_task;
     int en = jl_gc_enable(0);
     ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
     jl_serializer_state s = {0};
@@ -3089,7 +3176,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_current_task->ptls;
+    s.ptls = ct->ptls;
     jl_value_t **const*const tags = get_tags();
     htable_t new_dt_objs;
     htable_new(&new_dt_objs, 0);
@@ -3262,6 +3349,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     arraylist_new(&cleanup_list, 0);
     arraylist_t delay_list;
     arraylist_new(&delay_list, 0);
+    JL_LOCK(&typecache_lock); // Might GC--prevent other threads from changing any type caches while we inspect them all
     for (size_t i = 0; i < s.uniquing_types.len; i++) {
         uintptr_t item = (uintptr_t)s.uniquing_types.items[i];
         // check whether we are operating on the typetag
@@ -3389,6 +3477,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     }
     arraylist_grow(&cleanup_list, -cleanup_list.len);
     // finally cache all our new types now
+    jl_safepoint_suspend_all_threads(ct); // past this point, it is now not safe to observe the intermediate states on other threads via reflection, so temporarily pause those
     for (size_t i = 0; i < new_dt_objs.size; i += 2) {
         void *dt = table[i + 1];
         if (dt != HT_NOTFOUND) {
@@ -3402,6 +3491,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         assert(jl_is_datatype(obj));
         jl_cache_type_((jl_datatype_t*)obj);
     }
+    JL_UNLOCK(&typecache_lock); // Might GC
+    jl_safepoint_resume_all_threads(ct); // TODO: move this later to also protect MethodInstance allocations, but we would need to acquire all jl_specializations_get_linfo and jl_module_globalref locks, which is hard
     // Perform fixups: things like updating world ages, inserting methods & specializations, etc.
     for (size_t i = 0; i < s.uniquing_objs.len; i++) {
         uintptr_t item = (uintptr_t)s.uniquing_objs.items[i];
@@ -3438,6 +3529,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                 obj[0] = newobj;
             }
         }
+        else if (otyp == (uintptr_t)jl_binding_type) {
+            jl_value_t *m = obj[0];
+            if (jl_is_binding(m)) {
+                newobj = m; // already done
+            }
+            else {
+                arraylist_push(&cleanup_list, (void*)obj);
+                jl_value_t *name = obj[1];
+                newobj = (jl_value_t*)jl_get_module_binding((jl_module_t*)m, (jl_sym_t*)name, 1);
+                obj[0] = newobj;
+            }
+        }
         else {
             abort(); // should be unreachable
         }
@@ -3453,6 +3556,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         jl_value_t *t = jl_typeof(item);
         if (t == (jl_value_t*)jl_method_instance_type)
             memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored
+        else if (t == (jl_value_t*)jl_binding_type)
+            memset(o, 0xba, sizeof(jl_value_t*) * 3); // stored as mod/name
         o->bits.in_image = 1;
     }
     arraylist_free(&cleanup_list);
@@ -3488,6 +3593,19 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                 memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*));
                 mod->usings.items = newitems;
             }
+            // Move the binding bits back to their correct place
+#ifdef _P64
+            jl_svec_t *table = jl_atomic_load_relaxed(&mod->bindings);
+            for (size_t i = 0; i < jl_svec_len(table); i++) {
+                jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+                if ((jl_value_t*)b == jl_nothing)
+                    continue;
+                jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions);
+                jl_atomic_store_relaxed(&bpart->restriction,
+                    encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved));
+                bpart->reserved = 0;
+            }
+#endif
         }
         else {
             abort();
@@ -3554,6 +3672,15 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     arraylist_push(&jl_linkage_blobs, (void*)image_base);
     arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
     arraylist_push(&jl_image_relocs, (void*)relocs_base);
+    if (restored == NULL) {
+        arraylist_push(&jl_top_mods, (void*)jl_top_module);
+    } else {
+        size_t len = jl_array_nrows(*restored);
+        assert(len > 0);
+        jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1);
+        assert(jl_is_module(topmod));
+        arraylist_push(&jl_top_mods, (void*)topmod);
+    }
     jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
     rebuild_image_blob_tree();
 
diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c
index 9542e54182644..f39e5357c6782 100644
--- a/src/staticdata_utils.c
+++ b/src/staticdata_utils.c
@@ -209,6 +209,17 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
     return found;
 }
 
+static int is_relocatable_ci(htable_t *relocatable_ext_cis, jl_code_instance_t *ci)
+{
+    if (!ci->relocatability)
+        return 0;
+    jl_method_instance_t *mi = ci->def;
+    jl_method_t *m = mi->def.method;
+    if (!ptrhash_has(relocatable_ext_cis, ci) && jl_object_in_image((jl_value_t*)m) && (!jl_is_method(m) || jl_object_in_image((jl_value_t*)m->module)))
+        return 0;
+    return 1;
+}
+
 // Given the list of CodeInstances that were inferred during the build, select
 // those that are (1) external, (2) still valid, (3) are inferred to be called
 // from the worklist or explicitly added by a `precompile` statement, and
@@ -258,7 +269,7 @@ static jl_array_t *queue_external_cis(jl_array_t *list)
 }
 
 // New roots for external methods
-static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_ext_cis, uint64_t key)
+static void jl_collect_new_roots(htable_t *relocatable_ext_cis, jl_array_t *roots, jl_array_t *new_ext_cis, uint64_t key)
 {
     htable_t mset;
     htable_new(&mset, 0);
@@ -269,6 +280,7 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_ext_cis, uin
         jl_method_t *m = ci->def->def.method;
         assert(jl_is_method(m));
         ptrhash_put(&mset, (void*)m, (void*)m);
+        ptrhash_put(relocatable_ext_cis, (void*)ci, (void*)ci);
     }
     int nwithkey;
     void *const *table = mset.table;
@@ -619,7 +631,7 @@ JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t requested_flags, uint8_t actua
         actual_flags &= ~1;
     }
 
-    // 2. Check all flags, execept opt level must be exact
+    // 2. Check all flags, except opt level must be exact
     uint8_t mask = (1 << OPT_LEVEL)-1;
     if ((actual_flags & mask) != (requested_flags & mask))
         return 0;
@@ -1255,7 +1267,8 @@ static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_a
                 JL_GC_PROMISE_ROOTED(owner);
 
                 assert(jl_atomic_load_relaxed(&codeinst->min_world) == minworld);
-                assert(jl_atomic_load_relaxed(&codeinst->max_world) == WORLD_AGE_REVALIDATION_SENTINEL);
+                // See #53586, #53109
+                // assert(jl_atomic_load_relaxed(&codeinst->max_world) == WORLD_AGE_REVALIDATION_SENTINEL);
                 assert(jl_atomic_load_relaxed(&codeinst->inferred));
                 jl_atomic_store_relaxed(&codeinst->max_world, maxvalid);
 
diff --git a/src/subtype.c b/src/subtype.c
index c6b51ec872e75..2011ca8b1c705 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -65,7 +65,6 @@ typedef struct jl_varbinding_t {
     jl_value_t *lb;
     jl_value_t *ub;
     int8_t right;       // whether this variable came from the right side of `A <: B`
-    int8_t occurs;      // occurs in any position
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
@@ -87,6 +86,14 @@ typedef struct jl_varbinding_t {
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
+typedef struct jl_ivarbinding_t {
+    jl_tvar_t **var;
+    jl_value_t **lb;
+    jl_value_t **ub;
+    jl_varbinding_t *root;
+    struct jl_ivarbinding_t *next;
+} jl_ivarbinding_t;
+
 // subtype algorithm state
 typedef struct jl_stenv_t {
     // N.B.: varbindings are created on the stack and rooted there
@@ -171,7 +178,7 @@ static int current_env_length(jl_stenv_t *e)
 typedef struct {
     int8_t *buf;
     int rdepth;
-    int8_t _space[32]; // == 8 * 4
+    int8_t _space[24]; // == 8 * 3
     jl_gcframe_t gcframe;
     jl_value_t *roots[24]; // == 8 * 3
 } jl_savedenv_t;
@@ -200,7 +207,6 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
             roots[i++] = v->ub;
             roots[i++] = (jl_value_t*)v->innervars;
         }
-        se->buf[j++] = v->occurs;
         se->buf[j++] = v->occurs_inv;
         se->buf[j++] = v->occurs_cov;
         se->buf[j++] = v->max_offset;
@@ -235,7 +241,7 @@ static void alloc_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
             ct->gcstack = &se->gcframe;
         }
     }
-    se->buf = (len > 8 ? (int8_t*)malloc_s(len * 4) : se->_space);
+    se->buf = (len > 8 ? (int8_t*)malloc_s(len * 3) : se->_space);
 #ifdef __clang_gcanalyzer__
     memset(se->buf, 0, len * 3);
 #endif
@@ -282,7 +288,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
             v->ub = roots[i++];
             v->innervars = (jl_array_t*)roots[i++];
         }
-        v->occurs = se->buf[j++];
         v->occurs_inv = se->buf[j++];
         v->occurs_cov = se->buf[j++];
         v->max_offset = se->buf[j++];
@@ -294,15 +299,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
         memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*));
 }
 
-static void clean_occurs(jl_stenv_t *e)
-{
-    jl_varbinding_t *v = e->vars;
-    while (v) {
-        v->occurs = 0;
-        v = v->prev;
-    }
-}
-
 #define flip_offset(e) ((e)->Loffset *= -1)
 
 // type utilities
@@ -591,6 +587,8 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi)
 
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param);
 
+#define has_next_union_state(e, R) ((((R) ? &(e)->Runions : &(e)->Lunions)->more) != 0)
+
 static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
@@ -671,8 +669,6 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par
 // of determining whether the variable is concrete.
 static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) JL_NOTSAFEPOINT
 {
-    if (vb != NULL)
-        vb->occurs = 1;
     if (vb != NULL && param) {
         // saturate counters at 2; we don't need values bigger than that
         if (param == 2 && e->invdepth > vb->depth0) {
@@ -881,10 +877,20 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
     // in the environment, rename to get a fresh var.
     JL_GC_PUSH1(&u);
     while (btemp != NULL) {
-        if (btemp->var == u->var ||
-            // outer var can only refer to inner var if bounds changed
+        int aliased = btemp->var == u->var ||
+            // outer var can only refer to inner var if bounds changed (mainly for subtyping path)
             (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) ||
-            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) {
+            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var));
+        if (!aliased && btemp->innervars != NULL) {
+            for (size_t i = 0; i < jl_array_len(btemp->innervars); i++) {
+                jl_tvar_t *ivar = (jl_tvar_t*)jl_array_ptr_ref(btemp->innervars, i);
+                if (ivar == u->var) {
+                    aliased = 1;
+                    break;
+                }
+            }
+        }
+        if (aliased) {
             u = jl_rename_unionall(u);
             break;
         }
@@ -897,7 +903,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
@@ -962,7 +968,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8
     if (R && ans && e->envidx < e->envsz) {
         jl_value_t *val;
         if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type)
-            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout
+            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0); // special token result that represents N::Int in the envout
         else if (!vb.occurs_inv && vb.lb != jl_bottom_type)
             val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb);
         else if (vb.lb == vb.ub)
@@ -1297,6 +1303,9 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
     return ans;
 }
 
+static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e);
+static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT;
+
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -1304,7 +1313,31 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
 {
     if (jl_is_uniontype(x)) {
-        if (x == y) return 1;
+        if (obviously_egal(x, y))
+            return 1;
+        if (e->Runions.depth == 0 && jl_is_typevar(y) && !jl_has_free_typevars(x)) {
+            // Similar to fast path for repeated elements: if there have been no outer
+            // unions on the right, and the right side is a typevar, then we can handle the
+            // typevar first before picking a union element, under the theory that it may
+            // be easy to match or reject this whole union in comparing and setting the lb
+            // and ub of the variable binding, without needing to examine each element.
+            // However, if x contains any free typevars, then each element with a free
+            // typevar must be handled separately from the union of all elements without
+            // free typevars, since the typevars presence might lead to those elements
+            // getting eliminated (omit_bad_union) or degenerate (Union{Ptr{T}, Ptr}) or
+            // combined (Union{T, S} where {T, S <: T}).
+            jl_tvar_t *yvar = (jl_tvar_t *)y;
+            jl_varbinding_t *yb = lookup(e, yvar);
+            while (e->intersection && yb != NULL && yb->lb == yb->ub && jl_is_typevar(yb->lb)) {
+                yvar = (jl_tvar_t *)yb->lb;
+                yb = lookup(e, yvar);
+            }
+            // Note: `x <: ∃y` performs a local ∀-∃ check between `x` and `yb->ub`.
+            // We need to ensure that there's no ∃ typevar as otherwise that check
+            // might cause false alarm due to the accumulated env change.
+            if (yb == NULL || yb->right == 0 || !has_exists_typevar(yb->ub, e))
+                return subtype_var(yvar, x, e, 1, param);
+        }
         x = pick_union_element(x, e, 0);
     }
     if (jl_is_uniontype(y)) {
@@ -1347,7 +1380,8 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
                 if (yy) record_var_occurrence(yy, e, param);
                 if (yr) {
                     record_var_occurrence(xx, e, param);
-                    return subtype(xx->lb, yy->ub, e, 0);
+                    int trysub = e->intersection ? try_subtype_by_bounds(xx->lb, yy->ub, e) : 0;
+                    return trysub || subtype(xx->lb, yy->ub, e, 0);
                 }
                 return var_lt((jl_tvar_t*)x, y, e, param);
             }
@@ -2479,7 +2513,7 @@ static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_
 
 static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT;
 
-// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circulation constraints.
+// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circular constraints.
 static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
 {
     if (jl_is_uniontype(a))
@@ -2488,22 +2522,21 @@ static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
     else if (jl_is_uniontype(b))
         return try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->a, e) ||
                try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->b, e);
-    else if (jl_egal(a, b))
+    else if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || obviously_egal(a, b))
         return 1;
     else if (!jl_is_typevar(b))
         return 0;
-    jl_varbinding_t *vb = e->vars;
-    while (vb != NULL) {
-        if (subtype_by_bounds(b, (jl_value_t *)vb->var, e) && obviously_in_union(a, vb->ub))
-            return 1;
-        vb = vb->prev;
-    }
-    return 0;
+    else if (jl_is_typevar(a) && subtype_by_bounds(a, b, e))
+        return 1;
+    // check if `Union{a, ...} <: b`.
+    jl_varbinding_t *vb = lookup(e, (jl_tvar_t *)b);
+    jl_value_t *blb = vb ? vb->lb : ((jl_tvar_t *)b)->lb;
+    return obviously_in_union(a, blb);
 }
 
 static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
 {
-    if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || try_subtype_by_bounds(a, b, e))
+    if (try_subtype_by_bounds(a, b, e))
         return 1;
     jl_savedenv_t se;
     save_env(e, &se, 1);
@@ -2764,12 +2797,9 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
                 res = jl_bottom_type;
             }
             else if (obviously_egal(var->lb, ub)) {
-                JL_TRY {
-                    res = jl_substitute_var(body, var, ub);
-                }
-                JL_CATCH {
+                res = jl_substitute_var_nothrow(body, var, ub, 2);
+                if (res == NULL)
                     res = jl_bottom_type;
-                }
             }
             else {
                 if (ub != var->ub) {
@@ -2797,12 +2827,56 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
     return res;
 }
 
+// TODO: fuse with reachable_var?
+static int has_typevar_via_flatten_env(jl_value_t *x, jl_tvar_t *t, jl_ivarbinding_t *allvars, int8_t *checked) {
+    if (jl_is_unionall(x)) {
+        jl_tvar_t *var = ((jl_unionall_t *)x)->var;
+        if (has_typevar_via_flatten_env(var->lb, t, allvars, checked) ||
+            has_typevar_via_flatten_env(var->ub, t, allvars, checked))
+            return 1;
+        return has_typevar_via_flatten_env(((jl_unionall_t *)x)->body, t, allvars, checked);
+    }
+    else if (jl_is_uniontype(x)) {
+        return has_typevar_via_flatten_env(((jl_uniontype_t *)x)->a, t, allvars, checked) ||
+            has_typevar_via_flatten_env(((jl_uniontype_t *)x)->b, t, allvars, checked);
+    }
+    else if (jl_is_vararg(x)) {
+        jl_vararg_t *v = (jl_vararg_t *)x;
+        return (v->T && has_typevar_via_flatten_env(v->T, t, allvars, checked)) ||
+            (v->N && has_typevar_via_flatten_env(v->N, t, allvars, checked));
+    }
+    else if (jl_is_datatype(x)) {
+        for (size_t i = 0; i < jl_nparams(x); i++) {
+            if (has_typevar_via_flatten_env(jl_tparam(x, i), t, allvars, checked))
+                return 1;
+        }
+        return 0;
+    }
+    else if (jl_is_typevar(x)) {
+        if (t == (jl_tvar_t *)x)
+            return 1;
+        size_t ind = 0;
+        jl_ivarbinding_t *itemp = allvars;
+        while (itemp && *itemp->var != (jl_tvar_t *)x)
+        {
+            ind++;
+            itemp = itemp->next;
+        }
+        if (itemp == NULL || checked[ind])
+            return 0;
+        checked[ind] = 1;
+        return has_typevar_via_flatten_env(*itemp->lb, t, allvars, checked) ||
+            has_typevar_via_flatten_env(*itemp->ub, t, allvars, checked);
+    }
+    return 0;
+}
+
 // Caller might not have rooted `res`
 static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
     jl_value_t *varval = NULL, *ilb = NULL, *iub = NULL, *nivar = NULL;
-    jl_tvar_t *newvar = vb->var;
-    JL_GC_PUSH5(&res, &newvar, &ilb, &iub, &nivar);
+    jl_tvar_t *newvar = vb->var, *ivar = NULL;
+    JL_GC_PUSH6(&res, &newvar, &ivar, &nivar, &ilb, &iub);
     // try to reduce var to a single value
     if (jl_is_long(vb->ub) && jl_is_typevar(vb->lb)) {
         varval = vb->ub;
@@ -2835,184 +2909,296 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
     if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
-    // remove/replace/rewrap free occurrences of this var in the environment
-
-    // I. Handle indirect innervars (make them behave like direct innervars).
-    //   1) record if btemp->lb/ub has indirect innervars.
-    //   2) substitute `vb->var` with `varval`/`varval`
-    //   note: We only store the innervar in the outmost `varbinding`,
-    //       thus we must check all inner env to ensure the recording/substitution
-    //       is complete
-    int len = current_env_length(e);
-    int8_t *blinding_has_innerdep = (int8_t *)alloca(len);
-    memset(blinding_has_innerdep, 0, len);
+    // flatten all innervar into a (reversed) list
+    size_t icount = 0;
+    if (vb->innervars)
+        icount += jl_array_nrows(vb->innervars);
     for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
-        if (btemp->innervars != NULL) {
-            for (size_t i = 0; i < jl_array_len(btemp->innervars); i++) {
-                jl_tvar_t *ivar = (jl_tvar_t*)jl_array_ptr_ref(btemp->innervars, i);
-                ilb = ivar->lb; iub = ivar->ub;
-                int has_innerdep = 0;
-                if (jl_has_typevar(ilb, vb->var)) {
-                    has_innerdep = 1;
-                    if (varval) {
-                        JL_TRY {
-                            ilb = jl_substitute_var(ilb, vb->var, varval);
-                        }
-                        JL_CATCH {
-                            res = jl_bottom_type;
-                        }
-                    }
-                    else if (newvar != vb->var) {
-                        ilb = jl_substitute_var(ilb, vb->var, (jl_value_t*)newvar);
-                    }
-                }
-                if (jl_has_typevar(iub, vb->var)) {
-                    has_innerdep = 1;
-                    if (varval) {
-                        JL_TRY {
-                            iub = jl_substitute_var(iub, vb->var, varval);
-                        }
-                        JL_CATCH {
-                            res = jl_bottom_type;
-                        }
-                    }
-                    else if (newvar != vb->var) {
-                        iub = jl_substitute_var(iub, vb->var, (jl_value_t*)newvar);
-                    }
-                }
-                if (!has_innerdep) continue;
-                int need_substitution = 0;
-                if (ilb != ivar->lb || iub != ivar->ub) {
-                    need_substitution = 1;
-                    nivar = (jl_value_t *)jl_new_typevar(ivar->name, ilb, iub);
-                    jl_array_ptr_set(btemp->innervars, i, nivar);
-                    if (jl_has_typevar(res, ivar))
-                        res = jl_substitute_var(res, ivar, nivar);
-                }
-                int envind = 0;
-                for (jl_varbinding_t *btemp2 = e->vars; btemp2 != btemp->prev; btemp2 = btemp2->prev) {
-                    if (jl_has_typevar(btemp2->lb, ivar)) {
-                        if (need_substitution)
-                            btemp2->lb = jl_substitute_var(btemp2->lb, ivar, nivar);
-                        blinding_has_innerdep[envind] |= 1;
-                    }
-                    if (jl_has_typevar(btemp2->ub, ivar)) {
-                        if (need_substitution)
-                            btemp2->ub = jl_substitute_var(btemp2->ub, ivar, nivar);
-                        blinding_has_innerdep[envind] |= 2;
-                    }
-                    envind++;
-                }
-            }
+        if (btemp->innervars != NULL)
+            icount += jl_array_nrows(btemp->innervars);
+    }
+    jl_svec_t *p = NULL;
+    jl_value_t **iparams;
+    jl_value_t **roots;
+    JL_GC_PUSHARGS(roots, icount < 22 ? 3*icount : 1);
+    if (icount < 22) {
+        iparams = roots;
+    }
+    else {
+        p = jl_alloc_svec(3*icount);
+        roots[0] = (jl_value_t*)p;
+        iparams = jl_svec_data(p);
+    }
+    jl_ivarbinding_t *allvars = NULL;
+    size_t niparams = 0;
+    if (vb->innervars) {
+        for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) {
+            jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(vb->innervars, i);
+            jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+            inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar;
+            inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb;
+            inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub;
+            inew->root = vb;
+            inew->next = allvars;
+            allvars = inew;
         }
     }
-    // II. Handle direct innervars.
-    jl_varbinding_t *wrap = NULL;
-    int envind = 0;
     for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
-        int has_innerdep = blinding_has_innerdep[envind++];
-        int lb_has_innerdep = has_innerdep & 1;
-        int ub_has_innerdep = has_innerdep & 2;
-        assert(!has_innerdep || btemp->depth0 == vb->depth0);
-        int lb_has_dep = jl_has_typevar(btemp->lb, vb->var);
-        int ub_has_dep = jl_has_typevar(btemp->ub, vb->var);
-        if (lb_has_innerdep || lb_has_dep) {
-            if (vb->lb == (jl_value_t*)btemp->var) {
+        jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+        inew->var = &btemp->var;
+        inew->lb = &btemp->lb;
+        inew->ub = &btemp->ub;
+        inew->root = btemp;
+        inew->next = allvars;
+        allvars = inew;
+        if (btemp->innervars) {
+            for (size_t i = 0; i < jl_array_nrows(btemp->innervars); i++) {
+                jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(btemp->innervars, i);
+                jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+                inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar;
+                inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb;
+                inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub;
+                inew->root = btemp;
+                inew->next = allvars;
+                allvars = inew;
+            }
+        }
+    }
+
+    // remove/replace/rewrap free occurrences of this var in the environment
+    int wrapped = 0;
+    jl_ivarbinding_t *pwrap = NULL;
+    int vcount = icount + current_env_length(e);
+    int8_t *checked = (int8_t *)alloca(vcount);
+    for (jl_ivarbinding_t *btemp = allvars, *pbtemp = NULL; btemp != NULL; btemp = btemp->next) {
+        int bdepth0 = btemp->root->depth0;
+        int innerflag = 0;
+        ivar = *btemp->var;
+        ilb = *btemp->lb;
+        iub = *btemp->ub;
+        if (jl_has_typevar(ilb, vb->var)) {
+            assert(btemp->root->var == ivar || bdepth0 == vb->depth0);
+            if (vb->lb == (jl_value_t*)ivar) {
+                JL_GC_POP();
                 JL_GC_POP();
                 return jl_bottom_type;
             }
             if (varval) {
-                if (lb_has_dep) { // inner substitution has been handled
-                    JL_TRY {
-                        btemp->lb = jl_substitute_var(btemp->lb, vb->var, varval);
-                    }
-                    JL_CATCH {
-                        res = jl_bottom_type;
-                    }
+                JL_TRY {
+                    *btemp->lb = jl_substitute_var(ilb, vb->var, varval);
+                }
+                JL_CATCH {
+                    res = jl_bottom_type;
                 }
             }
-            else if (btemp->lb == (jl_value_t*)vb->var) {
-                btemp->lb = vb->lb;
-            }
-            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
-                // if our variable is T, and some outer variable has constraint S = Ref{T},
-                // move the `where T` outside `where S` instead of putting it here. issue #21243.
-                if (newvar != vb->var && lb_has_dep) // inner substitution has been handled
-                    btemp->lb = jl_substitute_var(btemp->lb, vb->var, (jl_value_t*)newvar);
-                wrap = btemp;
+            else if (ilb == (jl_value_t*)vb->var) {
+                *btemp->lb = vb->lb;
             }
             else {
-                btemp->lb = jl_new_struct(jl_unionall_type, vb->var, btemp->lb);
+                innerflag |= 1;
             }
-            assert((jl_value_t*)btemp->var != btemp->lb);
         }
-        if (ub_has_innerdep || ub_has_dep) {
-            if (vb->ub == (jl_value_t*)btemp->var) {
-                // TODO: handle `omit_bad_union` correctly if `ub_has_innerdep`
-                btemp->ub = omit_bad_union(btemp->ub, vb->var);
-                if (btemp->ub == jl_bottom_type && btemp->ub != btemp->lb) {
+        if (jl_has_typevar(iub, vb->var)) {
+            assert(btemp->root->var == ivar || bdepth0 == vb->depth0);
+            if (vb->ub == (jl_value_t*)ivar) {
+                *btemp->ub = omit_bad_union(iub, vb->var);
+                if (*btemp->ub == jl_bottom_type && *btemp->ub != *btemp->lb) {
+                    JL_GC_POP();
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
             }
             if (varval) {
-                if (ub_has_dep) { // inner substitution has been handled
-                    JL_TRY {
-                        btemp->ub = jl_substitute_var(btemp->ub, vb->var, varval);
-                    }
-                    JL_CATCH {
-                        res = jl_bottom_type;
-                    }
-                }
+                iub = jl_substitute_var_nothrow(iub, vb->var, varval, 2);
+                if (iub == NULL)
+                    res = jl_bottom_type;
+                else
+                    *btemp->ub = iub;
             }
-            else if (btemp->ub == (jl_value_t*)vb->var) {
+            else if (iub == (jl_value_t*)vb->var) {
                 // TODO: this loses some constraints, such as in this test, where we replace T4<:S3 (e.g. T4==S3 since T4 only appears covariantly once) with T4<:Any
                 // a = Tuple{Float64,T3,T4} where T4 where T3
                 // b = Tuple{S2,Tuple{S3},S3} where S2 where S3
                 // Tuple{Float64, T3, T4} where {S3, T3<:Tuple{S3}, T4<:S3}
-                btemp->ub = vb->ub;
+                *btemp->ub = vb->ub;
             }
-            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
-                if (newvar != vb->var && ub_has_dep) // inner substitution has been handled
-                    btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar);
-                wrap = btemp;
+            else {
+                innerflag |= 2;
             }
-            else
-                btemp->ub = jl_new_struct(jl_unionall_type, vb->var, btemp->ub);
-            assert((jl_value_t*)btemp->var != btemp->ub);
+            if (innerflag) {
+                memset(checked, 0, vcount);
+                if (bdepth0 != vb->depth0 ||
+                    has_typevar_via_flatten_env(vb->lb, ivar, allvars, checked) ||
+                    has_typevar_via_flatten_env(vb->ub, ivar, allvars, checked)) {
+                    if (innerflag & 1)
+                        *btemp->lb = jl_new_struct(jl_unionall_type, vb->var, ilb);
+                    if (innerflag & 2)
+                        *btemp->ub = jl_new_struct(jl_unionall_type, vb->var, iub);
+                }
+                else {
+                    assert(btemp->root != vb);
+                    // if our variable is T, and some outer variable has constraint S = Ref{T},
+                    // move the `where T` outside `where S` instead of putting it here. issue #21243.
+                    if (newvar != vb->var) {
+                        if (innerflag & 1)
+                            *btemp->lb = jl_substitute_var(ilb, vb->var, (jl_value_t*)newvar);
+                        if (innerflag & 2)
+                            *btemp->ub = jl_substitute_var(iub, vb->var, (jl_value_t*)newvar);
+                    }
+                    if (!wrapped)
+                        pwrap = pbtemp;
+                    wrapped = 1;
+                }
+            }
+            assert((jl_value_t*)ivar != *btemp->lb);
+            assert((jl_value_t*)ivar != *btemp->ub);
+        }
+        pbtemp = btemp;
+    }
+
+    // Insert the newvar into the (reversed) var list if needed.
+    if (wrapped) {
+        jl_ivarbinding_t *wrap = pwrap == NULL ? allvars : pwrap->next;
+        jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+        inew->var = &newvar;
+        inew->lb = &newvar->lb;
+        inew->ub = &newvar->ub;;
+        inew->root = wrap->root;
+        inew->next = wrap;
+        if (pwrap != NULL)
+            pwrap->next = inew;
+        else
+            allvars = inew;
+        vcount++;
+    }
+
+    // Re-sort the innervar inside the (reversed) var list.
+    // `jl_has_typevar` is used as the partial-ordering predicate.
+    // If this is slow, we could possibly switch to a simpler graph sort, such as Tarjan's SCC.
+    if (icount > 0) {
+        jl_ivarbinding_t *pib1 = NULL;
+#ifndef NDEBUG
+        size_t sort_count = 0;
+#endif
+        while (1) {
+            jl_ivarbinding_t *ib1 = pib1 == NULL ? allvars : pib1->next;
+            if (ib1 == NULL) break;
+            assert((++sort_count) <= (vcount * (vcount + 1)) >> 1);
+            int lbfree = jl_has_free_typevars(*ib1->lb);
+            int ubfree = jl_has_free_typevars(*ib1->ub);
+            if (lbfree || ubfree) {
+                int changed = 0;
+                jl_ivarbinding_t *pib2 = ib1, *ib2 = ib1->next;
+                while (ib2 != NULL) {
+                    int isinnervar = ib2->root->var != *ib2->var;
+                    if (isinnervar && ib1->root->depth0 == ib2->root->depth0 &&
+                        ((lbfree && jl_has_typevar(*ib1->lb, *ib2->var)) ||
+                         (ubfree && jl_has_typevar(*ib1->ub, *ib2->var)))) {
+                        pib2->next = ib2->next;
+                        ib2->next = ib1;
+                        ib2->root = ib1->root;
+                        if (pib1)
+                            pib1->next = ib2;
+                        else
+                            allvars = ib2;
+                        changed = 1;
+                        break;
+                    }
+                    pib2 = ib2;
+                    ib2 = ib2->next;
+                }
+                if (changed) continue;
+            }
+            pib1 = ib1;
+        }
+    }
+
+    // Freeze the innervars' lb/ub and perform substitution if needed.
+    for (jl_ivarbinding_t *btemp1 = allvars; btemp1 != NULL; btemp1 = btemp1->next) {
+        ivar = *btemp1->var;
+        ilb = *btemp1->lb;
+        iub = *btemp1->ub;
+        int isinnervar = btemp1->root->var != ivar;
+        if (isinnervar && (ivar->lb != ilb || ivar->ub != iub)) {
+            nivar = (jl_value_t *)jl_new_typevar(ivar->name, ilb, iub);
+            if (jl_has_typevar(res, ivar))
+                res = jl_substitute_var(res, ivar, nivar);
+            for (jl_ivarbinding_t *btemp2 = btemp1->next; btemp2 != NULL; btemp2 = btemp2->next) {
+                ilb = *btemp2->lb;
+                iub = *btemp2->ub;
+                if (jl_has_typevar(ilb, ivar))
+                    *btemp2->lb = jl_substitute_var(ilb, ivar, nivar);
+                if (jl_has_typevar(iub, ivar))
+                    *btemp2->ub = jl_substitute_var(iub, ivar, nivar);
+            }
+            if (!wrapped && !varval) {
+                // newvar also needs bounds substitution.
+                if (jl_has_typevar(vb->lb, ivar))
+                    vb->lb = jl_substitute_var(vb->lb, ivar, nivar);
+                if (jl_has_typevar(vb->ub, ivar))
+                    vb->ub = jl_substitute_var(vb->ub, ivar, nivar);
+            }
+            *btemp1->var = (jl_tvar_t *)nivar;
         }
     }
 
-    if (wrap) {
-        // We only assign the newvar with the outmost var.
-        // This make sure we never create a UnionAll with 2 identical vars.
-        if (wrap->innervars == NULL)
-            wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
-        jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)newvar);
-        // TODO: should we move all the innervars here too?
+    // Switch back the innervars' storage.
+    while (1) {
+        jl_ivarbinding_t *btemp = allvars;
+        jl_varbinding_t *root = btemp ? btemp->root : vb;
+        size_t icount = 0;
+        while (btemp && btemp->root == root) {
+            btemp = btemp->next;
+            icount++;
+        }
+        if (root != vb) icount--;
+        if (root->innervars != NULL) {
+            jl_array_t *rinnervars = root->innervars;
+            JL_GC_PROMISE_ROOTED(rinnervars);
+            size_t len = jl_array_nrows(rinnervars);
+            if (icount > len)
+                jl_array_grow_end(rinnervars, icount - len);
+            if (icount < len)
+                jl_array_del_end(rinnervars, len - icount);
+        }
+        else if (icount > 0) {
+            root->innervars = jl_alloc_array_1d(jl_array_any_type, icount);
+        }
+        btemp = allvars;
+        for (size_t i = icount; i > 0; i--) {
+            jl_array_ptr_set(root->innervars, i - 1, (jl_value_t*)*btemp->var);
+            btemp = btemp->next;
+        }
+        if (root == vb) break;
+        assert(*btemp->var == root->var);
+        allvars = btemp->next;
+        assert(allvars == NULL || allvars->root != root);
     }
+    JL_GC_POP();
 
     // if `v` still occurs, re-wrap body in `UnionAll v` or eliminate the UnionAll
     if (jl_has_typevar(res, vb->var)) {
         if (varval) {
-            JL_TRY {
-                // you can construct `T{x} where x` even if T's parameter is actually
-                // limited. in that case we might get an invalid instantiation here.
-                res = jl_substitute_var(res, vb->var, varval);
-                // simplify chains of UnionAlls where bounds become equal
-                while (jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb,
-                                                             ((jl_unionall_t*)res)->var->ub))
-                    res = jl_instantiate_unionall((jl_unionall_t*)res, ((jl_unionall_t*)res)->var->lb);
+            // you can construct `T{x} where x` even if T's parameter is actually
+            // limited. in that case we might get an invalid instantiation here.
+            res = jl_substitute_var_nothrow(res, vb->var, varval, 2);
+            // simplify chains of UnionAlls where bounds become equal
+            while (res != NULL && jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb,
+                                                         ((jl_unionall_t*)res)->var->ub)) {
+                jl_unionall_t * ures = (jl_unionall_t *)res;
+                res = jl_substitute_var_nothrow(ures->body, ures->var, ures->var->lb, 2);
             }
-            JL_CATCH {
+            if (res == NULL)
                 res = jl_bottom_type;
-            }
         }
         else {
+            // re-fresh newvar if bounds changed.
+            if (vb->lb != newvar->lb || vb->ub != newvar->ub)
+                newvar = jl_new_typevar(newvar->name, vb->lb, vb->ub);
             if (newvar != vb->var)
                 res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar);
             varval = (jl_value_t*)newvar;
-            if (!wrap)
+            if (!wrapped)
                 res = jl_type_unionall((jl_tvar_t*)newvar, res);
         }
     }
@@ -3020,24 +3206,7 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
     if (vb->innervars != NULL) {
         for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) {
             jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i);
-            // the `btemp->prev` walk is only giving a sort of post-order guarantee (since we are
-            // iterating 2 trees at once), so once we set `wrap`, there might remain other branches
-            // of the type walk that now still may have incomplete bounds: finish those now too
-            jl_varbinding_t *wrap = NULL;
-            for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
-                if (btemp->depth0 == vb->depth0 && (jl_has_typevar(btemp->lb, var) || jl_has_typevar(btemp->ub, var))) {
-                    wrap = btemp;
-                }
-            }
-            if (wrap) {
-                if (wrap->innervars == NULL)
-                    wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
-                jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)var);
-            }
-            else if (res != jl_bottom_type) {
-                if (jl_has_typevar(res, var))
-                    res = jl_type_unionall((jl_tvar_t*)var, res);
-            }
+            res = jl_type_unionall(var, res);
         }
     }
 
@@ -3056,9 +3225,6 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
 static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param, jl_varbinding_t *vb)
 {
     jl_varbinding_t *btemp = e->vars;
-    // if the var for this unionall (based on identity) already appears somewhere
-    // in the environment, rename to get a fresh var.
-    // TODO: might need to look inside types in btemp->lb and btemp->ub
     int envsize = 0;
     while (btemp != NULL) {
         envsize++;
@@ -3066,13 +3232,9 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
             vb->limited = 1;
             return t;
         }
-        if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
-            btemp->ub == (jl_value_t*)u->var) {
-            u = jl_rename_unionall(u);
-            break;
-        }
         btemp = btemp->prev;
     }
+    u = unalias_unionall(u, e);
     JL_GC_PUSH1(&u);
     vb->var = u->var;
     e->vars = vb;
@@ -3163,7 +3325,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 {
     jl_value_t *res = NULL;
     jl_savedenv_t se;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars);
     save_env(e, &se, 1);
@@ -3192,7 +3354,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
                 vb.ub = vb.var->ub;
             }
             restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0);
-            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
+            vb.occurs_cov = vb.occurs_inv = 0;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
@@ -3239,7 +3401,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
             ii = (jl_value_t*)vmy;
         else {
             JL_GC_PUSH1(&ii);
-            ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1);
+            ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1, 0);
             JL_GC_POP();
         }
         return ii;
@@ -3295,7 +3457,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
         else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2))
             ii = (jl_value_t*)vmy;
         else
-            ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1);
+            ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1, 0);
     }
     JL_GC_POP();
     return ii;
@@ -3893,77 +4055,12 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
     return jl_bottom_type;
 }
 
-static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count)
+static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int count)
 {
-    if (count == 0)
-        alloc_env(e, se, 1);
-    jl_value_t **roots = NULL;
-    int nroots = 0;
-    if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
-        jl_svec_t *sv = (jl_svec_t*)se->roots[0];
-        assert(jl_is_svec(sv));
-        roots = jl_svec_data(sv);
-        nroots = jl_svec_len(sv);
-    }
-    else {
-        roots = se->roots;
-        nroots = se->gcframe.nroots >> 2;
-    }
-    int m = 0, n = 0;
-    jl_varbinding_t *v = e->vars;
-    while (v != NULL) {
-        if (count == 0) {
-            // need to initialize this
-            se->buf[m] = 0;
-            se->buf[m+1] = 0;
-            se->buf[m+2] = 0;
-            se->buf[m+3] = v->max_offset;
-        }
-        if (v->occurs) {
-            // only merge lb/ub/innervars if this var occurs.
-            jl_value_t *b1, *b2;
-            b1 = roots[n];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->lb;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n] = b1 ? simple_meet(b1, b2, 0) : b2;
-            b1 = roots[n+1];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->ub;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n+1] = b1 ? simple_join(b1, b2) : b2;
-            b1 = roots[n+2];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = (jl_value_t*)v->innervars;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            if (b2 && b1 != b2) {
-                if (b1)
-                    jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
-                else
-                    roots[n+2] = b2;
-            }
-            // record the meeted vars.
-            se->buf[m] = 1;
-        }
-        // always merge occurs_inv/cov by max (never decrease)
-        if (v->occurs_inv > se->buf[m+1])
-            se->buf[m+1] = v->occurs_inv;
-        if (v->occurs_cov > se->buf[m+2])
-            se->buf[m+2] = v->occurs_cov;
-        // always merge max_offset by min
-        if (!v->intersected && v->max_offset < se->buf[m+3])
-            se->buf[m+3] = v->max_offset;
-        m = m + 4;
-        n = n + 3;
-        v = v->prev;
+    if (count == 0) {
+        save_env(e, me, 1);
+        return 1;
     }
-    assert(n == nroots); (void)nroots;
-    return count + 1;
-}
-
-// merge untouched vars' info.
-static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
-{
     jl_value_t **merged = NULL;
     jl_value_t **saved = NULL;
     int nroots = 0;
@@ -3985,47 +4082,49 @@ static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
     }
     assert(nroots == current_env_length(e) * 3);
     assert(nroots % 3 == 0);
-    for (int n = 0, m = 0; n < nroots; n += 3, m += 4) {
-        if (merged[n] == NULL)
-            merged[n] = saved[n];
-        if (merged[n+1] == NULL)
-            merged[n+1] = saved[n+1];
-        jl_value_t *b1, *b2;
+    int m = 0, n = 0;
+    jl_varbinding_t *v = e->vars;
+    while (v != NULL) {
+        jl_value_t *b0, *b1, *b2;
+        // merge `lb`
+        b0 = saved[n];
+        b1 = merged[n];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->lb;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n] = (b1 == b0 || b2 == b0) ? b0 : simple_meet(b1, b2, 0);
+        // merge `ub`
+        b0 = saved[n+1];
+        b1 = merged[n+1];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->ub;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n+1] = (b1 == b0 || b2 == b0) ? b0 : simple_join(b1, b2);
+        // merge `innervars`
         b1 = merged[n+2];
         JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-        b2 = saved[n+2];
-        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame
+        b2 = (jl_value_t*)v->innervars;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
         if (b2 && b1 != b2) {
             if (b1)
                 jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
             else
                 merged[n+2] = b2;
         }
-        me->buf[m] |= se->buf[m];
-    }
-}
-
-static void expand_local_env(jl_stenv_t *e, jl_value_t *res)
-{
-    jl_varbinding_t *v = e->vars;
-    // Here we pull in some typevar missed in fastpath.
-    while (v != NULL) {
-        v->occurs = v->occurs || jl_has_typevar(res, v->var);
-        assert(v->occurs == 0 || v->occurs == 1);
-        v = v->prev;
-    }
-    v = e->vars;
-    while (v != NULL) {
-        if (v->occurs == 1) {
-            jl_varbinding_t *v2 = e->vars;
-            while (v2 != NULL) {
-                if (v2 != v && v2->occurs == 0)
-                    v2->occurs = -(jl_has_typevar(v->lb, v2->var) || jl_has_typevar(v->ub, v2->var));
-                v2 = v2->prev;
-            }
-        }
+        // merge occurs_inv/cov by max (never decrease)
+        if (v->occurs_inv > me->buf[m])
+            me->buf[m] = v->occurs_inv;
+        if (v->occurs_cov > me->buf[m+1])
+            me->buf[m+1] = v->occurs_cov;
+        // merge max_offset by min
+        if (!v->intersected && v->max_offset < me->buf[m+2])
+            me->buf[m+2] = v->max_offset;
+        m = m + 3;
+        n = n + 3;
         v = v->prev;
     }
+    assert(n == nroots); (void)nroots;
+    return count + 1;
 }
 
 static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
@@ -4038,12 +4137,9 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     jl_savedenv_t se, me;
     save_env(e, &se, 1);
     int niter = 0, total_iter = 0;
-    clean_occurs(e);
     is[0] = intersect(x, y, e, 0); // root
-    if (is[0] != jl_bottom_type) {
-        expand_local_env(e, is[0]);
-        niter = merge_env(e, &me, niter);
-    }
+    if (is[0] != jl_bottom_type)
+        niter = merge_env(e, &me, &se, niter);
     restore_env(e, &se, 1);
     while (next_union_state(e, 1)) {
         if (e->emptiness_only && is[0] != jl_bottom_type)
@@ -4051,12 +4147,9 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         e->Runions.depth = 0;
         e->Runions.more = 0;
 
-        clean_occurs(e);
         is[1] = intersect(x, y, e, 0);
-        if (is[1] != jl_bottom_type) {
-            expand_local_env(e, is[1]);
-            niter = merge_env(e, &me, niter);
-        }
+        if (is[1] != jl_bottom_type)
+            niter = merge_env(e, &me, &se, niter);
         restore_env(e, &se, 1);
         if (is[0] == jl_bottom_type)
             is[0] = is[1];
@@ -4065,13 +4158,18 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
             is[0] = jl_type_union(is, 2);
         }
         total_iter++;
-        if (niter > 4 || total_iter > 400000) {
+        if (has_next_union_state(e, 1) && (niter > 4 || total_iter > 400000)) {
             is[0] = y;
+            // we give up precise intersection here, just restore the saved env
+            restore_env(e, &se, 1);
+            if (niter > 0) {
+                free_env(&me);
+                niter = 0;
+            }
             break;
         }
     }
     if (niter) {
-        final_merge_env(e, &me, &se);
         restore_env(e, &me, 1);
         free_env(&me);
     }
@@ -4519,7 +4617,7 @@ static jl_value_t *insert_nondiagonal(jl_value_t *type, jl_varbinding_t *troot,
         JL_GC_PUSH2(&newt, &n);
         newt = insert_nondiagonal(t, troot, widen2ub);
         if (t != newt)
-            type = (jl_value_t *)jl_wrap_vararg(newt, n, 0);
+            type = (jl_value_t *)jl_wrap_vararg(newt, n, 0, 0);
         JL_GC_POP();
     }
     else if (jl_is_datatype(type)) {
@@ -4556,7 +4654,7 @@ static jl_value_t *_widen_diagonal(jl_value_t *t, jl_varbinding_t *troot) {
 
 static jl_value_t *widen_diagonal(jl_value_t *t, jl_unionall_t *u, jl_varbinding_t *troot)
 {
-    jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot };
+    jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot };
     jl_value_t *nt;
     JL_GC_PUSH2(&vb.innervars, &nt);
     if (jl_is_unionall(u->body))
@@ -4579,6 +4677,56 @@ JL_DLLEXPORT jl_value_t *jl_widen_diagonal(jl_value_t *t, jl_unionall_t *ua)
 }
 
 // specificity comparison
+static int count_missing_wrap(jl_value_t *x, jl_typeenv_t *env)
+{
+    if (!jl_has_free_typevars(x))
+        return 0;
+    jl_typeenv_t *wrapped = NULL;
+    int count = 0;
+    for (jl_typeenv_t *env2 = env; env2 != NULL; env2 = env2->prev) {
+        int need_wrap = 0;
+        for (jl_typeenv_t *env3 = wrapped; env3 != NULL && need_wrap == 0; env3 = env3->prev) {
+            if (env3->var == env2->var)
+                need_wrap = -1;
+            else if (jl_has_typevar(env3->var->lb, env2->var) || jl_has_typevar(env3->var->ub, env2->var))
+                need_wrap = 1;
+        }
+        need_wrap = need_wrap == 0 ? jl_has_typevar(x, env2->var) :
+                    need_wrap == -1 ? 0 : 1;
+        if (need_wrap) {
+            count++;
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = env2->var;
+            newenv->val = NULL;
+            newenv->prev = wrapped;
+            wrapped = newenv;
+        }
+    }
+    return count;
+}
+
+static int obvious_subtype_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *subtype, int wrapx, int wrapy)
+{
+    if (wrapx != 0 || wrapy != 0) {
+        int wrap_count = wrapx - wrapy;
+        while (wrap_count > 0 && jl_is_unionall(y))
+        {
+            y = ((jl_unionall_t*)y)->body;
+            wrap_count--;
+        }
+        while (wrap_count < 0 && jl_is_unionall(x))
+        {
+            x = ((jl_unionall_t*)x)->body;
+            wrap_count++;
+        }
+        if (wrap_count > 0) {
+            if (obvious_subtype(jl_unwrap_unionall(x), y, y0, subtype) && !*subtype)
+                return 1;
+            return 0;
+        }
+    }
+    return obvious_subtype(x, y, y0, subtype);
+}
 
 static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env)
 {
@@ -4601,12 +4749,14 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
         a = b;
         b = temp;
     }
+    int wrapa = count_missing_wrap(a, env);
+    int wrapb = count_missing_wrap(b, env);
     // first check if a <: b has an obvious answer
     int subtype_ab = 2;
     if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) {
         subtype_ab = 1;
     }
-    else if (obvious_subtype(a, b, b0, &subtype_ab)) {
+    else if (obvious_subtype_msp(a, b, b0, &subtype_ab, wrapa, wrapb)) {
 #ifdef NDEBUG
         if (subtype_ab == 0)
             return 0;
@@ -4620,7 +4770,7 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
     if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) {
         subtype_ba = 1;
     }
-    else if (obvious_subtype(b, a, a0, &subtype_ba)) {
+    else if (obvious_subtype_msp(b, a, a0, &subtype_ba, wrapb, wrapa)) {
 #ifdef NDEBUG
         if (subtype_ba == 0)
             return 0;
@@ -4685,7 +4835,9 @@ static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *e
         return 1;
     }
     int obvious_sub = 2;
-    if (obvious_subtype(x, y, y0, &obvious_sub)) {
+    int wrapx = count_missing_wrap(x, env);
+    int wrapy = count_missing_wrap(y, env);
+    if (obvious_subtype_msp(x, y, y0, &obvious_sub, wrapx, wrapy)) {
 #ifdef NDEBUG
         return obvious_sub;
 #endif
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index 4e9868ed45c23..6513370da4dae 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -99,7 +99,7 @@ typedef intptr_t ssize_t;
 #define BYTE_ORDER     __BYTE_ORDER
 #endif
 
-#if defined(__APPLE__) || defined(__FreeBSD__)
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)
 #include <machine/endian.h>
 #define __LITTLE_ENDIAN  LITTLE_ENDIAN
 #define __BIG_ENDIAN     BIG_ENDIAN
@@ -123,6 +123,13 @@ typedef intptr_t ssize_t;
 #define STATIC_INLINE static inline
 #define FORCE_INLINE static inline __attribute__((always_inline))
 
+#ifdef _OS_WINDOWS_
+#define EXTERN_INLINE_DECLARE inline
+#else
+#define EXTERN_INLINE_DECLARE inline __attribute__ ((visibility("default")))
+#endif
+#define EXTERN_INLINE_DEFINE extern inline JL_DLLEXPORT
+
 #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
 #  define NOINLINE __declspec(noinline)
 #  define NOINLINE_DECL(f) __declspec(noinline) f
diff --git a/src/support/platform.h b/src/support/platform.h
index 9ab28ac1c70c3..a0dd84c9c20b6 100644
--- a/src/support/platform.h
+++ b/src/support/platform.h
@@ -16,6 +16,7 @@
  *          _COMPILER_GCC_
  *      OS:
  *          _OS_FREEBSD_
+ *          _OS_OPENBSD_
  *          _OS_LINUX_
  *          _OS_WINDOWS_
  *          _OS_DARWIN_
@@ -81,6 +82,8 @@
 
 #if defined(__FreeBSD__)
 #define _OS_FREEBSD_
+#elif defined(__OpenBSD__)
+#define _OS_OPENBSD_
 #elif defined(__linux__)
 #define _OS_LINUX_
 #elif defined(_WIN32) || defined(_WIN64)
diff --git a/src/support/strtod.c b/src/support/strtod.c
index 24f556d0c086b..e0ad1bf33435a 100644
--- a/src/support/strtod.c
+++ b/src/support/strtod.c
@@ -11,7 +11,7 @@
 extern "C" {
 #endif
 
-#if !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_) && !defined(__OpenBSD__)
 // This code path should be used for systems that support the strtod_l function
 
 // Cache locale object
diff --git a/src/support/utf8.c b/src/support/utf8.c
index 17dcf5f1efd51..46a6515e9b753 100644
--- a/src/support/utf8.c
+++ b/src/support/utf8.c
@@ -27,11 +27,10 @@
 
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
-#define snprintf _snprintf
 #else
-#ifndef __FreeBSD__
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__)
 #include <alloca.h>
-#endif /* __FreeBSD__ */
+#endif /* !__FreeBSD__ && !__OpenBSD__ */
 #endif
 #include <assert.h>
 
diff --git a/src/support/utf8.h b/src/support/utf8.h
index f1c886e764064..eab86f602ee61 100644
--- a/src/support/utf8.h
+++ b/src/support/utf8.h
@@ -12,7 +12,7 @@ extern "C" {
 /* is c the start of a utf8 sequence? */
 #define isutf(c) (((c)&0xC0)!=0x80)
 
-#define UEOF ((uint32_t)-1)
+#define UEOF (UINT32_MAX)
 
 /* convert UTF-8 data to wide character */
 size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
diff --git a/src/support/win32-clang-ABI-bug/optional b/src/support/win32-clang-ABI-bug/optional
index 135888d415f73..a2ecdad1e33ce 100644
--- a/src/support/win32-clang-ABI-bug/optional
+++ b/src/support/win32-clang-ABI-bug/optional
@@ -23,6 +23,7 @@
 #include <new>
 #include <utility>
 #include <type_traits>
+#include "llvm-version.h"
 
 namespace std {
 
@@ -56,6 +57,15 @@ namespace optional_detail {
 //
 // The move constructible / assignable conditions emulate the remaining behavior
 // of std::is_trivially_copyable.
+#if JL_LLVM_VERSION >= 170000
+template <typename T,
+          bool = (std::is_trivially_copy_constructible<T>::value &&
+                  std::is_trivially_copy_assignable<T>::value &&
+                  (std::is_trivially_move_constructible<T>::value ||
+                   !std::is_move_constructible<T>::value) &&
+                  (std::is_trivially_move_assignable<T>::value ||
+                   !std::is_move_assignable<T>::value))>
+#else
 template <typename T,
           bool = (llvm::is_trivially_copy_constructible<T>::value &&
                   std::is_trivially_copy_assignable<T>::value &&
@@ -63,6 +73,7 @@ template <typename T,
                    !std::is_move_constructible<T>::value) &&
                   (std::is_trivially_move_assignable<T>::value ||
                    !std::is_move_assignable<T>::value))>
+#endif
 class OptionalStorage {
   union {
     char empty;
diff --git a/src/symbol.c b/src/symbol.c
index b488a0cc77065..ef2c11e0842e8 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,6 +15,7 @@
 extern "C" {
 #endif
 
+uv_mutex_t symtab_lock;
 static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
@@ -35,7 +36,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
     jl_sym_t *sym;
     size_t nb = symbol_nbytes(len);
-    jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 0);
+    jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(nb, 0, sizeof(void*), 0);
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
     jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED);
@@ -86,15 +87,15 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
     _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
-        uv_mutex_lock(&gc_perm_lock);
+        uv_mutex_lock(&symtab_lock);
         // Someone might have updated it, check and look up again
         if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&symtab_lock);
             return node;
         }
         node = mk_symbol(str, len);
         jl_atomic_store_release(slot, node);
-        uv_mutex_unlock(&gc_perm_lock);
+        uv_mutex_unlock(&symtab_lock);
     }
     return node;
 }
diff --git a/src/sys.c b/src/sys.c
index 0c0016ee07657..712d232da363a 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -478,25 +478,10 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT
 {
-    int cpu = jl_cpu_threads();
-    int masksize = uv_cpumask_size();
-    if (masksize < 0 || jl_running_under_rr(0))
-        return cpu;
-    uv_thread_t tid = uv_thread_self();
-    char *cpumask = (char *)calloc(masksize, sizeof(char));
-    int err = uv_thread_getaffinity(&tid, cpumask, masksize);
-    if (err) {
-        free(cpumask);
-        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err),
-                       err);
-        return cpu;
-    }
-    int n = 0;
-    for (size_t i = 0; i < masksize; i++) {
-        n += cpumask[i];
-    }
-    free(cpumask);
-    return n < cpu ? n : cpu;
+    // We want the more conservative estimate of the two.
+    int cpu_threads = jl_cpu_threads();
+    int available_parallelism = uv_available_parallelism();
+    return available_parallelism < cpu_threads ? available_parallelism : cpu_threads;
 }
 
 
@@ -635,6 +620,38 @@ JL_DLLEXPORT long jl_SC_CLK_TCK(void)
 #endif
 }
 
+#ifdef _OS_OPENBSD_
+// Helper for jl_pathname_for_handle()
+struct dlinfo_data {
+    void       *searched;
+    const char *result;
+};
+
+static int dlinfo_helper(struct dl_phdr_info *info, size_t size, void *vdata)
+{
+    struct dlinfo_data *data = (struct dlinfo_data *)vdata;
+    void *handle;
+
+    /* ensure dl_phdr_info at compile-time to be compatible with the one at runtime */
+    if (sizeof(*info) < size)
+        return -1;
+
+    /* dlopen the name */
+    handle = dlopen(info->dlpi_name, RTLD_LAZY | RTLD_NOLOAD);
+    if (handle == NULL)
+        return 0;
+
+    /* check if the opened library is the same as the searched handle */
+    if (data->searched == handle)
+        data->result = info->dlpi_name;
+
+    dlclose(handle);
+
+    /* continue if still not found */
+    return (data->result != NULL);
+}
+#endif
+
 // Takes a handle (as returned from dlopen()) and returns the absolute path to the image loaded
 JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 {
@@ -677,6 +694,14 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
     free(pth16);
     return filepath;
 
+#elif defined(_OS_OPENBSD_)
+    struct dlinfo_data data = {
+        .searched = handle,
+        .result = NULL,
+    };
+    dl_iterate_phdr(&dlinfo_helper, &data);
+    return data.result;
+
 #else // Linux, FreeBSD, ...
 
     struct link_map *map;
@@ -754,11 +779,11 @@ JL_DLLEXPORT size_t jl_maxrss(void)
 
 // FIXME: `rusage` is available on OpenBSD, DragonFlyBSD and NetBSD as well.
 //        All of them return `ru_maxrss` in kilobytes.
-#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_)
+#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_) || defined (_OS_OPENBSD_)
     struct rusage rusage;
     getrusage( RUSAGE_SELF, &rusage );
 
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
+#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || defined (_OS_OPENBSD_)
     return (size_t)(rusage.ru_maxrss * 1024);
 #else
     return (size_t)rusage.ru_maxrss;
diff --git a/src/task.c b/src/task.c
index 7c8770530c326..f86e0ab3a880d 100644
--- a/src/task.c
+++ b/src/task.c
@@ -49,27 +49,27 @@ extern "C" {
 // c.f. interceptor in jl_dlopen as well
 void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL;
 #endif
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) {
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) {
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
         __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) {
-    __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL);
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) {
+    __sanitizer_finish_switch_fiber(current->asan_fake_stack, NULL, NULL);
         //(const void**)&last->stkbuf,
         //&last->bufsz);
 }
 #else
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) JL_NOTSAFEPOINT {}
 #endif
 
 #if defined(_COMPILER_TSAN_ENABLED_)
@@ -85,19 +85,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
-            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
-        } \
-        _tsan_macro_ctx->tsan_state = NULL; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        struct jl_stack_context_t *_tsan_macro_ctx = (_ctx); \
-        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
-    } while (0)
-#endif
 #else
 // just do minimal type-checking on the arguments
 #define tsan_destroy_ctx(_ptls, _ctx) do { \
@@ -108,16 +95,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         (void)_tsan_macro_ctx; \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#endif
 #endif
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
@@ -134,7 +111,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
 #define ROOT_TASK_STACK_ADJUSTMENT 3000000
 #endif
 
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
 static void jl_set_fiber(jl_ucontext_t *t);
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
 static void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
@@ -214,17 +190,17 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     assert(stackbase > frame_addr);
     size_t nb = stackbase - frame_addr;
     void *buf;
-    if (lastt->bufsz < nb) {
-        asan_free_copy_stack(lastt->stkbuf, lastt->bufsz);
+    if (lastt->ctx.bufsz < nb) {
+        asan_free_copy_stack(lastt->ctx.stkbuf, lastt->ctx.bufsz);
         buf = (void*)jl_gc_alloc_buf(ptls, nb);
-        lastt->stkbuf = buf;
-        lastt->bufsz = nb;
+        lastt->ctx.stkbuf = buf;
+        lastt->ctx.bufsz = nb;
     }
     else {
-        buf = lastt->stkbuf;
+        buf = lastt->ctx.stkbuf;
     }
     *pt = NULL; // clear the gc-root for the target task before copying the stack for saving
-    lastt->copy_stack = nb;
+    lastt->ctx.copy_stack = nb;
     lastt->sticky = 1;
     memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb);
     // this task's stack could have been modified after
@@ -233,75 +209,117 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     jl_gc_wb_back(lastt);
 }
 
-JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p)
+JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
 {
     size_t nb = t->copy_stack;
     char *_x = (char*)ptls->stackbase - nb;
     if (!p) {
         // switch to a stackframe that's beyond the bounds of the last switch
-        p = _x;
-        if ((char*)&_x > _x) {
-            p = (char*)alloca((char*)&_x - _x);
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
         }
         restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
     }
     void *_y = t->stkbuf;
     assert(_x != NULL && _y != NULL);
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#else
+#pragma message("warning: CFI_NORETURN not implemented for this platform, so profiling of copy_stacks may segfault in this build")
+#endif
+#else
+CFI_NORETURN
+#endif
     memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
 
 #if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    jl_setcontext(t->copy_ctx);
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
 #endif
     abort(); // unreachable
 }
 
-JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
+JL_NO_ASAN static void restore_stack2(jl_ucontext_t *t, jl_ptls_t ptls, jl_ucontext_t *lastt)
 {
     assert(t->copy_stack && !lastt->copy_stack);
     size_t nb = t->copy_stack;
-    char *_x = (char*)ptls->stackbase - nb;
-    void *_y = t->stkbuf;
-    assert(_x != NULL && _y != NULL);
-    memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
+    if (nb > 1) {
+        char *_x = (char*)ptls->stackbase - nb;
+        void *_y = t->stkbuf;
+        assert(_x != NULL && _y != NULL);
+        memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb);
+    }
+#if defined(_OS_WINDOWS_)
+    // jl_swapcontext and setjmp are the same on Windows, so we can just use swapcontext directly
+    tsan_switch_to_ctx(t);
+    jl_swapcontext(lastt->ctx, t->copy_ctx);
+#else
 #if defined(JL_HAVE_UNW_CONTEXT)
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx.ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-#elif defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK) || defined(_OS_WINDOWS_)
-    if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0))
+#elif defined(JL_HAVE_ASM)
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #else
 #error COPY_STACKS is incompatible with this platform
 #endif
-    tsan_switch_to_copyctx(&t->ctx);
-#if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    tsan_switch_to_ctx(t);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
+#endif
+}
+
+JL_NO_ASAN static void NOINLINE restore_stack3(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
+{
+#if !defined(JL_HAVE_ASM)
+    char *_x = (char*)ptls->stackbase;
+    if (!p) {
+        // switch to a stackframe that's well beyond the bounds of the next switch
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
+        }
+        restore_stack3(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
+    }
+#endif
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#endif
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+CFI_NORETURN
 #endif
+    tsan_switch_to_ctx(t);
+    jl_start_fiber_set(t); // (doesn't return)
+    abort();
 }
+
 #endif
 
 /* Rooted by the base module */
 static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
 
-void JL_NORETURN jl_finish_task(jl_task_t *t)
+void JL_NORETURN jl_finish_task(jl_task_t *ct)
 {
-    jl_task_t *ct = jl_current_task;
     JL_PROBE_RT_FINISH_TASK(ct);
     JL_SIGATOMIC_BEGIN();
-    if (jl_atomic_load_relaxed(&t->_isexception))
-        jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED);
+    if (jl_atomic_load_relaxed(&ct->_isexception))
+        jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED);
     else
-        jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE);
-    if (t->copy_stack) { // early free of stkbuf
-        asan_free_copy_stack(t->stkbuf, t->bufsz);
-        t->stkbuf = NULL;
+        jl_atomic_store_release(&ct->_state, JL_TASK_STATE_DONE);
+    if (ct->ctx.copy_stack) { // early free of stkbuf
+        asan_free_copy_stack(ct->ctx.stkbuf, ct->ctx.bufsz);
+        ct->ctx.stkbuf = NULL;
     }
     // ensure that state is cleared
     ct->ptls->in_finalizer = 0;
@@ -315,12 +333,12 @@ void JL_NORETURN jl_finish_task(jl_task_t *t)
             jl_atomic_store_release(&task_done_hook_func, done);
     }
     if (done != NULL) {
-        jl_value_t *args[2] = {done, (jl_value_t*)t};
+        jl_value_t *args[2] = {done, (jl_value_t*)ct};
         JL_TRY {
             jl_apply(args, 2);
         }
         JL_CATCH {
-            jl_no_exc_handler(jl_current_exception(), ct);
+            jl_no_exc_handler(jl_current_exception(ct), ct);
         }
     }
     jl_gc_debug_critical_error();
@@ -345,33 +363,33 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid
     if (ptls2) {
         *ptid = jl_atomic_load_relaxed(&task->tid);
 #ifdef COPY_STACKS
-        if (task->copy_stack) {
+        if (task->ctx.copy_stack) {
             *size = ptls2->stacksize;
             return (char *)ptls2->stackbase - *size;
         }
 #endif
     }
-    *size = task->bufsz - off;
-    return (void *)((char *)task->stkbuf + off);
+    *size = task->ctx.bufsz - off;
+    return (void *)((char *)task->ctx.stkbuf + off);
 }
 
 JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
                                        char **active_start, char **active_end,
                                        char **total_start, char **total_end)
 {
-    if (!task->started) {
+    if (!task->ctx.started) {
         *total_start = *active_start = 0;
         *total_end = *active_end = 0;
         return;
     }
 
     jl_ptls_t ptls2 = task->ptls;
-    if (task->copy_stack && ptls2) {
+    if (task->ctx.copy_stack && ptls2) {
         *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
         *total_end = *active_end = (char*)ptls2->stackbase;
     }
-    else if (task->stkbuf) {
-        *total_start = *active_start = (char*)task->stkbuf;
+    else if (task->ctx.stkbuf) {
+        *total_start = *active_start = (char*)task->ctx.stkbuf;
 #ifndef _OS_WINDOWS_
         jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
         if (ptls0->root_task == task) {
@@ -384,12 +402,12 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
         }
 #endif
 
-        *total_end = *active_end = (char*)task->stkbuf + task->bufsz;
+        *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
 #ifdef COPY_STACKS
         // save_stack stores the stack of an inactive task in stkbuf, and the
         // actual number of used bytes in copy_stack.
-        if (task->copy_stack > 1)
-            *active_end = (char*)task->stkbuf + task->copy_stack;
+        if (task->ctx.copy_stack > 1)
+            *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack;
 #endif
     }
     else {
@@ -450,20 +468,16 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
 #endif
 
     int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE;
-    if (!t->started && !t->copy_stack) {
+    if (!t->ctx.started && !t->ctx.copy_stack) {
         // may need to allocate the stack
-        if (t->stkbuf == NULL) {
-            t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-            if (t->stkbuf == NULL) {
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+            if (t->ctx.stkbuf == NULL) {
 #ifdef COPY_STACKS
                 // fall back to stack copying if mmap fails
-                t->copy_stack = 1;
+                t->ctx.copy_stack = 1;
+                t->ctx.bufsz = 0;
                 t->sticky = 1;
-                t->bufsz = 0;
-                if (always_copy_stacks)
-                    memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-                else
-                    memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx));
 #else
                 jl_throw(jl_memory_exception);
 #endif
@@ -471,28 +485,45 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
 
+    union {
+        _jl_ucontext_t ctx;
+        jl_stack_context_t copy_ctx;
+    } lasttstate;
+
     if (killed) {
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
         lastt->gcstack = NULL;
         lastt->eh = NULL;
-        if (!lastt->copy_stack && lastt->stkbuf) {
+        if (!lastt->ctx.copy_stack && lastt->ctx.stkbuf) {
             // early free of stkbuf back to the pool
             jl_release_task_stack(ptls, lastt);
         }
     }
     else {
+        if (lastt->ctx.copy_stack) { // save the old copy-stack
+#ifdef _OS_WINDOWS_
+            lasttstate.copy_ctx.uc_stack.ss_sp = (char*)ptls->stackbase - ptls->stacksize;
+            lasttstate.copy_ctx.uc_stack.ss_size = ptls->stacksize;
+#endif
 #ifdef COPY_STACKS
-        if (lastt->copy_stack) { // save the old copy-stack
-            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
-            if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) {
-                sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
-                // TODO: mutex unlock the thread we just switched from
+            if (jl_setjmp(lasttstate.copy_ctx.uc_mcontext, 0)) {
+#ifdef MIGRATE_TASKS
+                ptls = lastt->ptls;
+#endif
+                lastt->ctx.copy_ctx = NULL;
+                sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
                 return;
             }
-        }
-        else
+            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
+            lastt->ctx.copy_ctx = &lasttstate.copy_ctx;
+#else
+            abort();
 #endif
-        *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+        }
+        else {
+            *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+            lastt->ctx.ctx = &lasttstate.ctx;
+        }
     }
 
     // set up global state for new task and clear global state for old task
@@ -507,41 +538,44 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
     ptls->previous_task = lastt;
 #endif
 
-    if (t->started) {
+    if (t->ctx.started) {
+        if (t->ctx.copy_stack) {
 #ifdef COPY_STACKS
-        if (t->copy_stack) {
-            if (lastt->copy_stack) {
+            if (lastt->ctx.copy_stack) {
                 // Switching from copystack to copystack. Clear any shadow stack
                 // memory above the saved shadow stack.
-                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack;
+                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->ctx.copy_stack;
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 if (stackbottom < stacktop)
-                    asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                    asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
+            }
+            if (!killed && !lastt->ctx.copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                restore_stack2(&t->ctx, ptls, &lastt->ctx); // half jl_swap_fiber and half restore_stack
             }
-            if (!killed && !lastt->copy_stack) {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                restore_stack2(t, ptls, lastt);
-            } else {
-                tsan_switch_to_copyctx(&t->ctx);
+            else {
+                tsan_switch_to_ctx(&t->ctx);
                 if (killed) {
-                    sanitizer_start_switch_fiber_killed(ptls, t);
-                    tsan_destroy_copyctx(ptls, &lastt->ctx);
-                } else {
-                    sanitizer_start_switch_fiber(ptls, lastt, t);
+                    sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
+                    tsan_destroy_ctx(ptls, &lastt->ctx);
+                }
+                else {
+                    sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
                 }
 
-                if (lastt->copy_stack) {
-                    restore_stack(t, ptls, NULL); // (doesn't return)
+                if (lastt->ctx.copy_stack) {
+                    restore_stack(&t->ctx, ptls, NULL); // (doesn't return)
+                    abort();
                 }
                 else {
-                    restore_stack(t, ptls, (char*)1); // (doesn't return)
+                    restore_stack(&t->ctx, ptls, (char*)1); // (doesn't return)
+                    abort();
                 }
             }
-        }
-        else
 #endif
-        {
-            if (lastt->copy_stack) {
+        }
+        else {
+            if (lastt->ctx.copy_stack) {
                 // Switching away from a copystack to a non-copystack. Clear
                 // the whole shadow stack now, because otherwise we won't know
                 // how much stack memory to clear the next time we switch to
@@ -550,22 +584,23 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 // We're not restoring the stack, but we still need to unpoison the
                 // stack, so it starts with a pristine stack.
-                asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
             }
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_set_fiber(&t->ctx); // (doesn't return)
                 abort(); // unreachable
             }
             else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                if (lastt->copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                if (lastt->ctx.copy_stack) {
                     // Resume at the jl_setjmp earlier in this function,
                     // don't do a full task swap
                     tsan_switch_to_ctx(&t->ctx);
                     jl_set_fiber(&t->ctx); // (doesn't return)
+                    abort();
                 }
                 else {
                     jl_swap_fiber(&lastt->ctx, &t->ctx);
@@ -574,41 +609,58 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
     else {
-        if (lastt->copy_stack) {
+#ifdef _COMPILER_TSAN_ENABLED_
+        t->ctx.tsan_state = __tsan_create_fiber(0);
+#endif
+        if (lastt->ctx.copy_stack) {
             uintptr_t stacktop = (uintptr_t)ptls->stackbase;
             uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
             // We're not restoring the stack, but we still need to unpoison the
             // stack, so it starts with a pristine stack.
-            asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+            asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
         }
-        if (t->copy_stack && always_copy_stacks) {
+        if (t->ctx.copy_stack) {
+#ifdef COPY_STACKS
             tsan_switch_to_ctx(&t->ctx);
+            // create a temporary non-copy_stack context for starting this fiber
+            jl_ucontext_t ctx = t->ctx;
+            ctx.ctx = NULL;
+            ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize;
+            ctx.bufsz = ptls->stacksize;
+            ctx.copy_stack = 0;
+            ctx.started = 0;
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
-            } else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
+                if (lastt->ctx.copy_stack)
+                    restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                else
+                    jl_start_fiber_set(&ctx);
+                abort();
+            }
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                abort();
+            }
+            else {
+                jl_start_fiber_swap(&lastt->ctx, &ctx);
             }
-#ifdef COPY_STACKS
-#if defined(_OS_WINDOWS_)
-            jl_setcontext(&t->ctx.copy_ctx);
 #else
-            jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
-#endif
+            abort();
 #endif
-            abort(); // unreachable
         }
         else {
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
             }
-            sanitizer_start_switch_fiber(ptls, lastt, t);
-            if (lastt->copy_stack) {
-                // Resume at the jl_setjmp earlier in this function
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here
                 tsan_switch_to_ctx(&t->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
@@ -618,7 +670,14 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
             }
         }
     }
-    sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
+
+#ifdef MIGRATE_TASKS
+    ptls = lastt->ptls;
+#endif
+    assert(ptls);
+    assert(lastt == jl_atomic_load_relaxed(&ptls->current_task));
+    lastt->ctx.ctx = NULL;
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
 }
 
 JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
@@ -630,7 +689,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
         return;
     }
     int8_t gc_state = jl_gc_unsafe_enter(ptls);
-    if (t->started && t->stkbuf == NULL)
+    if (t->ctx.started && t->ctx.stkbuf == NULL)
         jl_error("attempt to switch to exited task");
     if (ptls->in_finalizer)
         jl_error("task switch not allowed from inside gc finalizer");
@@ -655,7 +714,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
     ptls->previous_task = NULL;
     assert(t != ct);
     assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid);
-    if (!t->sticky && !t->copy_stack)
+    if (!t->sticky && !t->ctx.copy_stack)
         jl_atomic_store_release(&t->tid, -1);
 #else
     assert(ptls == ct->ptls);
@@ -688,7 +747,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
     // NULL exception objects are used when rethrowing. we don't have a handler to process
     // the exception stack, so at least report the exception at the top of the stack.
     if (!e)
-        e = jl_current_exception();
+        e = jl_current_exception(ct);
 
     jl_printf((JL_STREAM*)STDERR_FILENO, "fatal: error thrown and no exception handler available.\n");
     jl_static_show((JL_STREAM*)STDERR_FILENO, e);
@@ -712,48 +771,31 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
 #define pop_timings_stack() /* Nothing */
 #endif
 
-#define throw_internal_body(altstack)                                          \
-    assert(!jl_get_safe_restore());                                            \
-    jl_ptls_t ptls = ct->ptls;                                                 \
-    ptls->io_wait = 0;                                                         \
-    jl_gc_unsafe_enter(ptls);                                                  \
-    if (exception) {                                                           \
-        /* The temporary ptls->bt_data is rooted by special purpose code in the\
-           GC. This exists only for the purpose of preserving bt_data until we \
-           set ptls->bt_size=0 below. */                                       \
-        jl_push_excstack(ct, &ct->excstack, exception,                             \
-                          ptls->bt_data, ptls->bt_size);                       \
-        ptls->bt_size = 0;                                                     \
-    }                                                                          \
-    assert(ct->excstack && ct->excstack->top);                                 \
-    jl_handler_t *eh = ct->eh;                                                 \
-    if (eh != NULL) {                                                          \
-        if (altstack) ptls->sig_exception = NULL;                              \
-        pop_timings_stack()                                                    \
-        asan_unpoison_task_stack(ct, &eh->eh_ctx);                             \
-        jl_longjmp(eh->eh_ctx, 1);                                             \
-    }                                                                          \
-    else {                                                                     \
-        jl_no_exc_handler(exception, ct);                                      \
-    }                                                                          \
-    assert(0);
-
 static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
-CFI_NORETURN
     JL_GC_PUSH1(&exception);
-    throw_internal_body(0);
-    jl_unreachable();
-}
-
-/* On the signal stack, we don't want to create any asan frames, but we do on the
-   normal, stack, so we split this function in two, depending on which context
-   we're calling it in. This also lets us avoid making a GC frame on the altstack,
-   which might end up getting corrupted if we recur here through another signal. */
-JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception)
-{
-CFI_NORETURN
-    throw_internal_body(1);
+    jl_ptls_t ptls = ct->ptls;
+    ptls->io_wait = 0;
+    jl_gc_unsafe_enter(ptls);
+    if (exception) {
+        /* The temporary ptls->bt_data is rooted by special purpose code in the\
+           GC. This exists only for the purpose of preserving bt_data until we
+           set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+    }
+    assert(ct->excstack && ct->excstack->top);
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        pop_timings_stack()
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp(eh->eh_ctx, 1);
+    }
+    else {
+        jl_no_exc_handler(exception, ct);
+    }
+    assert(0);
     jl_unreachable();
 }
 
@@ -783,24 +825,6 @@ JL_DLLEXPORT void jl_rethrow(void)
     throw_internal(ct, NULL);
 }
 
-// Special case throw for errors detected inside signal handlers.  This is not
-// (cannot be) called directly in the signal handler itself, but is returned to
-// after the signal handler exits.
-JL_DLLEXPORT JL_NO_ASAN void JL_NORETURN jl_sig_throw(void)
-{
-CFI_NORETURN
-    jl_jmp_buf *safe_restore = jl_get_safe_restore();
-    jl_task_t *ct = jl_current_task;
-    if (safe_restore) {
-        asan_unpoison_task_stack(ct, safe_restore);
-        jl_longjmp(*safe_restore, 1);
-    }
-    jl_ptls_t ptls = ct->ptls;
-    jl_value_t *e = ptls->sig_exception;
-    JL_GC_PROMISE_ROOTED(e);
-    throw_internal_altstack(ct, e);
-}
-
 JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED)
 {
     // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now
@@ -889,31 +913,46 @@ As in DotMix and SplitMix, each task is assigned unique task "pedigree"
 coordinates. Our pedigree construction is a bit different and uses only binary
 coordinates rather than arbitrary integers. Each pedigree is an infinite
 sequence of ones and zeros with only finitely many ones. Each task has a "fork
-index": the root task has index 0; the fork index of a task the jth child task
-of a parent task with fork index i is i+j. The root task's coordinates are all
-zeros; each child task's coordinates are the same as its parents except at its
-fork index, where the parent has a zero while the child has a one. A task's
-coordinates after its fork index are all zeros. The coordinates of a tasks
-ancestors are all prefixes of its own coordinates, padded with zeros.
-
-Also as in DotMix and SplitMix, we generate a sequence of pseudorandom weights
+index": the root task has index 0; the fork index of the jth child task of a
+parent task with fork index i is i+j. The root task's coordinates are all zeros;
+each child task's coordinates are the same as its parents except at its fork
+index, where the parent has a zero while the child has a one; each task's
+coordinates after its fork index are all zeros. The last common ancestor of two
+tasks has coordinates that are the longest common prefix of their coordinates.
+
+Also as in DotMix and SplitMix, we generate a sequence of pseudorandom "weights"
 to combine with the coordinates of each task. This sequence is common across all
-tasks, and different mix values for each task derive from their coordinates
-being different. In DotMix and SplitMix, this is a literal dot product: the
-pseudorandom weights are multiplied by corresponding task coordinate and added
-up. While this does provably make collisions as unlikely as randomly assigned
-task seeds, this linear construction can be used to create linearly correlated
-states between tasks. However, it turns out that the compression construction
-need not be linear, commutative, associative, etc. which allows us to avoid any
-linear or other obvious correlations between related sets of tasks.
-
-To generalize SplitMix's optimized dot product construction, we similarly
-compute each task's compression function value incrementally by combining the
-parent's compression value with pseudorandom weight corresponding with the
-child's fork index. Formally, if the parent's compression value is c then we can
-compute the child's compression value as c′ = f(c, wᵢ) where w is the vector of
-pseudorandom weights. What is f? It can be any function that is bijective in
-each argument for all values of the other argument:
+tasks, and different mix values for tasks stem entirely from task coordinates
+being different. In DotMix and SplitMix the mix function is a literal dot
+product: the pseudorandom weights are multiplied by corresponding task
+coordinate and summed. While this does provably make collisions as unlikely as
+random seeding, this linear construction can be used to create linearly
+correlated states between more than two tasks. However, it turns out that the
+compression mixing construction need not be linear, nor commutative, nor
+associative. In fact, the mixing function need only be bijective in both
+arguments. This allows us to use a much more non-trivial mixing function and
+avoid any linear or other obvious correlations between related sets of tasks.
+
+We maintain an LCG in rngState[4] to generate pseudorandom weights. An LCG by
+itself is a very bad RNG, but we combine this one with xoshiro256 state
+registers in a non-trivial way and then apply the PCG-RXS-M-XS-64 output
+function to that. Even if the xoshiro256 states are all zeros, which they should
+never be, the output would be the same as PCG-RXS-M-XS-64, which is a solid
+statistical RNG. Each time a child is forked, we update the LCG in both parent
+and child tasks, corresponding to increasing the fork index. In the parent,
+that's all we have to do -- the main RNG state remains unchanged. Recall that
+spawning a child should not affect subsequent RNG draws in the parent. The next
+time the parent forks a child, the mixing weight used will be different. In the
+child, we use the LCG state to perturb the child's main RNG state registers,
+rngState[0..3].
+
+To generalize SplitMix's optimized dot product construction, we also compute
+each task's compression function value incrementally by combining the parent's
+compression value with pseudorandom weight corresponding with the child's fork
+index. Formally, if the parent's compression value is c then we can compute the
+child's compression value as c′ = f(c, wᵢ) where w is the vector of pseudorandom
+weights. What is f? It can be any function that is bijective in each argument
+for all values of the other argument:
 
     * For all c: w ↦ f(c, w) is bijective
     * For all w: c ↦ f(c, w) is bijective
@@ -933,57 +972,80 @@ bijection on each argument witnessed by these inverses:
     * c′ ↦ (2c′+1)(2w+1)⁻¹÷2 % 2^64
     * w′ ↦ (2c+1)⁻¹(2w′+1)÷2 % 2^64
 
-The second PCG output step is a bijection and designed to be significantly
-non-linear -- non-linear enough to mask the linearity of the LCG that drives the
-PCG-RXS-M-XS-64 RNG and allows it to pass statistical RNG test suites despite
-having the same size state and output. In particular, since this mixing function
-is highly non-associative and non-linear, we (hopefully) don't have any
-discernible relationship between these values:
+Here (2w+1)⁻¹ is the modular inverse of (2w+1) mod 2^64, guaranteed to exist
+since 2w+1 is odd. The second PCG output step is a bijection and designed to be
+significantly non-linear -- non-linear enough to mask the linearity of the LCG
+that drives the PCG-RXS-M-XS-64 RNG and allows it to pass statistical RNG test
+suites despite having the same size state and output. In particular, since this
+mixing function is highly non-associative and non-linear, we (hopefully) don't
+have any discernible relationship between these values:
 
     * c₀₀ = c
     * c₁₀ = f(c, wᵢ)
     * c₀₁ = f(c, wⱼ)
     * c₁₁ = f(f(c, wᵢ), wⱼ)
 
-When f is simply `+` then these have a very obvious relationship:
+When f is simply `+` then these have a very obvious linear relationship:
 
     c₀₀ + c₁₁ == c₁₀ + c₀₁
 
-This relationship holds regardless of what wᵢ and wⱼ are and is precisely what
-allows easy creation of correlated tasks with the DotMix/SplitMix construction
-that we previously used. Expressing any relationship between these values with
-our mixing function would require inverting the PCG output function (doable but
-non-trivial), knowing the weights wᵢ and wⱼ, and then applying the inversion
-functions for those weights appropriately. Since the weights are pseudo-randomly
-generated and not directly observable, this is infeasible.
-
-We maintain an LCG in rngState[4] to generate pseudorandom weights. An LCG by
-itself is a very bad RNG, but we combine this one with xoshiro256 state
-registers in a non-trivial way and then apply the PCG-RXS-M-XS-64 output
-function to that. Even if the xoshiro256 states are all zeros, which they should
-never be, the output would be the same as PCG-RXS-M-XS-64, which is a solid
-statistical RNG.
-
-Each time a child is forked, we update the LCG in both parent and child tasks,
-corresponding to increasing the fork index. In the parent, that's all we have to
-do -- the main RNG state remains unchanged. Recall that spawning a child should
-*not* affect subsequent RNG draws in the parent. The next time the parent forks
-a child, the mixing weight used will be different. In the child, we use the LCG
-state to perturb the child's main RNG state registers, rngState[0..3].
-
-Since we want these registers to behave independently, we use four different
-variations on f to mix the LCG state with each of the four main RNG registers.
-Each variation first xors the LCG state with a different random constant before
-combining that value above with the old register state via multiplication; the
-PCG-RXS-M-XS-64 output function is then applied to that mixed state, with a
-different multiplier constant for each variation / register index. Xor is used
-in the first step since we multiply the result with the state immediately after
-and multiplication distributes over `+` and commutes with `*`, which makes both
-options suspect; multiplication doesn't distribute over or commute with xor. We
-also use a different odd multiplier in PCG-RXS-M-XS-64 for each RNG register.
-These three sources of variation (different xor constants, different xoshiro256
-state, different PCG multipliers) are sufficient for each of the four outputs to
-behave statistically independently.
+This relationship holds regardless of what wᵢ and wⱼ are and allows easy
+creation of correlated tasks with the way we were previously using the
+DotMix/SplitMix construction. SplitMix itself does not output the raw dot
+product, probably because the authors were aware of this linearity issue;
+instead: they apply the MurmurHash3 finalizer to the dot-product to get an
+output that masks linear relationships. I had failed to understand the
+importance of that finalizer. One possible fix for our task splitting
+correlation issue would have been to also apply a non-linear finalizer
+(MurmurHash3 is one of the best) to our dot product before using it to perturb
+the xoshiro256 state. There are two problems with that fix, however:
+
+1. It requires accumulating the dot product somewhere. The old approach
+   accumulates dot products directly in the xoshiro registers; if we were to
+   accumulate and then finalize, the dot product has to be stored somewhere
+   in each task. We want our tasks to be as small as possible, so adding
+   another 64-bit field that we never change would be unfortunate.
+
+2. We still need to apply the PCG finalizer to the internal LCG in order to
+   generate dot product weights. SplitMix uses a shared static array of
+   1024 pre-generated random weights; we could do the same, but that limits
+   the number of task splits to a max of 1024 before weights have to be
+   reused. We can't use the LCG directly because it's highly linear and we
+   need four variations of the internal RNG stream for the four xoshiro256
+   registers. That means we'd have to apply the PCG finalizer, add it to
+   our dot product accumulator field in the child task, then apply the
+   MurmurHash3 finalizer to that dot product and use the result to purturb
+   the main RNG state.
+
+We avoid both problems by recognizing that the mixing function can be much less
+simple while still allowing the essential collision resistance proof to go
+through. We replace addition with a highly non-linear, non-associative mixing
+function that includes the PCG output function. This allows us to continue to use
+the xoshiro state registers for mixing function accumulation as well as for its
+primary purpose. It also obviates the need for double finalization: it would
+have been disastrous to use LCG state directly as weights for a linear
+construction like SplitMix, but using it as the input to a non-linear mixer that
+includes the strongest PCG output function is reasonable (and precisely what
+PCG-RXS-M-XS-64 does). Since the output of the mixing function is already
+non-linearly finalized, there's no need to apply yet another finalizer.
+
+Since there are four xoshiro256 registers that we want to behave independently
+as mix accumulators, we use four different variations on the mixing function,
+keyed by register index (0-3). Each variation first xors the LCG state with a
+different random constant before combining that value above with the old
+register state via multiplication. The PCG-RXS-M-XS-64 output function is then
+applied to that mixed state, with a different multiplier constant for each
+variation / register index. Xor is used in the first step since we multiply the
+result with the state immediately after and multiplication distributes over `+`
+and commutes with `*`, making both suspect options. Multiplication doesn't
+distribute over or commute with xor. We also use a different odd multiplier in
+PCG-RXS-M-XS-64 for each RNG register. These four sources of variation
+(different initial state, different xor constants, different xoshiro256 state,
+different PCG multipliers) are hopefully sufficient for each of the four outputs
+to behave statistically independently, in the sense that even if two different
+tasks happen to have a state collision in one 64-bit register, it is highly
+improbable that all four registers collide at the same time, giving an actual
+main RNG state collision.
 
 [1]: https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf
 
@@ -1034,26 +1096,28 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
     jl_set_typetagof(t, jl_task_tag, 0);
     JL_PROBE_RT_NEW_TASK(ct, t);
-    t->copy_stack = 0;
+    t->ctx.copy_stack = 0;
     if (ssize == 0) {
         // stack size unspecified; use default
         if (always_copy_stacks) {
-            t->copy_stack = 1;
-            t->bufsz = 0;
+            t->ctx.copy_stack = 1;
+            t->ctx.bufsz = 0;
         }
         else {
-            t->bufsz = JL_STACK_SIZE;
+            t->ctx.bufsz = JL_STACK_SIZE;
         }
-        t->stkbuf = NULL;
+        t->ctx.stkbuf = NULL;
     }
     else {
         // user requested dedicated stack of a certain size
         if (ssize < MINSTKSZ)
             ssize = MINSTKSZ;
-        t->bufsz = ssize;
-        t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-        if (t->stkbuf == NULL)
+        t->ctx.bufsz = ssize;
+        t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.bufsz = 0;
             jl_throw(jl_memory_exception);
+        }
     }
     t->next = jl_nothing;
     t->queue = jl_nothing;
@@ -1072,30 +1136,21 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->sticky = 1;
     t->gcstack = NULL;
     t->excstack = NULL;
-    t->started = 0;
+    t->ctx.started = 0;
     t->priority = 0;
-    jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved
+    jl_atomic_store_relaxed(&t->tid, -1);
     t->threadpoolid = ct->threadpoolid;
     t->ptls = NULL;
     t->world_age = ct->world_age;
     t->reentrant_timing = 0;
     jl_timing_task_init(t);
 
-#ifdef COPY_STACKS
-    if (!t->copy_stack) {
-#if defined(JL_DEBUG_BUILD)
-        memset(&t->ctx, 0, sizeof(t->ctx));
-#endif
-    }
-    else {
-        if (always_copy_stacks)
-            memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-        else
-            memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx));
-    }
-#endif
+    if (t->ctx.copy_stack)
+        t->ctx.copy_ctx = NULL;
+    else
+        t->ctx.ctx = NULL;
 #ifdef _COMPILER_TSAN_ENABLED_
-    t->ctx.tsan_state = __tsan_create_fiber(0);
+    t->ctx.tsan_state = NULL;
 #endif
 #ifdef _COMPILER_ASAN_ENABLED_
     t->ctx.asan_fake_stack = NULL;
@@ -1159,7 +1214,7 @@ CFI_NORETURN
     jl_task_t *ct = jl_current_task;
 #endif
     jl_ptls_t ptls = ct->ptls;
-    sanitizer_finish_switch_fiber(ptls->previous_task, ct);
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx);
     _start_task();
 }
 
@@ -1173,6 +1228,7 @@ CFI_NORETURN
 #else
     jl_task_t *ct = jl_current_task;
 #endif
+    ct->ctx.ctx = NULL;
     jl_ptls_t ptls = ct->ptls;
     jl_value_t *res;
     assert(ptls->finalizers_inhibited == 0);
@@ -1180,11 +1236,11 @@ CFI_NORETURN
 #ifdef MIGRATE_TASKS
     jl_task_t *pt = ptls->previous_task;
     ptls->previous_task = NULL;
-    if (!pt->sticky && !pt->copy_stack)
+    if (!pt->sticky && !pt->ctx.copy_stack)
         jl_atomic_store_release(&pt->tid, -1);
 #endif
 
-    ct->started = 1;
+    ct->ctx.started = 1;
     JL_PROBE_RT_START_TASK(ct);
     jl_timing_block_task_enter(ct, ptls, NULL);
     if (jl_atomic_load_relaxed(&ct->_isexception)) {
@@ -1203,7 +1259,7 @@ CFI_NORETURN
             res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
-            res = jl_current_exception();
+            res = jl_current_exception(ct);
             jl_atomic_store_relaxed(&ct->_isexception, 1);
             goto skip_pop_exception;
         }
@@ -1221,64 +1277,52 @@ skip_pop_exception:;
 #ifdef _OS_WINDOWS_
 #define setcontext jl_setcontext
 #define swapcontext jl_swapcontext
-#define makecontext jl_makecontext
 #endif
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static int make_fiber(jl_ucontext_t *t, _jl_ucontext_t *ctx)
 {
 #ifndef _OS_WINDOWS_
-    int r = getcontext(t);
-    if (r != 0)
-        jl_error("getcontext failed");
+    int r = getcontext(ctx);
+    if (r != 0) abort();
 #endif
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    t->uc_stack.ss_sp = stk;
-    t->uc_stack.ss_size = *ssize;
+    ctx->uc_stack.ss_sp = (char*)t->stkbuf;
+    ctx->uc_stack.ss_size = t->bufsz;
 #ifdef _OS_WINDOWS_
-    makecontext(t, &start_task);
+    jl_makecontext(ctx, &start_task);
 #else
-    t->uc_link = NULL;
-    makecontext(t, &start_task, 0);
+    ctx->uc_link = NULL;
+    makecontext(ctx, &start_task, 0);
 #endif
-    return (char*)stk;
+    return 1;
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
+    setcontext(&ctx);
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
     assert(lastt);
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    swapcontext(lastt->ctx, &ctx);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    swapcontext(lastt->ctx, t->ctx);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
-}
-#endif
-
-#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM)
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
-    if (stkbuf == NULL)
-        return NULL;
-#ifndef __clang_gcanalyzer__
-    ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
-    ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
-#endif
-    return stkbuf;
+    setcontext(t->ctx);
 }
 #endif
 
 #if defined(JL_HAVE_UNW_CONTEXT)
+#ifdef _OS_WINDOWS_
+#error unw_context_t not defined in Windows
+#endif
 static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 {
     volatile int returns = 0;
@@ -1292,15 +1336,15 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
     unw_resume(&c);
@@ -1308,14 +1352,14 @@ static void jl_set_fiber(jl_ucontext_t *t)
 #elif defined(JL_HAVE_ASM)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
     tsan_switch_to_ctx(t);
     jl_set_fiber(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->ctx.uc_mcontext, 1);
+    jl_longjmp(t->ctx->uc_mcontext, 1);
 }
 #endif
 
@@ -1336,14 +1380,14 @@ static void jl_set_fiber(jl_ucontext_t *t)
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
-    int r = unw_getcontext(&t->ctx);
+    int r = unw_getcontext(t->ctx);
     if (r)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
 #if defined __linux__
@@ -1359,43 +1403,46 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-    r = unw_getcontext(&t->ctx);
+    r = unw_getcontext(t->ctx);
     if (r != 0)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
     if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
         abort();
     if (unw_set_reg(&c, UNW_REG_IP, fn))
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 #endif
 
 #if defined(JL_HAVE_ASM)
+#ifdef _OS_WINDOWS_
+#error JL_HAVE_ASM not defined in Windows
+#endif
 JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
 #ifdef JL_HAVE_UNW_CONTEXT
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
 #else
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #endif
     tsan_switch_to_ctx(t);
@@ -1403,8 +1450,9 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *
 }
 JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+CFI_NORETURN
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
 #ifdef _CPU_X86_64_
@@ -1473,115 +1521,6 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
 }
 #endif
 
-#if defined(JL_HAVE_SIGALTSTACK)
-#if defined(_COMPILER_TSAN_ENABLED_)
-#error TSAN support not currently implemented for this tasking model
-#endif
-
-static void start_basefiber(int sig)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
-        start_task(); // sanitizer_finish_switch_fiber is part of start_task
-}
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    stack_t uc_stack, osigstk;
-    struct sigaction sa, osa;
-    sigset_t set, oset;
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    // setup
-    jl_ptls_t ptls = jl_current_task->ptls;
-    _jl_ucontext_t base_ctx;
-    memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx));
-    sigfillset(&set);
-    if (pthread_sigmask(SIG_BLOCK, &set, &oset) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("pthread_sigmask failed");
-    }
-    uc_stack.ss_sp = stk;
-    uc_stack.ss_size = *ssize;
-    uc_stack.ss_flags = 0;
-    if (sigaltstack(&uc_stack, &osigstk) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaltstack failed");
-    }
-    memset(&sa, 0, sizeof(sa));
-    sigemptyset(&sa.sa_mask);
-    sa.sa_handler = start_basefiber;
-    sa.sa_flags = SA_ONSTACK;
-    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaction failed");
-    }
-    // emit signal
-    pthread_kill(pthread_self(), SIGUSR2); // initializes jl_basectx
-    sigdelset(&set, SIGUSR2);
-    sigsuspend(&set);
-    // cleanup
-    if (sigaction(SIGUSR2, &osa, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaction failed");
-    }
-    if (osigstk.ss_size < MINSTKSZ && (osigstk.ss_flags | SS_DISABLE))
-       osigstk.ss_size = MINSTKSZ;
-    if (sigaltstack(&osigstk, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaltstack failed");
-    }
-    if (pthread_sigmask(SIG_SETMASK, &oset, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("pthread_sigmask failed");
-    }
-    if (&ptls->base_ctx != t) {
-        memcpy(&t, &ptls->base_ctx, sizeof(base_ctx));
-        memcpy(&ptls->base_ctx, &base_ctx, sizeof(base_ctx)); // restore COPY_STACKS context
-    }
-    return (char*)stk;
-}
-static void jl_start_fiber_set(jl_ucontext_t *t) {
-    jl_longjmp(t->ctx.uc_mcontext, 1); // (doesn't return)
-}
-static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    assert(lastt);
-    if (lastt && jl_setjmp(lastt->ctx.uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t);
-}
-static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t); // doesn't return
-}
-static void jl_set_fiber(jl_ucontext_t *t)
-{
-    jl_longjmp(t->ctx.uc_mcontext, 1);
-}
-#endif
-
-#if defined(JL_HAVE_ASYNCIFY)
-#if defined(_COMPILER_TSAN_ENABLED_)
-#error TSAN support not currently implemented for this tasking model
-#endif
-
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
-{
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    t->stackbottom = stk;
-    t->stacktop = ((char*)stk) + *ssize;
-    return (char*)stk;
-}
-// jl_*_fiber implemented in js
-#endif
-
 // Initialize a root task using the given stack.
 jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 {
@@ -1611,14 +1550,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     }
 #endif
     if (always_copy_stacks) {
-        ct->copy_stack = 1;
-        ct->stkbuf = NULL;
-        ct->bufsz = 0;
+        ct->ctx.copy_stack = 1;
+        ct->ctx.stkbuf = NULL;
+        ct->ctx.bufsz = 0;
     }
     else {
-        ct->copy_stack = 0;
-        ct->stkbuf = stack;
-        ct->bufsz = ssize;
+        ct->ctx.copy_stack = 0;
+        ct->ctx.stkbuf = stack;
+        ct->ctx.bufsz = ssize;
     }
 
 #ifdef USE_TRACY
@@ -1626,7 +1565,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     strcpy(unique_string, "Root");
     ct->name = unique_string;
 #endif
-    ct->started = 1;
+    ct->ctx.started = 1;
     ct->next = jl_nothing;
     ct->queue = jl_nothing;
     ct->tls = jl_nothing;
@@ -1650,6 +1589,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     JL_GC_PROMISE_ROOTED(ct);
     jl_set_pgcstack(&ct->gcstack);
     assert(jl_current_task == ct);
+    assert(jl_current_task->ptls == ptls);
 
 #ifdef _COMPILER_TSAN_ENABLED_
     ct->ctx.tsan_state = __tsan_get_current_fiber();
@@ -1665,21 +1605,18 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     if (always_copy_stacks) {
         // when this is set, we will attempt to corrupt the process stack to switch tasks,
         // although this is unreliable, and thus not recommended
-        ptls->stackbase = stack_hi;
-        ptls->stacksize = ssize;
-#ifdef _OS_WINDOWS_
-        ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi;
-        ptls->copy_stack_ctx.uc_stack.ss_size = ssize;
-#endif
-        if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
-            start_task(); // sanitizer_finish_switch_fiber is part of start_task
+        ptls->stackbase = jl_get_frame_addr();
+        ptls->stacksize =  (char*)ptls->stackbase - (char*)stack_lo;
     }
     else {
-        ssize = JL_STACK_SIZE;
-        char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
+        size_t bufsz = JL_STACK_SIZE;
+        void *stkbuf = jl_malloc_stack(&bufsz, NULL);
         if (stkbuf != NULL) {
-            ptls->stackbase = stkbuf + ssize;
-            ptls->stacksize = ssize;
+            ptls->stackbase = (char*)stkbuf + bufsz;
+            ptls->stacksize = bufsz;
+        }
+        else {
+            ptls->stacksize = 0;
         }
     }
 #endif
@@ -1692,7 +1629,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 
 JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    return t->started;
+    return t->ctx.started;
 }
 
 JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT
diff --git a/src/threading.c b/src/threading.c
index 8879a34d6b701..44b1192528531 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -74,6 +74,16 @@ JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
 
 JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 {
+#ifdef _OS_DARWIN_
+    jl_task_t *ct = jl_get_current_task();
+    if (ct != NULL && ct->ptls) {
+        if (sr == NULL)
+            pthread_setspecific(jl_safe_restore_key, (void*)sr);
+        ct->ptls->safe_restore = sr;
+        if (sr == NULL)
+            return;
+    }
+#endif
     pthread_setspecific(jl_safe_restore_key, (void*)sr);
 }
 #endif
@@ -82,51 +92,17 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 // The tls_states buffer:
 //
 // On platforms that do not use ELF (i.e. where `__thread` is emulated with
-// lower level API) (Mac, Windows), we use the platform runtime API to create
+// lower level API) (Windows), we use the platform runtime API to create
 // TLS variable directly.
 // This is functionally equivalent to using `__thread` but can be
 // more efficient since we can have better control over the creation and
 // initialization of the TLS buffer.
 //
-// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable
+// On platforms that support native TLS (ELF platforms + Macos) we use a `__thread` variable
 // as the fallback in the shared object. For better efficiency, we also
 // create a `__thread` variable in the main executable using a static TLS
 // model.
-#if defined(_OS_DARWIN_)
-// Mac doesn't seem to have static TLS model so the runtime TLS getter
-// registration will only add overhead to TLS access. The `__thread` variables
-// are emulated with `pthread_key_t` so it is actually faster to use it directly.
-static pthread_key_t jl_pgcstack_key;
-
-__attribute__((constructor)) void jl_init_tls(void)
-{
-    pthread_key_create(&jl_pgcstack_key, NULL);
-}
-
-JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
-{
-    return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key);
-}
-
-void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
-{
-    pthread_setspecific(jl_pgcstack_key, (void*)pgcstack);
-}
-
-void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k)
-{
-    // for codegen
-    *f = pthread_getspecific;
-    *k = jl_pgcstack_key;
-}
-
-
-JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k)
-{
-    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
-}
-
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 // Apparently windows doesn't have a static TLS model (or one that can be
 // reliably used from a shared library) either..... Use `TLSAlloc` instead.
 
@@ -338,6 +314,18 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT
     return -1; // everything else uses threadpool -1 (does not belong to any threadpool)
 }
 
+// get thread local rng
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT
+{
+    return jl_current_task->ptls->rngseed;
+}
+
+// get thread local rng
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT
+{
+    jl_current_task->ptls->rngseed = new_seed;
+}
+
 jl_ptls_t jl_init_threadtls(int16_t tid)
 {
 #ifndef _OS_WINDOWS_
@@ -404,6 +392,12 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     jl_fence();
     uv_mutex_unlock(&tls_lock);
 
+#if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND)
+    // ensures libunwind TLS space for this thread is allocated eagerly
+    // to make unwinding async-signal-safe even when using thread local caches.
+    unw_ensure_tls();
+#endif
+
     return ptls;
 }
 
@@ -432,10 +426,33 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
     return &ct->gcstack;
 }
 
+
+void jl_safepoint_suspend_all_threads(jl_task_t *ct)
+{
+    // TODO: prevent jl_n_threads changing or jl_safepoint_resume_thread calls on another thread
+    //uv_mutex_lock(&tls_lock);
+    //disallow_resume = ct->tid;
+    //uv_mutex_unlock(&tls_lock);
+    for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) {
+        if (tid != jl_atomic_load_relaxed(&ct->tid))
+            jl_safepoint_suspend_thread(tid, 1);
+    };
+}
+
+void jl_safepoint_resume_all_threads(jl_task_t *ct)
+{
+    //uv_mutex_lock(&tls_lock);
+    //if (disallow_resume != ct->tid) return;
+    //uv_mutex_unlock(&tls_lock);
+    for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) {
+        if (tid != jl_atomic_load_relaxed(&ct->tid))
+            jl_safepoint_resume_thread(tid);
+    };
+}
+
 void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT;
 void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT;
-
-void jl_free_thread_gc_state(jl_ptls_t ptls);
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
 
 static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 {
@@ -464,7 +481,7 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
         }
         if (signal_stack != NULL) {
             if (signal_stack_size)
-                jl_free_stack(signal_stack, signal_stack_size);
+                _jl_free_stack(ptls ,signal_stack, signal_stack_size);
             else
                 free(signal_stack);
         }
@@ -525,7 +542,6 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 //// the other threads time to fail and emit their failure message
 //__attribute__((destructor)) static void _waitthreaddeath(void) { sleep(1); }
 
-JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
 jl_mutex_t typecache_lock;
 
 JL_DLLEXPORT ssize_t jl_tls_offset = -1;
@@ -708,14 +724,9 @@ void jl_init_threading(void)
         }
         else {
             // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified,
-            // set the number of mark threads to half of compute threads
+            // set the number of mark threads to the number of compute threads
             // and number of sweep threads to 0
-            if (nthreads <= 1) {
-                jl_n_markthreads = 0;
-            }
-            else {
-                jl_n_markthreads = (nthreads / 2) - 1;
-            }
+            jl_n_markthreads = nthreads - 1; // -1 for the master (mutator) thread which may also do marking
             // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified,
             // cap the number of threads that may run the mark phase to
             // the number of CPU cores
@@ -740,10 +751,10 @@ void jl_init_threading(void)
     jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t)));
     jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size);
     jl_n_gcthreads = ngcthreads;
-    gc_first_tid = nthreads;
+    gc_first_tid = nthreads + nthreadsi;
 }
 
-static uv_barrier_t thread_init_done;
+uv_barrier_t thread_init_done;
 
 void jl_start_threads(void)
 {
@@ -782,30 +793,20 @@ void jl_start_threads(void)
     uv_barrier_init(&thread_init_done, nthreads);
 
     // GC/System threads need to be after the worker threads.
-    int nworker_threads = nthreads - ngcthreads;
+    int nmutator_threads = nthreads - ngcthreads;
 
-    for (i = 1; i < nthreads; ++i) {
+    for (i = 1; i < nmutator_threads; ++i) {
         jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
         t->tid = i;
         t->barrier = &thread_init_done;
-        if (i < nworker_threads) {
-            uv_thread_create(&uvtid, jl_threadfun, t);
-            if (exclusive) {
-                mask[i] = 1;
-                uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
-                mask[i] = 0;
-            }
-        }
-        else if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
-            uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t);
-        }
-        else {
-            uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t);
+        uv_thread_create(&uvtid, jl_threadfun, t);
+        if (exclusive) {
+            mask[i] = 1;
+            uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+            mask[i] = 0;
         }
         uv_thread_detach(&uvtid);
     }
-
-    uv_barrier_wait(&thread_init_done);
 }
 
 _Atomic(unsigned) _threadedregion; // keep track of whether to prioritize IO or threading
@@ -989,6 +990,52 @@ JL_DLLEXPORT int jl_alignment(size_t sz)
     return jl_gc_alignment(sz);
 }
 
+// Return values:
+//     0  == success
+//     1  == invalid thread id provided
+//     2  == ptls2 was NULL
+//     <0 == uv_thread_getaffinity exit code
+JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize) {
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    if (tid < 0 || tid >= nthreads)
+        return 1;
+
+    // TODO: use correct lock. system_id is only legal if the thread is alive.
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    if (ptls2 == NULL)
+        return 2;
+    uv_thread_t uvtid = ptls2->system_id;
+
+    int ret_uv = uv_thread_getaffinity(&uvtid, mask, cpumasksize);
+    if (ret_uv != 0)
+        return ret_uv;
+
+    return 0; // success
+}
+
+// Return values:
+//     0  == success
+//     1  == invalid thread id provided
+//     2  == ptls2 was NULL
+//     <0 == uv_thread_getaffinity exit code
+JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize) {
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    if (tid < 0 || tid >= nthreads)
+        return 1;
+
+    // TODO: use correct lock. system_id is only legal if the thread is alive.
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    if (ptls2 == NULL)
+        return 2;
+    uv_thread_t uvtid = ptls2->system_id;
+
+    int ret_uv = uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+    if (ret_uv != 0)
+        return ret_uv;
+
+    return 0; // success
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/threading.h b/src/threading.h
index 260ecffa30dd5..cb26537699713 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -12,6 +12,8 @@ extern "C" {
 
 #define PROFILE_JL_THREADING            0
 
+extern uv_barrier_t thread_init_done;
+
 extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */
 
 typedef struct _jl_threadarg_t {
diff --git a/src/timing.c b/src/timing.c
index 590e52b8d523d..265e50ad3dd74 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -6,7 +6,7 @@
 #include "options.h"
 #include "stdio.h"
 
-#if defined(USE_TRACY) || defined(USE_ITTAPI)
+#if defined(USE_TRACY) || defined(USE_ITTAPI) || defined(USE_NVTX)
 #define DISABLE_FREQUENT_EVENTS
 #endif
 
@@ -49,6 +49,10 @@ static arraylist_t jl_timing_ittapi_events;
 static jl_mutex_t jl_timing_ittapi_events_lock;
 #endif //USE_ITTAPI
 
+#ifdef USE_NVTX
+static nvtxDomainHandle_t jl_timing_nvtx_domain;
+#endif
+
 #ifdef USE_TIMING_COUNTS
 static int cmp_counts_events(const void *a, const void *b) {
     jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a;
@@ -139,6 +143,13 @@ void jl_init_timing(void)
     qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST,
           sizeof(const char *), indirect_strcmp);
 
+#ifdef USE_NVTX
+    jl_timing_nvtx_domain = nvtxDomainCreateA("julia");
+    for (int i = 0; i < JL_TIMING_SUBSYSTEM_LAST; i++) {
+        nvtxDomainNameCategoryA(jl_timing_nvtx_domain, i + 1, jl_timing_subsystems[i]);
+    }
+#endif
+
     int i __attribute__((unused)) = 0;
 #ifdef USE_ITTAPI
     i = 0;
@@ -317,6 +328,25 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
     event->ittapi_event = _jl_timing_ittapi_event_create(name);
 #endif // USE_ITTAPI
 
+#ifdef USE_NVTX
+    nvtxEventAttributes_t nvtx_attrs = {0};
+    nvtx_attrs.version = NVTX_VERSION;
+    nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+
+    nvtxStringHandle_t nvtx_message = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, name);
+    nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
+    nvtx_attrs.message.registered = nvtx_message;
+
+    // 0 is the default (unnamed) category
+    nvtx_attrs.category = maybe_subsystem == JL_TIMING_SUBSYSTEM_LAST ? 0 : maybe_subsystem+1;
+
+    // simple Knuth hash to get nice colors
+    nvtx_attrs.colorType = NVTX_COLOR_ARGB;
+    nvtx_attrs.color = (nvtx_attrs.category * 2654435769) >> 8;
+
+    event->nvtx_attrs = nvtx_attrs;
+#endif // USE_NVTX
+
 #ifdef USE_TRACY
     event->tracy_srcloc.name = name;
     event->tracy_srcloc.function = function;
@@ -347,6 +377,7 @@ JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) {
     uint64_t t = cycleclock(); (void)t;
     _COUNTS_START(&block->counts_ctx, t);
     _ITTAPI_START(block);
+    _NVTX_START(block);
     _TRACY_START(block);
 
     jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack;
@@ -362,6 +393,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) {
     if (block->is_running) {
         uint64_t t = cycleclock(); (void)t;
         _ITTAPI_STOP(block);
+        _NVTX_STOP(block);
         _TRACY_STOP(block->tracy_ctx);
         _COUNTS_STOP(block, t);
 
diff --git a/src/timing.h b/src/timing.h
index 1cc82b67e2b6a..61118cc3b41ab 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -66,7 +66,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block);
 #define HAVE_TIMING_SUPPORT
 #endif
 
-#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS )
+#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_NVTX ) || defined( USE_TIMING_COUNTS )
 #define ENABLE_TIMINGS
 #endif
 
@@ -115,6 +115,12 @@ typedef struct ___tracy_source_location_data TracySrcLocData;
 #include <ittapi/ittnotify.h>
 #endif
 
+#ifdef USE_NVTX
+#pragma GCC visibility push(default)
+#include <nvtx3/nvToolsExt.h>
+#pragma GCC visibility pop
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -276,6 +282,20 @@ typedef struct _jl_timing_counts_t {
 #define _ITTAPI_STOP(block)
 #endif
 
+
+#ifdef USE_NVTX
+#define _NVTX_EVENT_MEMBER              nvtxEventAttributes_t nvtx_attrs;
+#define _NVTX_BLOCK_MEMBER              nvtxRangeId_t nvtx_rangeid;
+#define _NVTX_START(block)              (block)->nvtx_rangeid = nvtxDomainRangeStartEx(jl_timing_nvtx_domain, &(block)->event->nvtx_attrs)
+#define _NVTX_STOP(block)               nvtxDomainRangeEnd(jl_timing_nvtx_domain, (block)->nvtx_rangeid)
+#else
+#define _NVTX_EVENT_MEMBER
+#define _NVTX_BLOCK_MEMBER
+#define _NVTX_START(block)
+#define _NVTX_STOP(block)
+#endif
+
+
 /**
  * Top-level jl_timing implementation
  **/
@@ -292,6 +312,7 @@ extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST];
 struct _jl_timing_event_t { // typedef in julia.h
     _TRACY_EVENT_MEMBER
     _ITTAPI_EVENT_MEMBER
+    _NVTX_EVENT_MEMBER
     _COUNTS_EVENT_MEMBER
 
     int subsystem;
@@ -310,6 +331,7 @@ struct _jl_timing_block_t { // typedef in julia.h
 
     _TRACY_BLOCK_MEMBER
     _ITTAPI_BLOCK_MEMBER
+    _NVTX_BLOCK_MEMBER
     _COUNTS_BLOCK_MEMBER
 
     uint8_t is_running;
@@ -362,6 +384,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 #define _ITTAPI_COUNTER_MEMBER
 #endif
 
+#ifdef USE_NVTX
+#define _NVTX_COUNTER_MEMBER void * __nvtx_null;
+#else
+#define _NVTX_COUNTER_MEMBER
+#endif
+
 #ifdef USE_TRACY
 # define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
 # else
@@ -376,6 +404,7 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 
 typedef struct {
     _ITTAPI_COUNTER_MEMBER
+    _NVTX_COUNTER_MEMBER
     _TRACY_COUNTER_MEMBER
     _COUNTS_MEMBER
 } jl_timing_counter_t;
diff --git a/src/toplevel.c b/src/toplevel.c
index 8f67a0e1aa3e7..5d17a3fcf89a7 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -82,7 +82,7 @@ void jl_module_run_initializer(jl_module_t *m)
         }
         else {
             jl_rethrow_other(jl_new_struct(jl_initerror_type, m->name,
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
         }
     }
 }
@@ -155,25 +155,31 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
     }
     else {
-        jl_binding_t *b = jl_get_binding_wr(parent_module, name);
-        jl_declare_constant(b, parent_module, name);
-        jl_value_t *old = NULL;
-        if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
-            if (!jl_is_module(old)) {
-                jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
+        jl_binding_t *b = jl_get_module_binding(parent_module, name, 1);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, ct->world_age);
+        jl_ptr_kind_union_t pku = encode_restriction(NULL, BINDING_KIND_CONST);
+        jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)newm, BINDING_KIND_CONST);
+        if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_CONST) {
+                jl_declare_constant_val(b, parent_module, name, (jl_value_t*)newm);
+            } else {
+                // As a special exception allow binding replacement of modules
+                if (!jl_is_module(decode_restriction_value(pku))) {
+                    jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
+                }
+                if (jl_generating_output())
+                    jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
+                jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
+                pku = jl_atomic_exchange(&bpart->restriction, new_pku);
+            }
+            jl_gc_wb(bpart, newm);
+            if (decode_restriction_value(pku) != NULL && jl_is_module(decode_restriction_value(pku))) {
+                // create a hidden gc root for the old module
+                JL_LOCK(&jl_modules_mutex);
+                uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, decode_restriction_value(pku));
+                *refcnt += 1;
+                JL_UNLOCK(&jl_modules_mutex);
             }
-            if (jl_generating_output())
-                jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
-            jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
-            old = jl_atomic_exchange(&b->value, (jl_value_t*)newm);
-        }
-        jl_gc_wb(b, newm);
-        if (old != NULL) {
-            // create a hidden gc root for the old module
-            JL_LOCK(&jl_modules_mutex);
-            uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)old);
-            *refcnt += 1;
-            JL_UNLOCK(&jl_modules_mutex);
         }
     }
 
@@ -203,40 +209,19 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
         // add `eval` function
         form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno);
-        jl_toplevel_eval_flex(newm, form, 0, 1);
+        jl_toplevel_eval_flex(newm, form, 0, 1, &filename, &lineno);
         form = NULL;
     }
 
     for (int i = 0; i < jl_array_nrows(exprs); i++) {
         // process toplevel form
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno);
+        form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, filename, lineno);
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        (void)jl_toplevel_eval_flex(newm, form, 1, 1);
+        (void)jl_toplevel_eval_flex(newm, form, 1, 1, &filename, &lineno);
     }
     ct->world_age = last_age;
 
-#if 0
-    // some optional post-processing steps
-    size_t i;
-    jl_svec_t *table = jl_atomic_load_relaxed(&newm->bindings);
-    for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
-        if ((void*)b != jl_nothing) {
-            // remove non-exported macros
-            if (jl_symbol_name(b->name)[0]=='@' &&
-                !b->exportp && b->owner == b)
-                b->value = NULL;
-            // error for unassigned exports
-            /*
-            if (b->exportp && b->owner==b && b->value==NULL)
-                jl_errorf("identifier %s exported from %s is not initialized",
-                          jl_symbol_name(b->name), jl_symbol_name(newm->name));
-            */
-        }
-    }
-#endif
-
     JL_LOCK(&jl_modules_mutex);
     uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)newm);
     assert(*refcnt > (uintptr_t)HT_NOTFOUND);
@@ -286,13 +271,13 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     return (jl_value_t*)newm;
 }
 
-static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast)
+static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast, const char **toplevel_filename, int *toplevel_lineno)
 {
     jl_task_t *ct = jl_current_task;
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 3);
-    args[1] = jl_toplevel_eval_flex(m, x, fast, 0);
-    args[2] = jl_toplevel_eval_flex(m, f, fast, 0);
+    args[1] = jl_toplevel_eval_flex(m, x, fast, 0, toplevel_filename, toplevel_lineno);
+    args[2] = jl_toplevel_eval_flex(m, f, fast, 0, toplevel_filename, toplevel_lineno);
     if (jl_is_module(args[1])) {
         JL_TYPECHK(getglobal, symbol, args[2]);
         args[0] = jl_eval_global_var((jl_module_t*)args[1], (jl_sym_t*)args[2]);
@@ -308,30 +293,71 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f
     return args[0];
 }
 
-void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) {
+void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    jl_ptr_kind_union_t new_pku = encode_restriction(ty, BINDING_KIND_GLOBAL);
+    while (1) {
+        if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL) {
+            if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+                if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+                    break;
+                continue;
+            } else {
+                jl_errorf("cannot set type for imported global %s.%s.",
+                        jl_symbol_name(mod->name), jl_symbol_name(sym));
+            }
+        }
+        if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_errorf("cannot set type for imported constant %s.%s.",
+                    jl_symbol_name(mod->name), jl_symbol_name(sym));
+        }
+        jl_value_t *old_ty = decode_restriction_value(pku);
+        if (!jl_types_equal(ty, old_ty)) {
+            jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
+                    jl_symbol_name(mod->name), jl_symbol_name(sym));
+        }
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+            break;
+    }
+    jl_gc_wb(bpart, ty);
+}
+
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var);
+void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type) {
     // create uninitialized mutable binding for "global x" decl sometimes or probably
+    jl_module_t *gm;
+    jl_sym_t *gs;
+    assert(!jl_is_expr(arg)); // Should have been resolved before this
+    if (jl_is_globalref(arg)) {
+        gm = jl_globalref_mod(arg);
+        gs = jl_globalref_name(arg);
+    }
+    else {
+        assert(jl_is_symbol(arg));
+        gm = m;
+        gs = (jl_sym_t*)arg;
+    }
+    jl_binding_t *b = jl_get_module_binding(gm, gs, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    while (decode_restriction_kind(pku) == BINDING_KIND_GUARD || decode_restriction_kind(pku) == BINDING_KIND_FAILED) {
+        check_safe_newbinding(gm, gs);
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(NULL, BINDING_KIND_DECLARED)))
+            break;
+    }
+    if (set_type) {
+        jl_binding_set_type(b, gm, gs, set_type);
+    }
+}
+
+void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type)
+{
     size_t i, l = jl_array_nrows(ex->args);
     for (i = 0; i < l; i++) {
         jl_value_t *arg = jl_exprarg(ex, i);
-        jl_module_t *gm;
-        jl_sym_t *gs;
-        if (jl_is_globalref(arg)) {
-            gm = jl_globalref_mod(arg);
-            gs = jl_globalref_name(arg);
-        }
-        else {
-            assert(jl_is_symbol(arg));
-            gm = m;
-            gs = (jl_sym_t*)arg;
-        }
-        if (!jl_binding_resolved_p(gm, gs)) {
-            jl_binding_t *b = jl_get_binding_wr(gm, gs);
-            if (set_type) {
-                jl_value_t *old_ty = NULL;
-                // maybe set the type too, perhaps
-                jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-            }
-        }
+        jl_declare_global(m, arg, NULL);
     }
 }
 
@@ -397,9 +423,7 @@ static void expr_attributes(jl_value_t *v, jl_array_t *body, int *has_ccall, int
             jl_sym_t *name = jl_globalref_name(f);
             if (jl_binding_resolved_p(mod, name)) {
                 jl_binding_t *b = jl_get_binding(mod, name);
-                if (b && b->constp) {
-                    called = jl_atomic_load_relaxed(&b->value);
-                }
+                called = jl_get_binding_value_if_const(b);
             }
         }
         else if (jl_is_quotenode(f)) {
@@ -568,6 +592,7 @@ int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT
          ((jl_expr_t*)e)->head == jl_public_sym ||
          ((jl_expr_t*)e)->head == jl_thunk_sym ||
          ((jl_expr_t*)e)->head == jl_global_sym ||
+         ((jl_expr_t*)e)->head == jl_globaldecl_sym ||
          ((jl_expr_t*)e)->head == jl_const_sym ||
          ((jl_expr_t*)e)->head == jl_toplevel_sym ||
          ((jl_expr_t*)e)->head == jl_error_sym ||
@@ -598,13 +623,29 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT
     return 1;
 }
 
-static jl_method_instance_t *method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module)
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src)
 {
-    jl_method_instance_t *li = jl_new_method_instance_uninit();
-    jl_atomic_store_relaxed(&li->uninferred, (jl_value_t*)src);
-    li->specTypes = (jl_value_t*)jl_emptytuple_type;
-    li->def.module = module;
-    return li;
+    // Do not compress this, we expect it to be shortlived.
+    jl_code_instance_t *ci = jl_new_codeinst(mi, (jl_value_t*)jl_uninferred_sym,
+        (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing,
+        (jl_value_t*)src, 0, src->min_world, src->max_world,
+        0, NULL, 1, NULL);
+    return ci;
+}
+
+JL_DLLEXPORT jl_method_instance_t *jl_method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module)
+{
+    jl_method_instance_t *mi = jl_new_method_instance_uninit();
+    mi->specTypes = (jl_value_t*)jl_emptytuple_type;
+    mi->def.module = module;
+    JL_GC_PUSH1(&mi);
+
+    jl_code_instance_t *ci = jl_new_codeinst_for_uninferred(mi, src);
+    jl_atomic_store_relaxed(&mi->cache, ci);
+    jl_gc_wb(mi, ci);
+
+    JL_GC_POP();
+    return mi;
 }
 
 static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym_t *asname)
@@ -612,21 +653,16 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym
     assert(m);
     jl_sym_t *name = asname ? asname : import->name;
     // TODO: this is a bit race-y with what error message we might print
-    jl_binding_t *b = jl_get_module_binding(m, name, 0);
-    jl_binding_t *b2;
-    if (b != NULL && (b2 = jl_atomic_load_relaxed(&b->owner)) != NULL) {
-        if (b2->constp && jl_atomic_load_relaxed(&b2->value) == (jl_value_t*)import)
-            return;
-        if (b2 != b)
-            jl_errorf("importing %s into %s conflicts with an existing global",
-                      jl_symbol_name(name), jl_symbol_name(m->name));
-    }
-    else {
-        b = jl_get_binding_wr(m, name);
+    jl_binding_t *b = jl_get_module_binding(m, name, 1);
+    if (jl_get_binding_value_if_const(b) == (jl_value_t*)import)
+        return;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GUARD && decode_restriction_kind(pku) != BINDING_KIND_FAILED) {
+        jl_errorf("importing %s into %s conflicts with an existing global",
+                    jl_symbol_name(name), jl_symbol_name(m->name));
     }
-    jl_declare_constant(b, m, name);
-    jl_checked_assignment(b, m, name, (jl_value_t*)import);
-    b->imported = 1;
+    jl_declare_constant_val2(b, m, name, (jl_value_t*)import, BINDING_KIND_CONST_IMPORT);
 }
 
 // in `import A.B: x, y, ...`, evaluate the `A.B` part if it exists
@@ -642,7 +678,7 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_
                     jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword);
                     if (name != NULL) {
                         from = (jl_module_t*)jl_eval_global_var(from, name);
-                        if (!jl_is_module(from))
+                        if (!from || !jl_is_module(from))
                             jl_errorf("invalid %s path: \"%s\" does not name a module", keyword, jl_symbol_name(name));
                     }
                     return from;
@@ -666,46 +702,111 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword
 // Eval `throw(ErrorException(msg)))` in module `m`.
 // Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error
 // location in julia code gets into the backtrace.
-static void jl_eval_throw(jl_module_t *m, jl_value_t *exc)
+static void jl_eval_throw(jl_module_t *m, jl_value_t *exc, const char *filename, int lineno)
 {
     jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2);
     JL_GC_PUSH1(&throw_ex);
     jl_exprargset(throw_ex, 0, jl_builtin_throw);
     jl_exprargset(throw_ex, 1, exc);
-    jl_toplevel_eval_flex(m, throw_ex, 0, 0);
+    jl_toplevel_eval_flex(m, throw_ex, 0, 0, &filename, &lineno);
     JL_GC_POP();
 }
 
 // Format error message and call jl_eval
-static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
+static void jl_eval_errorf(jl_module_t *m, const char *filename, int lineno, const char* fmt, ...)
 {
     va_list args;
     va_start(args, fmt);
     jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args);
     va_end(args);
     JL_GC_PUSH1(&exc);
-    jl_eval_throw(m, exc);
+    jl_eval_throw(m, exc, filename, lineno);
     JL_GC_POP();
 }
 
-jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded)
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val, enum jl_partition_kind constant_kind)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    int did_warn = 0;
+    while (1) {
+        if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            if (!val)
+                return bpart;
+            jl_value_t *old = decode_restriction_value(pku);
+            if (jl_egal(val, old))
+                break;
+            if (!did_warn) {
+                if (jl_typeof(val) != jl_typeof(old) || jl_is_type(val) || jl_is_module(val))
+                    jl_errorf("invalid redefinition of constant %s.%s",
+                        jl_symbol_name(mod->name),
+                        jl_symbol_name(var));
+                else
+                    jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n",
+                        jl_symbol_name(mod->name),
+                        jl_symbol_name(var));
+                did_warn = 1;
+            }
+        } else if (!jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (jl_bkind_is_some_import(decode_restriction_kind(pku))) {
+                jl_errorf("cannot declare %s.%s constant; it was already declared as an import",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+            } else {
+                jl_errorf("cannot declare %s.%s constant; it was already declared global",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+            }
+        }
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(val, constant_kind))) {
+            jl_gc_wb(bpart, val);
+            break;
+        }
+    }
+    return bpart;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val)
+{
+    return jl_declare_constant_val2(b, mod, var, val, BINDING_KIND_CONST);
+}
+
+JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val)
+{
+    jl_module_t *gm;
+    jl_sym_t *gs;
+    if (jl_is_globalref(arg)) {
+        gm = jl_globalref_mod(arg);
+        gs = jl_globalref_name(arg);
+    }
+    else {
+        assert(jl_is_symbol(arg));
+        gm = m;
+        gs = (jl_sym_t*)arg;
+    }
+    jl_binding_t *b = jl_get_module_binding(gm, gs, 1);
+    jl_declare_constant_val(b, gm, gs, val);
+}
+
+JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno)
 {
     jl_task_t *ct = jl_current_task;
     if (!jl_is_expr(e)) {
         if (jl_is_linenode(e)) {
-            jl_lineno = jl_linenode_line(e);
+            *toplevel_lineno = jl_linenode_line(e);
             jl_value_t *file = jl_linenode_file(e);
             if (file != jl_nothing) {
                 assert(jl_is_symbol(file));
-                jl_filename = jl_symbol_name((jl_sym_t*)file);
+                *toplevel_filename = jl_symbol_name((jl_sym_t*)file);
             }
+            // Not thread safe. For debugging and last resort error messages (jl_critical_error) only.
+            jl_filename = *toplevel_filename;
+            jl_lineno = *toplevel_lineno;
             return jl_nothing;
         }
         if (jl_is_symbol(e)) {
             char *n = jl_symbol_name((jl_sym_t*)e), *n0 = n;
             while (*n == '_') ++n;
             if (*n == 0 && n > n0)
-                jl_eval_errorf(m, "all-underscore identifiers are write-only and their values cannot be used in expressions");
+                jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "all-underscore identifiers are write-only and their values cannot be used in expressions");
         }
         return jl_interpret_toplevel_expr_in(m, e, NULL, NULL);
     }
@@ -714,12 +815,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     if (ex->head == jl_dot_sym && jl_expr_nargs(ex) != 1) {
         if (jl_expr_nargs(ex) != 2)
-            jl_eval_errorf(m, "syntax: malformed \".\" expression");
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "syntax: malformed \".\" expression");
         jl_value_t *lhs = jl_exprarg(ex, 0);
         jl_value_t *rhs = jl_exprarg(ex, 1);
         // only handle `a.b` syntax here, so qualified names can be eval'd in pure contexts
         if (jl_is_quotenode(rhs) && jl_is_symbol(jl_fieldref(rhs, 0))) {
-            return jl_eval_dot_expr(m, lhs, rhs, fast);
+            return jl_eval_dot_expr(m, lhs, rhs, fast, toplevel_filename, toplevel_lineno);
         }
     }
 
@@ -729,12 +830,13 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     jl_method_instance_t *mfunc = NULL;
     jl_code_info_t *thk = NULL;
-    JL_GC_PUSH3(&mfunc, &thk, &ex);
+    jl_value_t *root = NULL;
+    JL_GC_PUSH4(&mfunc, &thk, &ex, &root);
 
     size_t last_age = ct->world_age;
     if (!expanded && jl_needs_lowering(e)) {
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno);
+        ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, *toplevel_filename, *toplevel_lineno);
         ct->world_age = last_age;
     }
     jl_sym_t *head = jl_is_expr(ex) ? ex->head : NULL;
@@ -757,16 +859,17 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
             if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
                 name = NULL;
                 jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "using");
-                jl_module_t *u = import;
-                if (name != NULL)
-                    u = (jl_module_t*)jl_eval_global_var(import, name);
                 if (from) {
                     // `using A: B` and `using A: B.c` syntax
                     jl_module_use(m, import, name);
                 }
                 else {
+                    jl_module_t *u = import;
+                    if (name != NULL)
+                        u = (jl_module_t*)jl_eval_global_var(import, name);
                     if (!jl_is_module(u))
-                        jl_eval_errorf(m, "invalid using path: \"%s\" does not name a module",
+                        jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                            "invalid using path: \"%s\" does not name a module",
                                        jl_symbol_name(name));
                     // `using A` and `using A.B` syntax
                     jl_module_using(m, u);
@@ -792,7 +895,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
                     continue;
                 }
             }
-            jl_eval_errorf(m, "syntax: malformed \"using\" statement");
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "syntax: malformed \"using\" statement");
         }
         JL_GC_POP();
         return jl_nothing;
@@ -839,7 +943,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
                     continue;
                 }
             }
-            jl_eval_errorf(m, "syntax: malformed \"import\" statement");
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "syntax: malformed \"import\" statement");
         }
         JL_GC_POP();
         return jl_nothing;
@@ -849,33 +954,25 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         for (size_t i = 0; i < jl_array_nrows(ex->args); i++) {
             jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i);
             if (!jl_is_symbol(name))
-                jl_eval_errorf(m, exp ? "syntax: malformed \"export\" statement" :
-                                        "syntax: malformed \"public\" statement");
+                jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                     exp ? "syntax: malformed \"export\" statement" :
+                           "syntax: malformed \"public\" statement");
             jl_module_public(m, name, exp);
         }
         JL_GC_POP();
         return jl_nothing;
     }
     else if (head == jl_global_sym) {
-        jl_eval_global_expr(m, ex, 0);
+        size_t i, l = jl_array_nrows(ex->args);
+        for (i = 0; i < l; i++) {
+            jl_value_t *arg = jl_exprarg(ex, i);
+            jl_declare_global(m, arg, NULL);
+        }
         JL_GC_POP();
         return jl_nothing;
     }
     else if (head == jl_const_sym) {
-        jl_sym_t *arg = (jl_sym_t*)jl_exprarg(ex, 0);
-        jl_module_t *gm;
-        jl_sym_t *gs;
-        if (jl_is_globalref(arg)) {
-            gm = jl_globalref_mod(arg);
-            gs = jl_globalref_name(arg);
-        }
-        else {
-            assert(jl_is_symbol(arg));
-            gm = m;
-            gs = (jl_sym_t*)arg;
-        }
-        jl_binding_t *b = jl_get_binding_wr(gm, gs);
-        jl_declare_constant(b, gm, gs);
+        jl_eval_const_decl(m, jl_exprarg(ex, 0), NULL);
         JL_GC_POP();
         return jl_nothing;
     }
@@ -883,17 +980,19 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         jl_value_t *res = jl_nothing;
         int i;
         for (i = 0; i < jl_array_nrows(ex->args); i++) {
-            res = jl_toplevel_eval_flex(m, jl_array_ptr_ref(ex->args, i), fast, 0);
+            res = jl_toplevel_eval_flex(m, jl_array_ptr_ref(ex->args, i), fast, 0, toplevel_filename, toplevel_lineno);
         }
         JL_GC_POP();
         return res;
     }
     else if (head == jl_error_sym || head == jl_incomplete_sym) {
         if (jl_expr_nargs(ex) == 0)
-            jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head));
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "malformed \"%s\" expression", jl_symbol_name(head));
         if (jl_is_string(jl_exprarg(ex, 0)))
-            jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0)));
-        jl_eval_throw(m, jl_exprarg(ex, 0));
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "syntax: %s", jl_string_data(jl_exprarg(ex, 0)));
+        jl_eval_throw(m, jl_exprarg(ex, 0), *toplevel_filename, *toplevel_lineno);
     }
     else if (jl_is_symbol(ex)) {
         JL_GC_POP();
@@ -908,7 +1007,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
     assert(head == jl_thunk_sym);
     thk = (jl_code_info_t*)jl_exprarg(ex, 0);
     if (!jl_is_code_info(thk) || !jl_typetagis(thk->code, jl_array_any_type)) {
-        jl_eval_errorf(m, "malformed \"thunk\" statement");
+        jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+            "malformed \"thunk\" statement");
     }
     body_attributes((jl_array_t*)thk->code, &has_ccall, &has_defs, &has_loops, &has_opaque, &forced_compile);
 
@@ -920,7 +1020,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
             jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
             jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
         // use codegen
-        mfunc = method_instance_for_thunk(thk, m);
+        mfunc = jl_method_instance_for_thunk(thk, m);
         jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
         // Don't infer blocks containing e.g. method definitions, since it's probably not
         // worthwhile and also unsound (see #24316).
@@ -929,7 +1029,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         size_t world = jl_atomic_load_acquire(&jl_world_counter);
         ct->world_age = world;
         if (!has_defs && jl_get_module_infer(m) != 0) {
-            (void)jl_type_infer(mfunc, world, 0, SOURCE_MODE_NOT_REQUIRED);
+            (void)jl_type_infer(mfunc, world, SOURCE_MODE_NOT_REQUIRED);
         }
         result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc);
         ct->world_age = last_age;
@@ -949,7 +1049,9 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v)
 {
-    return jl_toplevel_eval_flex(m, v, 1, 0);
+    const char *filename = jl_filename;
+    int lieno = jl_lineno;
+    return jl_toplevel_eval_flex(m, v, 1, 0, &filename, &lieno);
 }
 
 // Check module `m` is open for `eval/include`, or throw an error.
@@ -1006,11 +1108,11 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex)
 
 JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m)
 {
-    jl_method_instance_t *li = method_instance_for_thunk(thk, m);
+    jl_method_instance_t *li = jl_method_instance_for_thunk(thk, m);
     JL_GC_PUSH1(&li);
     jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
     jl_task_t *ct = jl_current_task;
-    jl_code_instance_t *ci = jl_type_infer(li, ct->world_age, 0, SOURCE_MODE_NOT_REQUIRED);
+    jl_code_instance_t *ci = jl_type_infer(li, ct->world_age, SOURCE_MODE_NOT_REQUIRED);
     JL_GC_POP();
     if (ci)
         return ci->rettype;
@@ -1049,7 +1151,8 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     size_t last_age = ct->world_age;
     int lineno = 0;
     jl_lineno = 0;
-    jl_filename = jl_string_data(filename);
+    const char *filename_str = jl_string_data(filename);
+    jl_filename = filename_str;
     int err = 0;
 
     JL_TRY {
@@ -1064,7 +1167,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
             expression = jl_expand_with_loc_warn(expression, module,
                                                  jl_string_data(filename), lineno);
             ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-            result = jl_toplevel_eval_flex(module, expression, 1, 1);
+            result = jl_toplevel_eval_flex(module, expression, 1, 1, &filename_str, &lineno);
         }
     }
     JL_CATCH {
@@ -1081,7 +1184,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
             jl_rethrow();
         else
             jl_rethrow_other(jl_new_struct(jl_loaderror_type, filename, result,
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
     }
     JL_GC_POP();
     return result;
diff --git a/src/win32_ucontext.c b/src/win32_ucontext.c
index c6d4372308004..ca54877f97728 100644
--- a/src/win32_ucontext.c
+++ b/src/win32_ucontext.c
@@ -62,6 +62,8 @@ void jl_makecontext(win32_ucontext_t *ucp, void (*func)(void))
     Registration[0].Handler = &__julia_personality;
     Registration[1].Next = (PEXCEPTION_REGISTRATION_RECORD)0xFFFFFFFF;
     Registration[1].Handler = UnHandler;
+#else
+#error jl_makecontext not defined for CPU type
 #endif
     stack_top -= sizeof(void*);
     *(void**)stack_top = 0; // push rta
diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h
index 73c6e34e36de8..9ec283b610e62 100644
--- a/src/work-stealing-queue.h
+++ b/src/work-stealing-queue.h
@@ -3,6 +3,8 @@
 #ifndef WORK_STEALING_QUEUE_H
 #define WORK_STEALING_QUEUE_H
 
+#include <stdalign.h>
+
 #include "julia_atomics.h"
 #include "assert.h"
 
@@ -41,10 +43,10 @@ static inline void free_ws_array(ws_array_t *a)
 }
 
 typedef struct {
-    _Atomic(int64_t) top;
-    char _padding[JL_CACHE_BYTE_ALIGNMENT - sizeof(_Atomic(int64_t))];
-    _Atomic(int64_t) bottom; // put on a separate cache line. conservatively estimate cache line size as 128 bytes
-    _Atomic(ws_array_t *) array;
+    // align to JL_CACHE_BYTE_ALIGNMENT
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) top;
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) bottom;
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(ws_array_t *) array;
 } ws_queue_t;
 
 static inline ws_array_t *ws_queue_push(ws_queue_t *q, void *elt, int32_t eltsz) JL_NOTSAFEPOINT
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index 968d190c2b443..9bca72f6c7a14 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -9,7 +9,9 @@ that would be convenient to place within an immutable, life-cycled datastore.
 module Artifacts
 
 import Base: get, SHA1
-using Base.BinaryPlatforms, Base.TOML
+using Base.BinaryPlatforms: AbstractPlatform, Platform, HostPlatform
+using Base.BinaryPlatforms: tags, triplet, select_platform
+using Base.TOML: TOML
 
 export artifact_exists, artifact_path, artifact_meta, artifact_hash,
        select_downloadable_artifacts, find_artifacts_toml, @artifact_str
@@ -173,13 +175,11 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
         end
     end
 
-    overrides = Dict{Symbol,Any}(
-        # Overrides by UUID
-        :UUID => overrides_uuid,
-
-        # Overrides by hash
-        :hash => overrides_hash
-    )
+    overrides = Dict{Symbol,Any}()
+    # Overrides by UUID
+    overrides[:UUID] = overrides_uuid
+    # Overrides by hash
+    overrides[:hash] = overrides_hash
 
     ARTIFACT_OVERRIDES[] = overrides
     return overrides
@@ -349,7 +349,7 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID)
 
             # If we've got a platform-specific friend, override all hashes:
             artifact_dict_name = artifact_dict[name]
-            if isa(artifact_dict_name, Array)
+            if isa(artifact_dict_name, Vector{Any})
                 for entry in artifact_dict_name
                     entry = entry::Dict{String,Any}
                     hash = SHA1(entry["git-tree-sha1"]::String)
@@ -400,7 +400,7 @@ function artifact_meta(name::String, artifact_dict::Dict, artifacts_toml::String
 
     # If it's an array, find the entry that best matches our current platform
     if isa(meta, Vector)
-        dl_dict = Dict{AbstractPlatform,Dict{String,Any}}()
+        dl_dict = Dict{Platform,Dict{String,Any}}()
         for x in meta
             x = x::Dict{String, Any}
             dl_dict[unpack_platform(x, name, artifacts_toml)] = x
@@ -542,11 +542,11 @@ function jointail(dir, tail)
     end
 end
 
-function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts))
-    moduleroot = Base.moduleroot(__module__)
-    if haskey(Base.module_keys, moduleroot)
+function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts
+    pkg = Base.PkgId(__module__)
+    if pkg.uuid !== nothing
         # Process overrides for this UUID, if we know what it is
-        process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid)
+        process_overrides(artifact_dict, pkg.uuid)
     end
 
     # If the artifact exists, we're in the happy path and we can immediately
@@ -561,11 +561,11 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic
     # If not, try determining what went wrong:
     meta = artifact_meta(name, artifact_dict, artifacts_toml; platform)
     if meta !== nothing && get(meta, "lazy", false)
-        if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed)
-            if nameof(lazyartifacts) in (:Pkg, :Artifacts)
+        if LazyArtifacts isa Module && isdefined(LazyArtifacts, :ensure_artifact_installed)
+            if nameof(LazyArtifacts) in (:Pkg, :Artifacts)
                 Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true)
             end
-            return jointail(lazyartifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail)
+            return jointail(LazyArtifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail)
         end
         error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.")
     end
@@ -693,14 +693,14 @@ macro artifact_str(name, platform=nothing)
     local artifact_dict = load_artifacts_toml(artifacts_toml)
 
     # Invalidate calling .ji file if Artifacts.toml file changes
-    Base.include_dependency(artifacts_toml)
+    Base.include_dependency(artifacts_toml, track_content = true)
 
     # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts
     # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported.
-    lazyartifacts = nothing
+    LazyArtifacts = nothing
     for module_name in (:LazyArtifacts, :Pkg, :Artifacts)
         if isdefined(__module__, module_name)
-            lazyartifacts = GlobalRef(__module__, module_name)
+            LazyArtifacts = GlobalRef(__module__, module_name)
             break
         end
     end
@@ -712,7 +712,7 @@ macro artifact_str(name, platform=nothing)
         platform = HostPlatform()
         artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform)
         return quote
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), Val($(LazyArtifacts)))::String
         end
     else
         if platform === nothing
@@ -721,7 +721,7 @@ macro artifact_str(name, platform=nothing)
         return quote
             local platform = $(esc(platform))
             local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform)
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, Val($(LazyArtifacts)))::String
         end
     end
 end
@@ -759,6 +759,6 @@ precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},))
 precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String))
 precompile(parse_mapping, (String, String, String))
 precompile(parse_mapping, (Dict{String, Any}, String, String))
-
+precompile(Tuple{typeof(Artifacts._artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Any})
 
 end # module Artifacts
diff --git a/stdlib/CRC32c/src/CRC32c.jl b/stdlib/CRC32c/src/CRC32c.jl
index 35d2d4cb339d6..03bef027bde16 100644
--- a/stdlib/CRC32c/src/CRC32c.jl
+++ b/stdlib/CRC32c/src/CRC32c.jl
@@ -8,6 +8,7 @@ See [`CRC32c.crc32c`](@ref) for more information.
 module CRC32c
 
 import Base.FastContiguousSubArray
+import Base: DenseBytes
 
 export crc32c
 
@@ -35,7 +36,10 @@ but note that the result may be endian-dependent.
 function crc32c end
 
 
-crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = Base._crc32c(a, crc)
+function crc32c(a::DenseBytes, crc::UInt32=0x00000000)
+    Base._crc32c(a, crc)
+end
+
 crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) = Base._crc32c(s, crc)
 
 """
diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml
index 3e15ff6b87b71..5aab865b5f6fc 100644
--- a/stdlib/CompilerSupportLibraries_jll/Project.toml
+++ b/stdlib/CompilerSupportLibraries_jll/Project.toml
@@ -4,7 +4,7 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 # NOTE: When updating this, also make sure to update the value
 # `CSL_NEXT_GLIBCXX_VERSION` in `Make.inc`, to properly disable
 # automatic usage of BB-built CSLs on extremely up-to-date systems!
-version = "1.1.0+0"
+version = "1.1.1+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
index bd7a0571f9d5a..b4df77c5167da 100644
--- a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
+++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule CompilerSupportLibraries_jll
 using Base, Libdl, Base.BinaryPlatforms
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
@@ -15,6 +14,8 @@ export libgfortran, libstdcxx, libgomp
 const PATH = Ref("")
 const LIBPATH = Ref("")
 artifact_dir::String = ""
+libgcc_s_handle::Ptr{Cvoid} = C_NULL
+libgcc_s_path::String = ""
 libgfortran_handle::Ptr{Cvoid} = C_NULL
 libgfortran_path::String = ""
 libstdcxx_handle::Ptr{Cvoid} = C_NULL
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index 35ec6771efc55..38b4f7ae86d29 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -93,7 +93,7 @@ parser know which periods to parse in each slot.
 
 As in the case of constructors above such as `Date(2013)`, delimited `DateFormat`s allow for
 missing parts of dates and times so long as the preceding parts are given. The other parts are given the usual
-default values.  For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
+default values. For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
 `Date("31/12", dateformat"d/m/y")` gives `0001-12-31`.  (Note that the default year is
 1 AD/CE.)
 An empty string, however, always throws an `ArgumentError`.
@@ -545,7 +545,7 @@ it could represent, in days, a value of 28, 29, 30, or 31 depending on the year
 Or a year could represent 365 or 366 days in the case of a leap year. [`Period`](@ref) types are
 simple [`Int64`](@ref) wrappers and are constructed by wrapping any `Int64` convertible type, i.e. `Year(1)`
 or `Month(3.0)`. Arithmetic between [`Period`](@ref) of the same type behave like integers, and
-limited `Period-Real` arithmetic is available.  You can extract the underlying integer with
+limited `Period-Real` arithmetic is available. You can extract the underlying integer with
 [`Dates.value`](@ref).
 
 ```jldoctest
@@ -684,9 +684,9 @@ value in the days field is uncertain.
 See the [API reference](@ref stdlib-dates-api) for additional information
 on methods exported from the `Dates` module.
 
-# [API reference](@id stdlib-dates-api)
+## [API reference](@id stdlib-dates-api)
 
-## Dates and Time Types
+### Dates and Time Types
 
 ```@docs
 Dates.Period
@@ -701,7 +701,7 @@ Dates.TimeZone
 Dates.UTC
 ```
 
-## Dates Functions
+### Dates Functions
 
 ```@docs
 Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
@@ -730,7 +730,7 @@ Dates.now(::Type{Dates.UTC})
 Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType})
 ```
 
-### Accessor Functions
+#### Accessor Functions
 
 ```@docs
 Dates.year
@@ -758,7 +758,7 @@ Dates.monthday
 Dates.yearmonthday
 ```
 
-### Query Functions
+#### Query Functions
 
 ```@docs
 Dates.dayname
@@ -777,7 +777,7 @@ Dates.quarterofyear
 Dates.dayofquarter
 ```
 
-### Adjuster Functions
+#### Adjuster Functions
 
 ```@docs
 Base.trunc(::Dates.TimeType, ::Type{Dates.Period})
@@ -797,7 +797,7 @@ Dates.tonext(::Function, ::Dates.TimeType)
 Dates.toprev(::Function, ::Dates.TimeType)
 ```
 
-### Periods
+#### Periods
 
 ```@docs
 Dates.Period(::Any)
@@ -808,7 +808,7 @@ Dates.default
 Dates.periods
 ```
 
-### Rounding Functions
+#### Rounding Functions
 
 `Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes)
 with `floor`, `ceil`, or `round`.
@@ -837,7 +837,7 @@ Dates.date2epochdays
 Dates.datetime2epochms
 ```
 
-### Conversion Functions
+#### Conversion Functions
 
 ```@docs
 Dates.today
diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl
index 3980ad3a7245f..388edb693d76f 100644
--- a/stdlib/Dates/src/io.jl
+++ b/stdlib/Dates/src/io.jl
@@ -111,7 +111,25 @@ end
 
 ### Parse tokens
 
-for c in "yYmdHIMS"
+for c in "yY"
+    @eval begin
+        @inline function tryparsenext(d::DatePart{$c}, str, i, len)
+            val = tryparsenext_sign(str, i, len)
+            if val !== nothing
+                coefficient, i = val
+            else
+                coefficient = 1
+            end
+            # The sign character does not affect fixed length `DatePart`s
+            val = tryparsenext_base10(str, i, len, min_width(d), max_width(d))
+            val === nothing && return nothing
+            y, ii = val
+            return y * coefficient, ii
+        end
+    end
+end
+
+for c in "mdHIMS"
     @eval begin
         @inline function tryparsenext(d::DatePart{$c}, str, i, len)
             return tryparsenext_base10(str, i, len, min_width(d), max_width(d))
diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl
index 62d44177de877..e8624cf9243c5 100644
--- a/stdlib/Dates/src/parse.jl
+++ b/stdlib/Dates/src/parse.jl
@@ -156,6 +156,18 @@ If successful, returns a 2-element tuple `(values, pos)`:
     end
 end
 
+@inline function tryparsenext_sign(str::AbstractString, i::Int, len::Int)
+    i > len && return nothing
+    c, ii = iterate(str, i)::Tuple{Char, Int}
+    if c == '+'
+        return 1, ii
+    elseif c == '-'
+        return -1, ii
+    else
+        return nothing
+    end
+end
+
 @inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int, min_width::Int=1, max_width::Int=0)
     i > len && return nothing
     min_pos = min_width <= 0 ? i : i + min_width - 1
@@ -200,14 +212,22 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF
     i, end_pos = firstindex(s), lastindex(s)
     i > end_pos && throw(ArgumentError("Cannot parse an empty string as a DateTime"))
 
+    coefficient = 1
     local dy
     dm = dd = Int64(1)
     th = tm = ts = tms = Int64(0)
 
+    # Optional sign
+    let val = tryparsenext_sign(s, i, end_pos)
+        if val !== nothing
+            coefficient, i = val
+        end
+    end
+
     let val = tryparsenext_base10(s, i, end_pos, 1)
         val === nothing && @goto error
         dy, i = val
-        i > end_pos && @goto error
+        i > end_pos && @goto done
     end
 
     c, i = iterate(s, i)::Tuple{Char, Int}
@@ -272,7 +292,7 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF
     end
 
     @label done
-    return DateTime(dy, dm, dd, th, tm, ts, tms)
+    return DateTime(dy * coefficient, dm, dd, th, tm, ts, tms)
 
     @label error
     throw(ArgumentError("Invalid DateTime string"))
diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl
index c88a1bed4bba9..8f28f95d4a90e 100644
--- a/stdlib/Dates/src/periods.jl
+++ b/stdlib/Dates/src/periods.jl
@@ -451,11 +451,11 @@ toms(c::Second)      = 1000 * value(c)
 toms(c::Minute)      = 60000 * value(c)
 toms(c::Hour)        = 3600000 * value(c)
 toms(c::Period)      = 86400000 * days(c)
-toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(toms, c.periods))
+toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, toms(p))::Float64, c.periods)
 tons(x)              = toms(x) * 1000000
 tons(x::Microsecond) = value(x) * 1000
 tons(x::Nanosecond)  = value(x)
-tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(tons, c.periods))
+tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, tons(p))::Float64, c.periods)
 days(c::Millisecond) = div(value(c), 86400000)
 days(c::Second)      = div(value(c), 86400)
 days(c::Minute)      = div(value(c), 1440)
@@ -465,7 +465,7 @@ days(c::Week)        = 7 * value(c)
 days(c::Year)        = 365.2425 * value(c)
 days(c::Quarter)     = 91.310625 * value(c)
 days(c::Month)       = 30.436875 * value(c)
-days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(days, c.periods))
+days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, days(p))::Float64, c.periods)
 seconds(x::Nanosecond) = value(x) / 1000000000
 seconds(x::Microsecond) = value(x) / 1000000
 seconds(x::Millisecond) = value(x) / 1000
diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl
index 7391c277b0718..1978864b92554 100644
--- a/stdlib/Dates/src/types.jl
+++ b/stdlib/Dates/src/types.jl
@@ -203,7 +203,7 @@ function totaldays(y, m, d)
 end
 
 # If the year is divisible by 4, except for every 100 years, except for every 400 years
-isleapyear(y) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
+isleapyear(y::Integer) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
 
 # Number of days in month
 const DAYSINMONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
@@ -492,3 +492,8 @@ end
 
 Base.OrderStyle(::Type{<:AbstractTime}) = Base.Ordered()
 Base.ArithmeticStyle(::Type{<:AbstractTime}) = Base.ArithmeticWraps()
+
+# minimal Base.TOML support
+Date(d::Base.TOML.Date) = Date(d.year, d.month, d.day)
+Time(t::Base.TOML.Time) = Time(t.hour, t.minute, t.second, t.ms)
+DateTime(dt::Base.TOML.DateTime) = DateTime(Date(dt.date), Time(dt.time))
diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl
index 092a58d5d70d1..98bc610784477 100644
--- a/stdlib/Dates/test/io.jl
+++ b/stdlib/Dates/test/io.jl
@@ -325,6 +325,23 @@ end
     # From Matt Bauman
     f = "yyyy-mm-ddTHH:MM:SS"
     @test Dates.DateTime("2014-05-28T16:46:04", f) == Dates.DateTime(2014, 5, 28, 16, 46, 04)
+
+    f = "yyyymmdd"
+    @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21)
+    @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21)
+    f = "YYYYmmdd"
+    @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21)
+    @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21)
+    f = "-yyyymmdd"
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test_throws ArgumentError Dates.DateTime("+20240521", f)
+    @test_throws ArgumentError Dates.DateTime("20240521", f)
+    f = "-YYYYmmdd"
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test_throws ArgumentError Dates.DateTime("+20240521", f)
+    @test_throws ArgumentError Dates.DateTime("20240521", f)
 end
 
 @testset "Error handling" begin
@@ -403,6 +420,17 @@ end
     @test_throws ArgumentError parse(Date, "Foo, 12 Nov 2016 07:45:36", Dates.RFC1123Format)
 end
 
+@testset "ISODateTimeFormat" begin
+    dt = Dates.DateTime(2024, 5, 21, 10, 57, 22)
+    neg_dt = Dates.DateTime(-2024, 5, 21, 10, 57, 22)
+    @test parse(Dates.DateTime, "2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt
+    @test parse(Dates.DateTime, "+2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt
+    @test parse(Dates.DateTime, "-2024-05-21T10:57:22", Dates.ISODateTimeFormat) == neg_dt
+
+    @test_throws ArgumentError parse(Dates.DateTime, "-", Dates.ISODateTimeFormat)
+    @test_throws ArgumentError parse(Dates.DateTime, "+", Dates.ISODateTimeFormat)
+end
+
 @testset "Issue 15195" begin
     f = "YY"
     @test Dates.format(Dates.Date(1999), f) == "1999"
@@ -470,6 +498,9 @@ end
 # Issue #44003
 @test tryparse(Dates.Date, "2017", Dates.DateFormat(".s")) === nothing
 
+# Issue #52989
+@test Dates.DateTime("2000") == Dates.DateTime(2000)
+
 @testset "parse milliseconds, Issue #22100" begin
     @test Dates.DateTime("2017-Mar-17 00:00:00.0000", "y-u-d H:M:S.s") == Dates.DateTime(2017, 3, 17)
     @test Dates.parse_components(".1", Dates.DateFormat(".s")) == [Dates.Millisecond(100)]
@@ -618,4 +649,9 @@ end
     end
 end
 
+@testset "Issue #50328: parsing negative years" begin
+    @test Date("-2013-10-10") == Date(-2013, 10, 10)
+    @test Date("-2013") == Date(-2013, 01, 01)
+end
+
 end
diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl
index 35a793867dc5a..f5284b376ca4a 100644
--- a/stdlib/Dates/test/types.jl
+++ b/stdlib/Dates/test/types.jl
@@ -41,6 +41,7 @@ end
     @test Dates.isleapyear(-1) == false
     @test Dates.isleapyear(4) == true
     @test Dates.isleapyear(-4) == true
+    @test_throws MethodError Dates.isleapyear(Dates.Year(1992))
 end
 # Create "test" check manually
 y = Dates.Year(1)
diff --git a/stdlib/Distributed.version b/stdlib/Distributed.version
index b0acacf2367de..02eac7eadf0ad 100644
--- a/stdlib/Distributed.version
+++ b/stdlib/Distributed.version
@@ -1,4 +1,4 @@
 DISTRIBUTED_BRANCH = master
-DISTRIBUTED_SHA1 = 6a07d9853ab7686df7440a47d1b585c6c9f3be35
+DISTRIBUTED_SHA1 = 6c7cdb5860fa5cb9ca191ce9c52a3d25a9ab3781
 DISTRIBUTED_GIT_URL := https://github.com/JuliaLang/Distributed.jl
 DISTRIBUTED_TAR_URL = https://api.github.com/repos/JuliaLang/Distributed.jl/tarball/$1
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 7805348a4b2f5..cb041d86d7f66 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,4 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = a9d274ff6588cc5dbfa90e908ee34c2408bab84a
+DOWNLOADS_SHA1 = 1061ecc377a053fce0df94e1a19e5260f7c030f5
 DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
 DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index 0ee572ec47d92..0c987ad01c828 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -468,6 +468,11 @@ function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr)
     nothing
 end
 
+global uv_jl_pollcb::Ptr{Cvoid}
+global uv_jl_fspollcb::Ptr{Cvoid}
+global uv_jl_fseventscb_file::Ptr{Cvoid}
+global uv_jl_fseventscb_folder::Ptr{Cvoid}
+
 function __init__()
     global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
     global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid}))
diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl
index 0ee86e70f8465..4c821a3d897e4 100644
--- a/stdlib/FileWatching/src/pidfile.jl
+++ b/stdlib/FileWatching/src/pidfile.jl
@@ -280,7 +280,7 @@ function open_exclusive(path::String;
 end
 
 function _rand_filename(len::Int=4) # modified from Base.Libc
-    slug = Base.StringVector(len)
+    slug = Base.StringMemory(len)
     chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     for i = 1:len
         slug[i] = chars[(Libc.rand() % length(chars)) + 1]
diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl
index fde2fc15acf90..ae8b3c0b3e7d5 100644
--- a/stdlib/GMP_jll/src/GMP_jll.jl
+++ b/stdlib/GMP_jll/src/GMP_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/GMP_jll.jl
 baremodule GMP_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index a0020090e451d..835988ddf149f 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -104,7 +104,7 @@ function versioninfo(io::IO=stdout; verbose::Bool=false)
     if !isempty(Base.GIT_VERSION_INFO.commit_short)
         println(io, "Commit $(Base.GIT_VERSION_INFO.commit_short) ($(Base.GIT_VERSION_INFO.date_string))")
     end
-    official_release = Base.TAGGED_RELEASE_BANNER == "Official https://julialang.org/ release"
+    official_release = Base.TAGGED_RELEASE_BANNER == "Official https://julialang.org release"
     if Base.isdebugbuild() || !isempty(Base.TAGGED_RELEASE_BANNER) || (Base.GIT_VERSION_INFO.tagged_commit && !official_release)
         println(io, "Build Info:")
         if Base.isdebugbuild()
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index d03e2b27a2286..9f1538cd4a7fe 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -143,7 +143,7 @@ See the [`@code_warntype`](@ref man-code-warntype) section in the Performance Ti
 
 See also: [`@code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref), [`code_native`](@ref).
 """
-function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f));
+function code_warntype(io::IO, @nospecialize(f), @nospecialize(tt=Base.default_tt(f));
                        world=Base.get_world_counter(),
                        interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world),
                        debuginfo::Symbol=:default, optimize::Bool=false, kwargs...)
@@ -153,18 +153,24 @@ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt
     lineprinter = Base.IRShow.__debuginfo[debuginfo]
     nargs::Int = 0
     if isa(f, Core.OpaqueClosure)
-        isa(f.source, Method) && (nargs = f.nargs)
-        print_warntype_codeinfo(io, Base.code_typed_opaque_closure(f)[1]..., nargs; lineprinter)
+        isa(f.source, Method) && (nargs = f.source.nargs)
+        print_warntype_codeinfo(io, Base.code_typed_opaque_closure(f, tt)[1]..., nargs; lineprinter)
         return nothing
     end
-    matches = Base._methods_by_ftype(Base.signature_type(f, t), #=lim=#-1, world)::Vector
-    for match in matches
+    tt = Base.signature_type(f, tt)
+    matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
+    matches === nothing && Base.raise_match_failure(:code_warntype, tt)
+    for match in matches.matches
         match = match::Core.MethodMatch
-        (src, rettype) = Core.Compiler.typeinf_code(interp, match, optimize)
+        src = Core.Compiler.typeinf_code(interp, match, optimize)
         mi = Core.Compiler.specialize_method(match)
         mi.def isa Method && (nargs = (mi.def::Method).nargs)
         print_warntype_mi(io, mi)
-        print_warntype_codeinfo(io, src, rettype, nargs; lineprinter)
+        if src isa Core.CodeInfo
+            print_warntype_codeinfo(io, src, src.rettype, nargs; lineprinter)
+        else
+            println(io, "  inference not successful")
+        end
     end
     nothing
 end
@@ -208,7 +214,6 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
             Core.Compiler.hasintersect(typeof(f).parameters[1], tt) || (warning = OC_MISMATCH_WARNING)
         else
             mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), tt.parameters...}, Core.svec())
-            actual = isdispatchtuple(mi.specTypes)
             isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING)
         end
     end
@@ -222,15 +227,29 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
         if syntax !== :att && syntax !== :intel
             throw(ArgumentError("'syntax' must be either :intel or :att"))
         end
-        if dump_module
-            # we want module metadata, so use LLVM to generate assembly output
-            str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params)
-        else
-            # if we don't want the module metadata, just disassemble what our JIT has
+        str = ""
+        if !dump_module
+            # if we don't want the module metadata, attempt to disassemble what our JIT has
             str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, binary)
         end
+        if isempty(str)
+            # if that failed (or we want metadata), use LLVM to generate more accurate assembly output
+            if !isa(f, Core.OpaqueClosure)
+                src = Core.Compiler.typeinf_code(Core.Compiler.NativeInterpreter(world), mi, true)
+            else
+                src, rt = Base.get_oc_code_rt(f, tt, true)
+            end
+            src isa Core.CodeInfo || error("failed to infer source for $mi")
+            str = _dump_function_native_assembly(mi, src, wrapper, syntax, debuginfo, binary, raw, params)
+        end
     else
-        str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params)
+        if !isa(f, Core.OpaqueClosure)
+            src = Core.Compiler.typeinf_code(Core.Compiler.NativeInterpreter(world), mi, true)
+        else
+            src, rt = Base.get_oc_code_rt(f, tt, true)
+        end
+        src isa Core.CodeInfo || error("failed to infer source for $mi")
+        str = _dump_function_llvm(mi, src, wrapper, !raw, dump_module, optimize, debuginfo, params)
     end
     str = warning * str
     return str
@@ -250,11 +269,11 @@ struct LLVMFDump
     f::Ptr{Cvoid} # opaque
 end
 
-function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt,
+function _dump_function_native_assembly(mi::Core.MethodInstance, src::Core.CodeInfo,
                                         wrapper::Bool, syntax::Symbol, debuginfo::Symbol,
                                         binary::Bool, raw::Bool, params::CodegenParams)
     llvmf_dump = Ref{LLVMFDump}()
-    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool,
+    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any, wrapper::Bool,
                              true::Bool, params::CodegenParams)::Cvoid
     llvmf_dump[].f == C_NULL && error("could not compile the specified method")
     str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool,
@@ -264,12 +283,12 @@ function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt,
 end
 
 function _dump_function_llvm(
-        mi::Core.MethodInstance, world::UInt, wrapper::Bool,
+        mi::Core.MethodInstance, src::Core.CodeInfo, wrapper::Bool,
         strip_ir_metadata::Bool, dump_module::Bool,
         optimize::Bool, debuginfo::Symbol,
         params::CodegenParams)
     llvmf_dump = Ref{LLVMFDump}()
-    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt,
+    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any,
                              wrapper::Bool, optimize::Bool, params::CodegenParams)::Cvoid
     llvmf_dump[].f == C_NULL && error("could not compile the specified method")
     str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool,
@@ -351,7 +370,7 @@ const llvm_types =
 const llvm_cond = r"^(?:[ou]?eq|[ou]?ne|[uso][gl][te]|ord|uno)$" # true|false
 
 function print_llvm_tokens(io, tokens)
-    m = match(r"^((?:[^\s:]+:)?)(\s*)(.*)", tokens)
+    m = match(r"^((?:[^\"\s:]+:|\"[^\"]*\":)?)(\s*)(.*)", tokens)
     if m !== nothing
         label, spaces, tokens = m.captures
         printstyled_ll(io, label, :label, spaces)
diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl
index 5b87dc0c57d40..6d1d75f1072ea 100644
--- a/stdlib/InteractiveUtils/src/editless.jl
+++ b/stdlib/InteractiveUtils/src/editless.jl
@@ -223,6 +223,9 @@ Edit a file or directory optionally providing a line number to edit the file at.
 Return to the `julia` prompt when you quit the editor. The editor can be changed
 by setting `JULIA_EDITOR`, `VISUAL` or `EDITOR` as an environment variable.
 
+!!! compat "Julia 1.9"
+    The `column` argument requires at least Julia 1.9.
+
 See also [`InteractiveUtils.define_editor`](@ref).
 """
 function edit(path::AbstractString, line::Integer=0, column::Integer=0)
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index 939098464e148..bb56c47b4f9ca 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -106,6 +106,11 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
                        $(kws...))
             end
         elseif ex0.head === :call
+            if ex0.args[1] === :^ && length(ex0.args) >= 3 && isa(ex0.args[3], Int)
+                return Expr(:call, fcn, :(Base.literal_pow),
+                            Expr(:call, typesof, esc(ex0.args[1]), esc(ex0.args[2]),
+                                 esc(Val(ex0.args[3]))))
+            end
             return Expr(:call, fcn, esc(ex0.args[1]),
                         Expr(:call, typesof, map(esc, ex0.args[2:end])...),
                         kws...)
diff --git a/stdlib/InteractiveUtils/test/highlighting.jl b/stdlib/InteractiveUtils/test/highlighting.jl
index b72c9dbe72795..3531618e10dfc 100644
--- a/stdlib/InteractiveUtils/test/highlighting.jl
+++ b/stdlib/InteractiveUtils/test/highlighting.jl
@@ -131,6 +131,13 @@ const XU = B * "}" * XB
 
         @test highlight_llvm("L7:\t\t; preds = %top") ==
             "$(L)L7:$(XL)\t\t$(C); preds = %top$(XC)\n"
+
+        @test highlight_llvm("  %\"box::GenericMemoryRef13\" = add i64 0, 0") ==
+            "  $(V)%\"box::GenericMemoryRef13\"$(XV) $EQU " *
+            "$(I)add$(XI) $(T)i64$(XT) $(N)0$(XN)$COM $(N)0$(XN)\n"
+
+        @test highlight_llvm("  \"label-as-string\":\t\t; preds = %top") ==
+            "  $(L)\"label-as-string\":$(XL)\t\t$(C); preds = %top$(XC)\n"
     end
     @testset "define" begin
         @test highlight_llvm("define double @julia_func_1234(float) {") ==
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index 4b69e4c1f615d..424564b70384c 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -138,6 +138,11 @@ tag = "ANY"
 @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float64}, tag)
 @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float32}, tag)
 
+@testset "code_warntype OpaqueClosure" begin
+    g = Base.Experimental.@opaque Tuple{Float64}->_ x -> 0.0
+    @test warntype_hastag(g, Tuple{Float64}, "::Float64")
+end
+
 end # module WarnType
 
 # Adds test for PR #17636
@@ -279,6 +284,43 @@ let x..y = 0
     @test (@which 1..2).name === :..
 end
 
+# issue #53691
+let a = -1
+    @test (@which 2^a).name === :^
+    @test (@which 2^0x1).name === :^
+end
+
+let w = Vector{Any}(undef, 9)
+    @testset "@which x^literal" begin
+        w[1] = @which 2^0
+        w[2] = @which 2^1
+        w[3] = @which 2^2
+        w[4] = @which 2^3
+        w[5] = @which 2^-1
+        w[6] = @which 2^-2
+        w[7] = @which 2^10
+        w[8] = @which big(2.0)^1
+        w[9] = @which big(2.0)^-1
+        @test all(getproperty.(w, :name) .=== :literal_pow)
+        @test length(Set(w)) == length(w) # all methods distinct
+    end
+end
+
+# PR 53713
+if Int === Int64
+    # literal_pow only for exponents x: -2^63 <= x < 2^63 #53860 (all Int)
+    @test (@which 2^-9223372036854775809).name === :^
+    @test (@which 2^-9223372036854775808).name === :literal_pow
+    @test (@which 2^9223372036854775807).name === :literal_pow
+    @test (@which 2^9223372036854775808).name === :^
+elseif Int === Int32
+    # literal_pow only for exponents x: -2^31 <= x < 2^31 #53860 (all Int)
+    @test (@which 2^-2147483649).name === :^
+    @test (@which 2^-2147483648).name === :literal_pow
+    @test (@which 2^2147483647).name === :literal_pow
+    @test (@which 2^2147483648).name === :^
+end
+
 # issue #13464
 try
     @which x = 1
@@ -286,7 +328,6 @@ try
 catch err13464
     @test startswith(err13464.msg, "expression is not a function call")
 end
-
 module MacroTest
 export @macrotest
 macro macrotest(x::Int, y::Symbol) end
@@ -352,23 +393,40 @@ let errf = tempname(),
     new_stderr = open(errf, "w")
     try
         redirect_stderr(new_stderr)
+        @test occursin("f_broken_code", sprint(code_native, h_broken_code, ()))
+        Libc.flush_cstdio()
         println(new_stderr, "start")
         flush(new_stderr)
-        @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
+        @test_throws "could not compile the specified method" sprint(io -> code_native(io, f_broken_code, (), dump_module=true))
+        Libc.flush_cstdio()
+        println(new_stderr, "middle")
+        flush(new_stderr)
+        @test !isempty(sprint(io -> code_native(io, f_broken_code, (), dump_module=false)))
+        Libc.flush_cstdio()
+        println(new_stderr, "later")
+        flush(new_stderr)
+        @test invokelatest(g_broken_code) == 0
         Libc.flush_cstdio()
         println(new_stderr, "end")
         flush(new_stderr)
-        @eval @test g_broken_code() == 0
     finally
+        Libc.flush_cstdio()
         redirect_stderr(old_stderr)
         close(new_stderr)
         let errstr = read(errf, String)
             @test startswith(errstr, """start
-                end
                 Internal error: encountered unexpected error during compilation of f_broken_code:
                 ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
                 """) || errstr
-            @test !endswith(errstr, "\nend\n") || errstr
+            @test occursin("""\nmiddle
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
+            @test occursin("""\nlater
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
+            @test endswith(errstr, "\nend\n") || errstr
         end
         rm(errf)
     end
@@ -707,6 +765,9 @@ end
 @testset "code_llvm on opaque_closure" begin
     let ci = code_typed(+, (Int, Int))[1][1]
         ir = Core.Compiler.inflate_ir(ci)
+        ir.argtypes[1] = Tuple{}
+        @test ir.debuginfo.def === nothing
+        ir.debuginfo.def = Symbol(@__FILE__)
         oc = Core.OpaqueClosure(ir)
         @test (code_llvm(devnull, oc, Tuple{Int, Int}); true)
         let io = IOBuffer()
diff --git a/stdlib/JuliaSyntaxHighlighting.version b/stdlib/JuliaSyntaxHighlighting.version
index 4a819d056e70e..280db66afe5f9 100644
--- a/stdlib/JuliaSyntaxHighlighting.version
+++ b/stdlib/JuliaSyntaxHighlighting.version
@@ -1,4 +1,4 @@
 JULIASYNTAXHIGHLIGHTING_BRANCH = main
-JULIASYNTAXHIGHLIGHTING_SHA1 = 4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63
+JULIASYNTAXHIGHLIGHTING_SHA1 = b89dd99db56700c47434df6106b6c6afd1c9ed01
 JULIASYNTAXHIGHLIGHTING_GIT_URL := https://github.com/julialang/JuliaSyntaxHighlighting.jl.git
 JULIASYNTAXHIGHLIGHTING_TAR_URL = https://api.github.com/repos/julialang/JuliaSyntaxHighlighting.jl/tarball/$1
diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml
index fccea03ebd80e..6a6cc72aa3c62 100644
--- a/stdlib/LLD_jll/Project.toml
+++ b/stdlib/LLD_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLD_jll"
 uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
-version = "16.0.6+4"
+version = "18.1.7+2"
 
 [deps]
 Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
@@ -10,7 +10,7 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.11"
-libLLVM_jll = "16.0.6"
+libLLVM_jll = "18.1.7"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl
index 55ccec9cc4005..9b8365dddcf0b 100644
--- a/stdlib/LLD_jll/src/LLD_jll.jl
+++ b/stdlib/LLD_jll/src/LLD_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LLD_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
index 36c24111d4d31..0cb0fe5440066 100644
--- a/stdlib/LLVMLibUnwind_jll/Project.toml
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLVMLibUnwind_jll"
 uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
-version = "12.0.1+0"
+version = "14.0.6+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
index 5c4026291a673..429e35b91d3f2 100644
--- a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
+++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LLVMLibUnwind_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
index cd67bfac0006a..3291c97d811cb 100644
--- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl
+++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LibCURL_jll
 using Base, Libdl, nghttp2_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LibGit2/src/blame.jl b/stdlib/LibGit2/src/blame.jl
index 89071ea9c6f79..e441189bdd423 100644
--- a/stdlib/LibGit2/src/blame.jl
+++ b/stdlib/LibGit2/src/blame.jl
@@ -13,7 +13,7 @@ function GitBlame(repo::GitRepo, path::AbstractString; options::BlameOptions=Bla
     blame_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_blame_file, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{BlameOptions}),
-                   blame_ptr_ptr, repo.ptr, path, Ref(options))
+                   blame_ptr_ptr, repo, path, Ref(options))
     return GitBlame(repo, blame_ptr_ptr[])
 end
 
@@ -27,7 +27,7 @@ that function later.
 """
 function counthunks(blame::GitBlame)
     ensure_initialized()
-    return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame.ptr)
+    return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame)
 end
 
 function Base.getindex(blame::GitBlame, i::Integer)
diff --git a/stdlib/LibGit2/src/blob.jl b/stdlib/LibGit2/src/blob.jl
index 1941989b5f529..af1a16574b51e 100644
--- a/stdlib/LibGit2/src/blob.jl
+++ b/stdlib/LibGit2/src/blob.jl
@@ -2,7 +2,7 @@
 
 function Base.length(blob::GitBlob)
     ensure_initialized()
-    return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob.ptr)
+    return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob)
 end
 
 """
@@ -20,7 +20,7 @@ is binary and not valid Unicode.
 """
 function rawcontent(blob::GitBlob)
     ensure_initialized()
-    ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr)
+    ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob)
     copy(unsafe_wrap(Array, ptr, (length(blob),), own = false))
 end
 
@@ -47,7 +47,7 @@ the first 8000 bytes.
 """
 function isbinary(blob::GitBlob)
     ensure_initialized()
-    bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob.ptr)
+    bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob)
     return bin_flag == 1
 end
 
@@ -69,7 +69,7 @@ function addblob!(repo::GitRepo, path::AbstractString)
     id_ref = Ref{GitHash}()
     @check ccall((:git_blob_create_from_disk, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                 id_ref, repo.ptr, path)
+                 id_ref, repo, path)
     return id_ref[]
 end
 
diff --git a/stdlib/LibGit2/src/commit.jl b/stdlib/LibGit2/src/commit.jl
index ceb56ee45d3b7..d76a31791e4c4 100644
--- a/stdlib/LibGit2/src/commit.jl
+++ b/stdlib/LibGit2/src/commit.jl
@@ -73,16 +73,18 @@ function commit(repo::GitRepo,
     ensure_initialized()
     commit_id_ptr = Ref(GitHash())
     nparents = length(parents)
-    parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
-    @check ccall((:git_commit_create, libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
-                  Ptr{SignatureStruct}, Ptr{SignatureStruct},
-                  Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
-                  Csize_t, Ptr{Ptr{Cvoid}}),
-                 commit_id_ptr, repo.ptr, isempty(refname) ? C_NULL : refname,
-                 author.ptr, committer.ptr,
-                 C_NULL, msg, tree.ptr,
-                 nparents, nparents > 0 ? parentptrs : C_NULL)
+    GC.@preserve parents begin
+        parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
+        @check ccall((:git_commit_create, libgit2), Cint,
+                     (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
+                      Ptr{SignatureStruct}, Ptr{SignatureStruct},
+                      Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
+                      Csize_t, Ptr{Ptr{Cvoid}}),
+                     commit_id_ptr, repo, isempty(refname) ? C_NULL : refname,
+                     author, committer,
+                     C_NULL, msg, tree,
+                     nparents, nparents > 0 ? parentptrs : C_NULL)
+    end
     return commit_id_ptr[]
 end
 
diff --git a/stdlib/LibGit2/src/config.jl b/stdlib/LibGit2/src/config.jl
index affe881abde08..0bee705259ca6 100644
--- a/stdlib/LibGit2/src/config.jl
+++ b/stdlib/LibGit2/src/config.jl
@@ -35,7 +35,7 @@ function GitConfig(repo::GitRepo)
     ensure_initialized()
     cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_config, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo)
     return GitConfig(repo, cfg_ptr_ptr[])
 end
 
@@ -58,7 +58,7 @@ function GitConfig(level::Consts.GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT)
         try
             @check ccall((:git_config_open_level, libgit2), Cint,
                          (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint),
-                          glb_cfg_ptr_ptr, cfg.ptr, Cint(level))
+                          glb_cfg_ptr_ptr, cfg, Cint(level))
             cfg = GitConfig(glb_cfg_ptr_ptr[])
         finally
             close(tmpcfg)
@@ -91,13 +91,13 @@ function addfile(cfg::GitConfig, path::AbstractString,
     ensure_initialized()
     @static if LibGit2.VERSION >= v"0.27.0"
         @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Ptr{Cvoid}, Cint),
-                     cfg.ptr, path, Cint(level), isa(repo, GitRepo) ? repo.ptr : C_NULL, Cint(force))
+                     (Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}, Cint),
+                     cfg, path, Cint(level), isa(repo, GitRepo) ? repo : C_NULL, Cint(force))
     else
         repo === nothing || error("repo argument is not supported in this version of LibGit2")
         @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Cint),
-                     cfg.ptr, path, Cint(level), Cint(force))
+                     (Ptr{Cvoid}, Cstring, Cint, Cint),
+                     cfg, path, Cint(level), Cint(force))
     end
 end
 
@@ -105,7 +105,7 @@ function get(::Type{<:AbstractString}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     buf_ref = Ref(Buffer())
     @check ccall((:git_config_get_string_buf, libgit2), Cint,
-                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c.ptr, name)
+                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c, name)
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -116,7 +116,7 @@ function get(::Type{Bool}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
     @check ccall((:git_config_get_bool, libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return Bool(val_ptr[])
 end
 
@@ -124,7 +124,7 @@ function get(::Type{Int32}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
     @check ccall((:git_config_get_int32, libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
@@ -132,7 +132,7 @@ function get(::Type{Int64}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cintmax_t(0))
     @check ccall((:git_config_get_int64, libgit2), Cint,
-          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
@@ -165,33 +165,33 @@ end
 function set!(c::GitConfig, name::AbstractString, value::AbstractString)
     ensure_initialized()
     @check ccall((:git_config_set_string, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cstring), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cstring), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Bool)
     ensure_initialized()
     bval = Int32(value)
     @check ccall((:git_config_set_bool, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, bval)
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, bval)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int32)
     ensure_initialized()
     @check ccall((:git_config_set_int32, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int64)
     ensure_initialized()
     @check ccall((:git_config_set_int64, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cintmax_t), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cintmax_t), c, name, value)
 end
 
 function GitConfigIter(cfg::GitConfig)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_iterator_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -200,7 +200,7 @@ function GitConfigIter(cfg::GitConfig, name::AbstractString)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, C_NULL)
+                  ci_ptr, cfg, name, C_NULL)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -209,7 +209,7 @@ function GitConfigIter(cfg::GitConfig, name::AbstractString, value::Regex)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, value.pattern)
+                  ci_ptr, cfg, name, value.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -218,7 +218,7 @@ function GitConfigIter(cfg::GitConfig, name::Regex)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_iterator_glob_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                  ci_ptr, cfg.ptr, name.pattern)
+                  ci_ptr, cfg, name.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -226,7 +226,7 @@ function Base.iterate(ci::GitConfigIter, state=nothing)
     ensure_initialized()
     entry_ptr_ptr = Ref{Ptr{ConfigEntry}}(C_NULL)
     err = ccall((:git_config_next, libgit2), Cint,
-                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci.ptr)
+                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci)
     if err == Cint(Error.GIT_OK)
         return (unsafe_load(entry_ptr_ptr[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/consts.jl b/stdlib/LibGit2/src/consts.jl
index 5a5b8b74e7127..8c140e8c2aa30 100644
--- a/stdlib/LibGit2/src/consts.jl
+++ b/stdlib/LibGit2/src/consts.jl
@@ -417,7 +417,32 @@ Option flags for `GitRepo`.
                    FEATURE_SSH     = Cuint(1 << 2),
                    FEATURE_NSEC    = Cuint(1 << 3))
 
-if version() >= v"0.24.0"
+if version() >= v"1.8.0"
+    @doc """
+    Priority level of a config file.
+
+    These priority levels correspond to the natural escalation logic (from higher to lower) when searching for config entries in git.
+
+    * `CONFIG_LEVEL_DEFAULT` - Open the global, XDG and system configuration files if any available.
+    * `CONFIG_LEVEL_PROGRAMDATA` - System-wide on Windows, for compatibility with portable git
+    * `CONFIG_LEVEL_SYSTEM` - System-wide configuration file; `/etc/gitconfig` on Linux systems
+    * `CONFIG_LEVEL_XDG` - XDG compatible configuration file; typically `~/.config/git/config`
+    * `CONFIG_LEVEL_GLOBAL` - User-specific configuration file (also called Global configuration file); typically `~/.gitconfig`
+    * `CONFIG_LEVEL_LOCAL` - Repository specific configuration file; `\$WORK_DIR/.git/config` on non-bare repos
+    * `CONFIG_LEVEL_WORKTREE` - Worktree specific configuration file; `\$GIT_DIR/config.worktree`
+    * `CONFIG_LEVEL_APP` - Application specific configuration file; freely defined by applications
+    * `CONFIG_HIGHEST_LEVEL` - Represents the highest level available config file (i.e. the most specific config file available that actually is loaded)
+    """
+    @enum(GIT_CONFIG, CONFIG_LEVEL_DEFAULT     = 0,
+                      CONFIG_LEVEL_PROGRAMDATA = 1,
+                      CONFIG_LEVEL_SYSTEM      = 2,
+                      CONFIG_LEVEL_XDG         = 3,
+                      CONFIG_LEVEL_GLOBAL      = 4,
+                      CONFIG_LEVEL_LOCAL       = 5,
+                      CONFIG_LEVEL_WORKTREE    = 6,
+                      CONFIG_LEVEL_APP         = 7,
+                      CONFIG_HIGHEST_LEVEL     =-1)
+elseif version() >= v"0.24.0"
     @doc """
     Priority level of a config file.
 
diff --git a/stdlib/LibGit2/src/diff.jl b/stdlib/LibGit2/src/diff.jl
index 044c6331dc1f1..a3f2cafe62e96 100644
--- a/stdlib/LibGit2/src/diff.jl
+++ b/stdlib/LibGit2/src/diff.jl
@@ -29,11 +29,11 @@ function diff_tree(repo::GitRepo, tree::GitTree, pathspecs::AbstractString=""; c
     if cached
         @check ccall((:git_diff_tree_to_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
     else
         @check ccall((:git_diff_tree_to_workdir_with_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, isempty(pathspecs) ? C_NULL : pathspecs)
     end
     return GitDiff(repo, diff_ptr_ptr[])
 end
@@ -53,7 +53,7 @@ function diff_tree(repo::GitRepo, oldtree::GitTree, newtree::GitTree)
     diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_diff_tree_to_tree, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                   diff_ptr_ptr, repo.ptr, oldtree.ptr, newtree.ptr, C_NULL)
+                   diff_ptr_ptr, repo, oldtree, newtree, C_NULL)
     return GitDiff(repo, diff_ptr_ptr[])
 end
 
@@ -69,7 +69,7 @@ function GitDiffStats(diff::GitDiff)
     diff_stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_diff_get_stats, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}),
-                  diff_stat_ptr_ptr, diff.ptr)
+                  diff_stat_ptr_ptr, diff)
     return GitDiffStats(diff.owner, diff_stat_ptr_ptr[])
 end
 
@@ -83,7 +83,7 @@ are to be included or not).
 """
 function files_changed(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
@@ -96,7 +96,7 @@ are to be included or not).
 """
 function insertions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
@@ -109,12 +109,12 @@ are to be included or not).
 """
 function deletions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 function count(diff::GitDiff)
     ensure_initialized()
-    return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff.ptr)
+    return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff)
 end
 
 function Base.getindex(diff::GitDiff, i::Integer)
@@ -122,10 +122,12 @@ function Base.getindex(diff::GitDiff, i::Integer)
         throw(BoundsError(diff, (i,)))
     end
     ensure_initialized()
-    delta_ptr = ccall((:git_diff_get_delta, libgit2),
-                      Ptr{DiffDelta},
-                      (Ptr{Cvoid}, Csize_t), diff.ptr, i-1)
-    return unsafe_load(delta_ptr)
+    GC.@preserve diff begin # preserve `diff` object until return of `unsafe_load`
+        delta_ptr = ccall((:git_diff_get_delta, libgit2),
+                          Ptr{DiffDelta},
+                          (Ptr{Cvoid}, Csize_t), diff, i-1)
+        return unsafe_load(delta_ptr)
+    end
 end
 
 function Base.show(io::IO, diff_stat::GitDiffStats)
diff --git a/stdlib/LibGit2/src/index.jl b/stdlib/LibGit2/src/index.jl
index 15e04d16b5756..81e8e75d59585 100644
--- a/stdlib/LibGit2/src/index.jl
+++ b/stdlib/LibGit2/src/index.jl
@@ -9,7 +9,7 @@ function GitIndex(repo::GitRepo)
     ensure_initialized()
     idx_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_index, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo.ptr)
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo)
     return GitIndex(repo, idx_ptr_ptr[])
 end
 
@@ -25,7 +25,7 @@ has changed since the last time it was loaded into `idx`.
 """
 function read!(idx::GitIndex, force::Bool = false)
     ensure_initialized()
-    @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force))
+    @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx, Cint(force))
     return idx
 end
 
@@ -36,7 +36,7 @@ Write the state of index `idx` to disk using a file lock.
 """
 function write!(idx::GitIndex)
     ensure_initialized()
-    @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx.ptr)
+    @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx)
     return idx
 end
 
@@ -52,7 +52,7 @@ function write_tree!(idx::GitIndex)
     ensure_initialized()
     oid_ptr = Ref(GitHash())
     @check ccall((:git_index_write_tree, libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx.ptr)
+                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx)
     return oid_ptr[]
 end
 
@@ -74,7 +74,7 @@ Read the tree `tree` (or the tree pointed to by `treehash` in the repository own
 function read_tree!(idx::GitIndex, tree::GitTree)
     ensure_initialized()
     @check ccall((:git_index_read_tree, libgit2), Cint,
-                 (Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, tree.ptr)
+                 (Ptr{Cvoid}, Ptr{Cvoid}), idx, tree)
 end
 read_tree!(idx::GitIndex, hash::AbstractGitHash) =
     read_tree!(idx, GitTree(repository(idx), hash))
@@ -106,7 +106,7 @@ function add!(idx::GitIndex, files::AbstractString...;
     ensure_initialized()
     @check ccall((:git_index_add_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Cuint, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), flags, C_NULL, C_NULL)
+                 idx, collect(files), flags, C_NULL, C_NULL)
 end
 
 """
@@ -122,7 +122,7 @@ function update!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
     @check ccall((:git_index_update_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 """
@@ -136,7 +136,7 @@ function remove!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
     @check ccall((:git_index_remove_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 function add!(repo::GitRepo, files::AbstractString...;
@@ -173,7 +173,7 @@ end
 
 function count(idx::GitIndex)
     ensure_initialized()
-    return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr)
+    return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx)
 end
 
 function Base.getindex(idx::GitIndex, i::Integer)
@@ -192,7 +192,7 @@ function Base.findall(path::String, idx::GitIndex)
     ensure_initialized()
     pos_ref = Ref{Csize_t}(0)
     ret = ccall((:git_index_find, libgit2), Cint,
-                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx.ptr, path)
+                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx, path)
     ret == Cint(Error.ENOTFOUND) && return nothing
     return pos_ref[]+1
 end
diff --git a/stdlib/LibGit2/src/merge.jl b/stdlib/LibGit2/src/merge.jl
index 7c946315fdd86..8bd8d1e4b64e9 100644
--- a/stdlib/LibGit2/src/merge.jl
+++ b/stdlib/LibGit2/src/merge.jl
@@ -18,7 +18,7 @@ function GitAnnotated(repo::GitRepo, commit_id::GitHash)
     ann_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}),
-                   ann_ptr_ptr, repo.ptr, Ref(commit_id))
+                   ann_ptr_ptr, repo, Ref(commit_id))
     return GitAnnotated(repo, ann_ptr_ptr[])
 end
 
@@ -27,7 +27,7 @@ function GitAnnotated(repo::GitRepo, ref::GitReference)
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_from_ref, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}),
-                   ann_ref_ref, repo.ptr, ref.ptr)
+                   ann_ref_ref, repo, ref)
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
@@ -36,7 +36,7 @@ function GitAnnotated(repo::GitRepo, fh::FetchHead)
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_from_fetchhead, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{GitHash}),
-                   ann_ref_ref, repo.ptr, fh.name, fh.url, Ref(fh.oid))
+                   ann_ref_ref, repo, fh.name, fh.url, Ref(fh.oid))
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
@@ -88,9 +88,11 @@ function merge_analysis(repo::GitRepo, anns::Vector{GitAnnotated})
     preference = Ref{Cint}(0)
     anns_ref = Ref(Base.map(a->a.ptr, anns), 1)
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge_analysis, libgit2), Cint,
-                  (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
-                   analysis, preference, repo.ptr, anns_ref, anns_size)
+    GC.@preserve anns begin
+        @check ccall((:git_merge_analysis, libgit2), Cint,
+                     (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
+                     analysis, preference, repo, anns_ref, anns_size)
+    end
     return analysis[], preference[]
 end
 
@@ -147,11 +149,13 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated};
                 checkout_opts::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge, libgit2), Cint,
-                  (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
-                   Ptr{MergeOptions}, Ptr{CheckoutOptions}),
-                   repo.ptr, Base.map(x->x.ptr, anns), anns_size,
-                   Ref(merge_opts), Ref(checkout_opts))
+    GC.@preserve anns begin
+        @check ccall((:git_merge, libgit2), Cint,
+                     (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
+                      Ptr{MergeOptions}, Ptr{CheckoutOptions}),
+                     repo, Base.map(x->x.ptr, anns), anns_size,
+                     Ref(merge_opts), Ref(checkout_opts))
+    end
     @info "Review and commit merged changes"
     return true
 end
@@ -263,7 +267,7 @@ function merge_base(repo::GitRepo, one::AbstractString, two::AbstractString)
     moid = try
         @check ccall((:git_merge_base, libgit2), Cint,
                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{GitHash}, Ptr{GitHash}),
-                moid_ptr, repo.ptr, oid1_ptr, oid2_ptr)
+                moid_ptr, repo, oid1_ptr, oid2_ptr)
         moid_ptr[]
     catch e
         GitHash()
diff --git a/stdlib/LibGit2/src/oid.jl b/stdlib/LibGit2/src/oid.jl
index be4944791f55c..fae0d3737a429 100644
--- a/stdlib/LibGit2/src/oid.jl
+++ b/stdlib/LibGit2/src/oid.jl
@@ -133,7 +133,7 @@ function GitHash(repo::GitRepo, ref_name::AbstractString)
     oid_ptr  = Ref(GitHash())
     @check ccall((:git_reference_name_to_id, libgit2), Cint,
                     (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                     oid_ptr, repo.ptr, ref_name)
+                     oid_ptr, repo, ref_name)
     return oid_ptr[]
 end
 
@@ -144,7 +144,7 @@ Get the identifier (`GitHash`) of `obj`.
 """
 function GitHash(obj::GitObject)
     ensure_initialized()
-    GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr))
+    GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj))
 end
 
 ==(obj1::GitObject, obj2::GitObject) = GitHash(obj1) == GitHash(obj2)
@@ -160,7 +160,7 @@ function GitShortHash(obj::GitObject)
     ensure_initialized()
     buf_ref = Ref(Buffer())
     @check ccall((:git_object_short_id, libgit2), Cint,
-                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj.ptr)
+                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj)
     sid = GitShortHash(buf_ref[])
     free(buf_ref)
     return sid
diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl
index b36c2f3f475cf..e4abf5a85cc92 100644
--- a/stdlib/LibGit2/src/rebase.jl
+++ b/stdlib/LibGit2/src/rebase.jl
@@ -8,14 +8,14 @@ function GitRebase(repo::GitRepo, branch::GitAnnotated, upstream::GitAnnotated;
     @check ccall((:git_rebase_init, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid},
                    Ptr{Cvoid}, Ptr{RebaseOptions}),
-                   rebase_ptr_ptr, repo.ptr, branch.ptr, upstream.ptr,
-                   onto === nothing ? C_NULL : onto.ptr, Ref(opts))
+                   rebase_ptr_ptr, repo, branch, upstream,
+                   onto === nothing ? C_NULL : onto, Ref(opts))
     return GitRebase(repo, rebase_ptr_ptr[])
 end
 
 function count(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 """
@@ -28,7 +28,7 @@ has not yet been called or iteration over `rb` has not yet begun), return
 """
 function current(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 function Base.getindex(rb::GitRebase, i::Integer)
@@ -80,7 +80,7 @@ function commit(rb::GitRebase, sig::GitSignature)
     try
         @check ccall((:git_rebase_commit, libgit2), Error.Code,
                      (Ptr{GitHash}, Ptr{Cvoid}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}),
-                      oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL)
+                      oid_ptr, rb, C_NULL, sig, C_NULL, C_NULL)
     catch err
         # TODO: return current HEAD instead
         err isa GitError && err.code === Error.EAPPLIED && return nothing
@@ -101,7 +101,7 @@ rebase had completed), and `-1` for other errors.
 function abort(rb::GitRebase)
     ensure_initialized()
     return ccall((:git_rebase_abort, libgit2), Csize_t,
-                      (Ptr{Cvoid},), rb.ptr)
+                      (Ptr{Cvoid},), rb)
 end
 
 """
@@ -115,5 +115,5 @@ function finish(rb::GitRebase, sig::GitSignature)
     ensure_initialized()
     return ccall((:git_rebase_finish, libgit2), Csize_t,
                   (Ptr{Cvoid}, Ptr{SignatureStruct}),
-                   rb.ptr, sig.ptr)
+                   rb, sig)
 end
diff --git a/stdlib/LibGit2/src/reference.jl b/stdlib/LibGit2/src/reference.jl
index 9f849ed01a00f..8a9bc5cf1a6de 100644
--- a/stdlib/LibGit2/src/reference.jl
+++ b/stdlib/LibGit2/src/reference.jl
@@ -5,7 +5,7 @@ function GitReference(repo::GitRepo, refname::AbstractString)
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname)
+                   ref_ptr_ptr, repo, refname)
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -15,7 +15,7 @@ function GitReference(repo::GitRepo, obj_oid::GitHash, refname::AbstractString =
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{GitHash}, Cint, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname, Ref(obj_oid), Cint(force),
+                   ref_ptr_ptr, repo, refname, Ref(obj_oid), Cint(force),
                    isempty(msg) ? C_NULL : msg)
     return GitReference(repo, ref_ptr_ptr[])
 end
@@ -29,7 +29,7 @@ to this branch will have no parents.
 function isorphan(repo::GitRepo)
     ensure_initialized()
     r = @check ccall((:git_repository_head_unborn, libgit2), Cint,
-                     (Ptr{Cvoid},), repo.ptr)
+                     (Ptr{Cvoid},), repo)
     r != 0
 end
 
@@ -42,7 +42,7 @@ function head(repo::GitRepo)
     ensure_initialized()
     head_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_head, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo)
     return GitReference(repo, head_ptr_ptr[])
 end
 
@@ -68,7 +68,7 @@ function shortname(ref::GitReference)
     isempty(ref) && return ""
     ensure_initialized()
     GC.@preserve ref begin
-        name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
+        name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref)
         name_ptr == C_NULL && return ""
         name = unsafe_string(name_ptr)
     end
@@ -85,7 +85,7 @@ Return a `Cint` corresponding to the type of `ref`:
 """
 function reftype(ref::GitReference)
     ensure_initialized()
-    return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref.ptr)
+    return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref)
 end
 
 """
@@ -139,7 +139,7 @@ function ishead(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_branch_is_head, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -147,7 +147,7 @@ function isbranch(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_branch, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -155,7 +155,7 @@ function istag(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_tag, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -163,7 +163,7 @@ function isremote(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_remote, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -200,7 +200,7 @@ function peel(::Type{T}, ref::GitReference) where T<:GitObject
     ensure_initialized()
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_peel, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref.ptr, Consts.OBJECT(T))
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref, Consts.OBJECT(T))
     return T(ref.owner, obj_ptr_ptr[])
 end
 peel(ref::GitReference) = peel(GitObject, ref)
@@ -214,7 +214,7 @@ function ref_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_reference_list, libgit2), Cint,
-                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -237,7 +237,7 @@ function create_branch(repo::GitRepo,
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_branch_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Cint),
-                   ref_ptr_ptr, repo.ptr, bname, commit_obj.ptr, Cint(force))
+                   ref_ptr_ptr, repo, bname, commit_obj, Cint(force))
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -248,7 +248,7 @@ Delete the branch pointed to by `branch`.
 """
 function delete_branch(branch::GitReference)
     ensure_initialized()
-    @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch.ptr)
+    @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch)
 end
 
 """
@@ -260,7 +260,7 @@ function head!(repo::GitRepo, ref::GitReference)
     ensure_initialized()
     ref_name = name(ref)
     @check ccall((:git_repository_set_head, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, ref_name)
+                  (Ptr{Cvoid}, Cstring), repo, ref_name)
     return ref
 end
 
@@ -282,7 +282,7 @@ function lookup_branch(repo::GitRepo,
     branch_type = remote ? Consts.BRANCH_REMOTE : Consts.BRANCH_LOCAL
     err = ccall((:git_branch_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Cint),
-                  ref_ptr_ptr, repo.ptr, branch_name, branch_type)
+                  ref_ptr_ptr, repo, branch_name, branch_type)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -308,7 +308,7 @@ function upstream(ref::GitReference)
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     err = ccall((:git_branch_upstream, libgit2), Cint,
-                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref.ptr)
+                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -328,7 +328,7 @@ function target!(ref::GitReference, new_oid::GitHash; msg::AbstractString="")
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_set_target, libgit2), Cint,
              (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Cstring),
-             ref_ptr_ptr, ref.ptr, Ref(new_oid), isempty(msg) ? C_NULL : msg)
+             ref_ptr_ptr, ref, Ref(new_oid), isempty(msg) ? C_NULL : msg)
     return GitReference(ref.owner, ref_ptr_ptr[])
 end
 
@@ -336,7 +336,7 @@ function GitBranchIter(repo::GitRepo, flags::Cint=Cint(Consts.BRANCH_LOCAL))
     ensure_initialized()
     bi_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_branch_iterator_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo.ptr, flags)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo, flags)
     return GitBranchIter(repo, bi_ptr[])
 end
 
@@ -346,7 +346,7 @@ function Base.iterate(bi::GitBranchIter, state=nothing)
     btype = Ref{Cint}()
     err = ccall((:git_branch_next, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cint}, Ptr{Cvoid}),
-                  ref_ptr_ptr, btype, bi.ptr)
+                  ref_ptr_ptr, btype, bi)
     if err == Cint(Error.GIT_OK)
         return ((GitReference(bi.owner, ref_ptr_ptr[]), btype[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/remote.jl b/stdlib/LibGit2/src/remote.jl
index 07afecebfd373..5081eff56dd46 100644
--- a/stdlib/LibGit2/src/remote.jl
+++ b/stdlib/LibGit2/src/remote.jl
@@ -16,7 +16,7 @@ function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractStr
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -39,7 +39,7 @@ function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractStr
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create_with_fetchspec, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url, fetch_spec)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url, fetch_spec)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -59,7 +59,7 @@ function GitRemoteAnon(repo::GitRepo, url::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create_anonymous, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, url)
+                rmt_ptr_ptr, repo, url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -95,7 +95,7 @@ function lookup_remote(repo::GitRepo, remote_name::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     err = ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, remote_name)
+                rmt_ptr_ptr, repo, remote_name)
     if err == Int(Error.GIT_OK)
         return GitRemote(repo, rmt_ptr_ptr[])
     elseif err == Int(Error.ENOTFOUND)
@@ -110,7 +110,7 @@ function get(::Type{GitRemote}, repo::GitRepo, rmt_name::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name)
+                rmt_ptr_ptr, repo, rmt_name)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -133,9 +133,11 @@ julia> LibGit2.url(remote)
 """
 function url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -157,9 +159,11 @@ julia> LibGit2.push_url(LibGit2.get(LibGit2.GitRemote, repo, "origin"))
 """
 function push_url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -183,9 +187,11 @@ julia> name(remote)
 """
 function name(rmt::GitRemote)
     ensure_initialized()
-    name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    name_ptr == C_NULL && return ""
-    return unsafe_string(name_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        name_ptr == C_NULL && return ""
+        return unsafe_string(name_ptr)
+    end
 end
 
 """
@@ -208,7 +214,7 @@ function fetch_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_remote_get_fetch_refspecs, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -238,7 +244,7 @@ function push_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_remote_get_push_refspecs, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -261,7 +267,7 @@ String["+refs/heads/*:refs/remotes/upstream/*"]
 function add_fetch!(repo::GitRepo, rmt::GitRemote, fetch_spec::String)
     ensure_initialized()
     @check ccall((:git_remote_add_fetch, libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), fetch_spec)
 end
 
@@ -290,7 +296,7 @@ String["refs/heads/master"]
 function add_push!(repo::GitRepo, rmt::GitRemote, push_spec::String)
     ensure_initialized()
     @check ccall((:git_remote_add_push, libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), push_spec)
 end
 
@@ -311,7 +317,7 @@ function fetch(rmt::GitRemote, refspecs::Vector{<:AbstractString};
     msg = "libgit2.fetch: $msg"
     @check ccall((:git_remote_fetch, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{FetchOptions}, Cstring),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
 end
 
 """
@@ -336,7 +342,7 @@ function push(rmt::GitRemote, refspecs::Vector{<:AbstractString};
     ensure_initialized()
     @check ccall((:git_remote_push, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{PushOptions}),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
 end
 
 """
@@ -348,7 +354,7 @@ function remote_delete(repo::GitRepo, remote_name::AbstractString)
     ensure_initialized()
     @check ccall((:git_remote_delete, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring),
-                 repo.ptr, remote_name)
+                 repo, remote_name)
 end
 
 Base.show(io::IO, rmt::GitRemote) = print(io, "GitRemote:\nRemote name: ", name(rmt), " url: ", url(rmt))
@@ -367,7 +373,7 @@ function set_remote_fetch_url(repo::GitRepo, remote_name::AbstractString, url::A
     ensure_initialized()
     @check ccall((:git_remote_set_url, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_fetch_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -390,7 +396,7 @@ function set_remote_push_url(repo::GitRepo, remote_name::AbstractString, url::Ab
     ensure_initialized()
     @check ccall((:git_remote_set_pushurl, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_push_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -432,7 +438,7 @@ function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION,
                  callbacks::RemoteCallbacks)
     @check ccall((:git_remote_connect, libgit2),
                  Cint, (Ptr{Cvoid}, Cint, Ref{RemoteCallbacks}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 rmt.ptr, direction, callbacks, C_NULL, C_NULL)
+                 rmt, direction, callbacks, C_NULL, C_NULL)
     return rmt
 end
 
@@ -442,7 +448,7 @@ end
 Check whether the remote is connected
 """
 function connected(rmt::GitRemote)
-    return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr) != 0
+    return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt) != 0
 end
 
 """
@@ -451,7 +457,7 @@ end
 Close the connection to the remote.
 """
 function disconnect(rmt::GitRemote)
-    @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr)
+    @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt)
     return
 end
 
@@ -465,7 +471,7 @@ This function must only be called after connecting (See [`connect`](@ref)).
 function default_branch(rmt::GitRemote)
     buf_ref = Ref(Buffer())
     @check ccall((:git_remote_default_branch, libgit2), Cint,
-                 (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt.ptr)
+                 (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt)
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -484,7 +490,7 @@ function ls(rmt::GitRemote)
     head_refs = Ref{Ptr{Ptr{_GitRemoteHead}}}()
     @check ccall((:git_remote_ls, libgit2), Cint,
                  (Ptr{Ptr{Ptr{_GitRemoteHead}}}, Ptr{Csize_t}, Ptr{Cvoid}),
-                 head_refs, nheads, rmt.ptr)
+                 head_refs, nheads, rmt)
     head_ptr = head_refs[]
     return [GitRemoteHead(unsafe_load(unsafe_load(head_ptr, i)))
             for i in 1:nheads[]]
diff --git a/stdlib/LibGit2/src/repository.jl b/stdlib/LibGit2/src/repository.jl
index 8297ae92a6a00..192a6870f639b 100644
--- a/stdlib/LibGit2/src/repository.jl
+++ b/stdlib/LibGit2/src/repository.jl
@@ -32,7 +32,7 @@ end
 function cleanup(r::GitRepo)
     if r.ptr != C_NULL
         ensure_initialized()
-        @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r.ptr)
+        @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r)
     end
 end
 
@@ -97,7 +97,7 @@ tree, and no tracking information for remote branches or configurations is prese
 function isbare(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1
+    return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo) == 1
 end
 
 """
@@ -109,7 +109,7 @@ Determine if `repo` is detached - that is, whether its HEAD points to a commit
 function isattached(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1
+    ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo) != 1
 end
 
 @doc """
@@ -140,13 +140,20 @@ function (::Type{T})(repo::GitRepo, spec::AbstractString) where T<:GitObject
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @assert repo.ptr != C_NULL
     @check ccall((:git_revparse_single, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo.ptr, spec)
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo, spec)
+    obj_ptr = obj_ptr_ptr[]
     # check object is of correct type
     if T != GitObject && T != GitUnknownObject
-        t = Consts.OBJECT(obj_ptr_ptr[])
-        t == Consts.OBJECT(T) || throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        t = Consts.OBJECT(obj_ptr)
+        if t != Consts.OBJECT(T)
+            if obj_ptr != C_NULL
+                # free result
+                ccall((:git_object_free, libgit2), Cvoid, (Ptr{Cvoid},), obj_ptr)
+            end
+            throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        end
     end
-    return T(repo, obj_ptr_ptr[])
+    return T(repo, obj_ptr)
 end
 
 function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
@@ -157,7 +164,7 @@ function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
     @assert repo.ptr != C_NULL
     @check ccall((:git_object_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -169,7 +176,7 @@ function (::Type{T})(repo::GitRepo, oid::GitShortHash) where T<:GitObject
     @assert repo.ptr != C_NULL
     @check ccall((:git_object_lookup_prefix, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Csize_t, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, oid.len, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, oid.len, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -190,8 +197,10 @@ See also [`workdir`](@ref), [`path`](@ref).
 function gitdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return unsafe_string(ccall((:git_repository_path, libgit2), Cstring,
-                        (Ptr{Cvoid},), repo.ptr))
+    GC.@preserve repo begin
+        return unsafe_string(ccall((:git_repository_path, libgit2), Cstring,
+                                   (Ptr{Cvoid},), repo))
+    end
 end
 
 """
@@ -211,10 +220,12 @@ See also [`gitdir`](@ref), [`path`](@ref).
 function workdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    sptr = ccall((:git_repository_workdir, libgit2), Cstring,
-                (Ptr{Cvoid},), repo.ptr)
-    sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
-    return unsafe_string(sptr)
+    GC.@preserve repo begin
+        sptr = ccall((:git_repository_workdir, libgit2), Cstring,
+                     (Ptr{Cvoid},), repo)
+        sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
+        return unsafe_string(sptr)
+    end
 end
 
 """
@@ -256,7 +267,7 @@ function peel(::Type{T}, obj::GitObject) where T<:GitObject
     new_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
 
     @check ccall((:git_object_peel, libgit2), Cint,
-                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj.ptr, Consts.OBJECT(T))
+                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj, Consts.OBJECT(T))
 
     return T(obj.owner, new_ptr_ptr[])
 end
@@ -287,7 +298,7 @@ function GitDescribeResult(committish::GitObject;
     result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_describe_commit, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, committish.ptr, Ref(options))
+                 result_ptr_ptr, committish, Ref(options))
     return GitDescribeResult(committish.owner, result_ptr_ptr[])
 end
 
@@ -314,7 +325,7 @@ function GitDescribeResult(repo::GitRepo; options::DescribeOptions=DescribeOptio
     @assert repo.ptr != C_NULL
     @check ccall((:git_describe_workdir, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, repo.ptr, Ref(options))
+                 result_ptr_ptr, repo, Ref(options))
     return GitDescribeResult(repo, result_ptr_ptr[])
 end
 
@@ -331,7 +342,7 @@ function format(result::GitDescribeResult; options::DescribeFormatOptions=Descri
     buf_ref = Ref(Buffer())
     @check ccall((:git_describe_format, libgit2), Cint,
                  (Ptr{Buffer}, Ptr{Cvoid}, Ptr{DescribeFormatOptions}),
-                 buf_ref, result.ptr, Ref(options))
+                 buf_ref, result, Ref(options))
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -357,7 +368,7 @@ function checkout_tree(repo::GitRepo, obj::GitObject;
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_tree, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, obj.ptr, Ref(options))
+                 repo, obj, Ref(options))
 end
 
 """
@@ -373,8 +384,8 @@ function checkout_index(repo::GitRepo, idx::Union{GitIndex, Nothing} = nothing;
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_index, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr,
-                 idx === nothing ? C_NULL : idx.ptr,
+                 repo,
+                 idx === nothing ? C_NULL : idx,
                  Ref(options))
 end
 
@@ -393,7 +404,7 @@ function checkout_head(repo::GitRepo; options::CheckoutOptions = CheckoutOptions
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_head, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, Ref(options))
+                 repo, Ref(options))
 end
 
 """
@@ -412,7 +423,7 @@ function cherrypick(repo::GitRepo, commit::GitCommit; options::CherrypickOptions
     @assert repo.ptr != C_NULL
     @check ccall((:git_cherrypick, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CherrypickOptions}),
-                 repo.ptr, commit.ptr, Ref(options))
+                 repo, commit, Ref(options))
 end
 
 """Updates some entries, determined by the `pathspecs`, in the index from the target commit tree."""
@@ -421,8 +432,8 @@ function reset!(repo::GitRepo, obj::Union{GitObject, Nothing}, pathspecs::Abstra
     @assert repo.ptr != C_NULL
     @check ccall((:git_reset_default, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{StrArrayStruct}),
-                 repo.ptr,
-                 obj === nothing ? C_NULL : obj.ptr,
+                 repo,
+                 obj === nothing ? C_NULL : obj,
                  collect(pathspecs))
     return head_oid(repo)
 end
@@ -434,7 +445,7 @@ function reset!(repo::GitRepo, obj::GitObject, mode::Cint;
     @assert repo.ptr != C_NULL
     @check ccall((:git_reset, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Cint, Ptr{CheckoutOptions}),
-                  repo.ptr, obj.ptr, mode, Ref(checkout_opts))
+                  repo, obj, mode, Ref(checkout_opts))
     return head_oid(repo)
 end
 
@@ -492,7 +503,7 @@ function fetchheads(repo::GitRepo)
     @assert repo.ptr != C_NULL
     @check ccall((:git_repository_fetchhead_foreach, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Any),
-                 repo.ptr, ffcb, fh)
+                 repo, ffcb, fh)
     return fh
 end
 
@@ -506,7 +517,7 @@ function remotes(repo::GitRepo)
     sa_ref = Ref(StrArrayStruct())
     @assert repo.ptr != C_NULL
     @check ccall((:git_remote_list, libgit2), Cint,
-                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     return res
diff --git a/stdlib/LibGit2/src/signature.jl b/stdlib/LibGit2/src/signature.jl
index 85e62cd8c2b7e..17013121db9ad 100644
--- a/stdlib/LibGit2/src/signature.jl
+++ b/stdlib/LibGit2/src/signature.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function Signature(ptr::Ptr{SignatureStruct})
+    @assert ptr != C_NULL
     sig   = unsafe_load(ptr)::SignatureStruct
     name  = unsafe_string(sig.name)
     email = unsafe_string(sig.email)
@@ -67,6 +68,6 @@ function default_signature(repo::GitRepo)
     ensure_initialized()
     sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL)
     @check ccall((:git_signature_default, libgit2), Cint,
-                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo.ptr)
+                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo)
     return GitSignature(sig_ptr_ptr[])
 end
diff --git a/stdlib/LibGit2/src/status.jl b/stdlib/LibGit2/src/status.jl
index c1cb2fb1c5a9c..c048e68c2b2bc 100644
--- a/stdlib/LibGit2/src/status.jl
+++ b/stdlib/LibGit2/src/status.jl
@@ -14,14 +14,14 @@ function GitStatus(repo::GitRepo; status_opts=StatusOptions())
     stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_status_list_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{StatusOptions}),
-                  stat_ptr_ptr, repo.ptr, Ref(status_opts))
+                  stat_ptr_ptr, repo, Ref(status_opts))
     return GitStatus(repo, stat_ptr_ptr[])
 end
 
 function Base.length(status::GitStatus)
     ensure_initialized()
     return Int(ccall((:git_status_list_entrycount, libgit2), Csize_t,
-                      (Ptr{Ptr{Cvoid}},), status.ptr))
+                         (Ptr{Cvoid},), status))
 end
 
 function Base.getindex(status::GitStatus, i::Integer)
@@ -51,7 +51,7 @@ function status(repo::GitRepo, path::String)
     status_ptr = Ref{Cuint}(0)
     ret =  ccall((:git_status_file, libgit2), Cint,
                   (Ref{Cuint}, Ptr{Cvoid}, Cstring),
-                  status_ptr, repo.ptr, path)
+                  status_ptr, repo, path)
     (ret == Cint(Error.ENOTFOUND) || ret == Cint(Error.EAMBIGUOUS)) && return nothing
     return status_ptr[]
 end
diff --git a/stdlib/LibGit2/src/tag.jl b/stdlib/LibGit2/src/tag.jl
index 0e3d2b398a835..bbb0c97a484ec 100644
--- a/stdlib/LibGit2/src/tag.jl
+++ b/stdlib/LibGit2/src/tag.jl
@@ -9,7 +9,7 @@ function tag_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_tag_list, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -23,7 +23,7 @@ Remove the git tag `tag` from the repository `repo`.
 function tag_delete(repo::GitRepo, tag::AbstractString)
     ensure_initialized()
     @check ccall((:git_tag_delete, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, tag)
+                  (Ptr{Cvoid}, Cstring), repo, tag)
 end
 
 """
@@ -48,7 +48,7 @@ function tag_create(repo::GitRepo, tag::AbstractString, commit::Union{AbstractSt
             ensure_initialized()
             @check ccall((:git_tag_create, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Ptr{SignatureStruct}, Cstring, Cint),
-                  oid_ptr, repo.ptr, tag, commit_obj.ptr, git_sig.ptr, msg, Cint(force))
+                  oid_ptr, repo, tag, commit_obj, git_sig, msg, Cint(force))
         end
     end
     return oid_ptr[]
diff --git a/stdlib/LibGit2/src/tree.jl b/stdlib/LibGit2/src/tree.jl
index 1aeeec96ea778..4c507aaba8e48 100644
--- a/stdlib/LibGit2/src/tree.jl
+++ b/stdlib/LibGit2/src/tree.jl
@@ -37,7 +37,7 @@ function treewalk(f, tree::GitTree, post::Bool = false)
         end, Cint, (Cstring, Ptr{Cvoid}, Ref{Vector{Any}}))
     err = ccall((:git_tree_walk, libgit2), Cint,
                 (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Any),
-                tree.ptr, post, cbf, payload)
+                tree, post, cbf, payload)
     if err < 0
         err_class, _ = Error.last_error()
         if err_class != Error.Callback
@@ -58,8 +58,10 @@ Return the filename of the object on disk to which `te` refers.
 """
 function filename(te::GitTreeEntry)
     ensure_initialized()
-    str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
-    str != C_NULL && return unsafe_string(str)
+    GC.@preserve te begin
+        str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
+        str != C_NULL && return unsafe_string(str)
+    end
     return nothing
 end
 
@@ -70,7 +72,7 @@ Return the UNIX filemode of the object on disk to which `te` refers as an intege
 """
 function filemode(te::GitTreeEntry)
     ensure_initialized()
-    return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te)
 end
 
 """
@@ -81,7 +83,7 @@ one of the types which [`objtype`](@ref) returns, e.g. a `GitTree` or `GitBlob`.
 """
 function entrytype(te::GitTreeEntry)
     ensure_initialized()
-    otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te)
     return objtype(Consts.OBJECT(otype))
 end
 
@@ -101,7 +103,7 @@ end
 
 function count(tree::GitTree)
     ensure_initialized()
-    return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr)
+    return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree)
 end
 
 function Base.getindex(tree::GitTree, i::Integer)
@@ -111,7 +113,7 @@ function Base.getindex(tree::GitTree, i::Integer)
     ensure_initialized()
     te_ptr = ccall((:git_tree_entry_byindex, libgit2),
                    Ptr{Cvoid},
-                   (Ptr{Cvoid}, Csize_t), tree.ptr, i-1)
+                   (Ptr{Cvoid}, Csize_t), tree, i-1)
     return GitTreeEntry(tree, te_ptr, false)
 end
 
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index 96cea96d013e5..7a4ad37a68ca5 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -678,6 +678,8 @@ The fields represent:
      for more information.
   * `custom_headers`: only relevant if the LibGit2 version is greater than or equal to `0.24.0`.
      Extra headers needed for the push operation.
+  * `remote_push_options`: only relevant if the LibGit2 version is greater than or equal to `1.8.0`.
+     "Push options" to deliver to the remote.
 """
 @kwdef struct PushOptions
     version::Cuint                     = Cuint(1)
@@ -692,6 +694,9 @@ The fields represent:
     @static if LibGit2.VERSION >= v"0.24.0"
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
+    @static if LibGit2.VERSION >= v"1.8.0"
+        remote_push_options::StrArrayStruct = StrArrayStruct()
+    end
 end
 @assert Base.allocatedinline(PushOptions)
 
@@ -913,10 +918,17 @@ Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_conf
 struct ConfigEntry
     name::Cstring
     value::Cstring
+    @static if LibGit2.VERSION >= v"1.8.0"
+        backend_type::Cstring
+        origin_path::Cstring
+    end
     include_depth::Cuint
     level::GIT_CONFIG
     free::Ptr{Cvoid}
-    payload::Ptr{Cvoid} # User is not permitted to read or write this field
+    @static if LibGit2.VERSION < v"1.8.0"
+        # In 1.8.0, the unused payload value has been removed
+        payload::Ptr{Cvoid}
+    end
 end
 @assert Base.allocatedinline(ConfigEntry)
 
@@ -1045,7 +1057,6 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
     else
         @eval mutable struct $typ <: $sup
             owner::$owntyp
@@ -1060,17 +1071,17 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
         if isa(owntyp, Expr) && owntyp.args[1] === :Union && owntyp.args[3] === :Nothing
             @eval begin
                 $typ(ptr::Ptr{Cvoid}, fin::Bool=true) = $typ(nothing, ptr, fin)
             end
         end
     end
+    @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, obj::$typ) = obj.ptr
     @eval function Base.close(obj::$typ)
         if obj.ptr != C_NULL
             ensure_initialized()
-            ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr)
+            ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj)
             obj.ptr = C_NULL
             if Threads.atomic_sub!(REFCOUNT, 1) == 1
                 # will the last finalizer please turn out the lights?
@@ -1104,10 +1115,11 @@ end
 function Base.close(obj::GitSignature)
     if obj.ptr != C_NULL
         ensure_initialized()
-        ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr)
+        ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj)
         obj.ptr = C_NULL
     end
 end
+Base.unsafe_convert(::Type{Ptr{SignatureStruct}}, obj::GitSignature) = obj.ptr
 
 # Structure has the same layout as SignatureStruct
 mutable struct Signature
diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl
index e43687b014226..239009a014c1e 100644
--- a/stdlib/LibGit2/src/walker.jl
+++ b/stdlib/LibGit2/src/walker.jl
@@ -22,7 +22,7 @@ function GitRevWalker(repo::GitRepo)
     ensure_initialized()
     w_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_revwalk_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo)
     return GitRevWalker(repo, w_ptr[])
 end
 
@@ -30,7 +30,7 @@ function Base.iterate(w::GitRevWalker, state=nothing)
     ensure_initialized()
     id_ptr = Ref(GitHash())
     err = ccall((:git_revwalk_next, libgit2), Cint,
-                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w.ptr)
+                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w)
     if err == Cint(Error.GIT_OK)
         return (id_ptr[], nothing)
     elseif err == Cint(Error.ITEROVER)
@@ -51,7 +51,7 @@ during the walk.
 """
 function push_head!(w::GitRevWalker)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w.ptr)
+    @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w)
     return w
 end
 
@@ -64,20 +64,20 @@ of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref
 """
 function push!(w::GitRevWalker, cid::GitHash)
     ensure_initialized()
-    @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid))
+    @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w, Ref(cid))
     return w
 end
 
 function push!(w::GitRevWalker, range::AbstractString)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range)
+    @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w, range)
     return w
 end
 
 function Base.sort!(w::GitRevWalker; by::Cint = Consts.SORT_NONE, rev::Bool=false)
     ensure_initialized()
     rev && (by |= Consts.SORT_REVERSE)
-    @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by)
+    @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w, by)
     return w
 end
 
diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl
index c5abca15ca719..72ca1019ff9e0 100644
--- a/stdlib/LibGit2/test/libgit2-tests.jl
+++ b/stdlib/LibGit2/test/libgit2-tests.jl
@@ -8,113 +8,11 @@ using Test
 using Random, Serialization, Sockets
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
-import .Main.FakePTYs: with_fake_pty
-
-const timeout = 60
-
-function challenge_prompt(code::Expr, challenges)
-    input_code = tempname()
-    open(input_code, "w") do fp
-        serialize(fp, code)
-    end
-    output_file = tempname()
-    torun = """
-        import LibGit2
-        using Serialization
-        result = open($(repr(input_code))) do fp
-            eval(deserialize(fp))
-        end
-        open($(repr(output_file)), "w") do fp
-            serialize(fp, result)
-        end"""
-    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
-    try
-        challenge_prompt(cmd, challenges)
-        return open(output_file, "r") do fp
-            deserialize(fp)
-        end
-    finally
-        isfile(output_file) && rm(output_file)
-        isfile(input_code) && rm(input_code)
-    end
-    return nothing
-end
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt as basic_challenge_prompt
 
-function challenge_prompt(cmd::Cmd, challenges)
-    function format_output(output)
-        str = read(seekstart(output), String)
-        isempty(str) && return ""
-        return "Process output found:\n\"\"\"\n$str\n\"\"\""
-    end
-    out = IOBuffer()
-    with_fake_pty() do pts, ptm
-        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
-        Base.close_stdio(pts)
-
-        # Kill the process if it takes too long. Typically occurs when process is waiting
-        # for input.
-        timer = Channel{Symbol}(1)
-        watcher = @async begin
-            waited = 0
-            while waited < timeout && process_running(p)
-                sleep(1)
-                waited += 1
-            end
-
-            if process_running(p)
-                kill(p)
-                put!(timer, :timeout)
-            elseif success(p)
-                put!(timer, :success)
-            else
-                put!(timer, :failure)
-            end
-
-            # SIGKILL stubborn processes
-            if process_running(p)
-                sleep(3)
-                process_running(p) && kill(p, Base.SIGKILL)
-            end
-            wait(p)
-        end
-
-        wroteall = false
-        try
-            for (challenge, response) in challenges
-                write(out, readuntil(ptm, challenge, keep=true))
-                if !isopen(ptm)
-                    error("Could not locate challenge: \"$challenge\". ",
-                          format_output(out))
-                end
-                write(ptm, response)
-            end
-            wroteall = true
-
-            # Capture output from process until `pts` is closed
-            write(out, ptm)
-        catch ex
-            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
-                # ignore EIO from `ptm` after `pts` dies
-                error("Process failed possibly waiting for a response. ",
-                      format_output(out))
-            end
-        end
-
-        status = fetch(timer)
-        close(ptm)
-        if status !== :success
-            if status === :timeout
-                error("Process timed out possibly waiting for a response. ",
-                      format_output(out))
-            else
-                error("Failed process. ", format_output(out), "\n", p)
-            end
-        end
-        wait(watcher)
-    end
-    nothing
-end
+challenge_prompt(code::Expr, challenges) = basic_challenge_prompt(code, challenges; pkgs=["LibGit2"])
+challenge_prompt(cmd::Cmd, challenges) = basic_challenge_prompt(cmd, challenges)
 
 const LIBGIT2_MIN_VER = v"1.0.0"
 const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl")
diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml
index 1e13d0228488f..ceeb394f26231 100644
--- a/stdlib/LibGit2_jll/Project.toml
+++ b/stdlib/LibGit2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibGit2_jll"
 uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
-version = "1.7.2+0"
+version = "1.8.0+0"
 
 [deps]
 MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
index ff625a6494a26..15d303dfea6ee 100644
--- a/stdlib/LibGit2_jll/src/LibGit2_jll.jl
+++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LibGit2_jll
 using Base, Libdl, MbedTLS_jll, LibSSH2_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
@@ -21,9 +20,9 @@ libgit2_path::String = ""
 if Sys.iswindows()
     const libgit2 = "libgit2.dll"
 elseif Sys.isapple()
-    const libgit2 = "@rpath/libgit2.1.7.dylib"
+    const libgit2 = "@rpath/libgit2.1.8.dylib"
 else
-    const libgit2 = "libgit2.so.1.7"
+    const libgit2 = "libgit2.so.1.8"
 end
 
 function __init__()
diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl
index 6d83f6e447453..5bc74760b1603 100644
--- a/stdlib/LibGit2_jll/test/runtests.jl
+++ b/stdlib/LibGit2_jll/test/runtests.jl
@@ -7,5 +7,5 @@ using Test, Libdl, LibGit2_jll
     minor = Ref{Cint}(0)
     patch = Ref{Cint}(0)
     @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0
-    @test VersionNumber(major[], minor[], patch[]) == v"1.7.2"
+    @test VersionNumber(major[], minor[], patch[]) == v"1.8.0"
 end
diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
index a809f7a912d6b..351cbe0e3729b 100644
--- a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
+++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LibSSH2_jll
 using Base, Libdl, MbedTLS_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
index 8089516c1df14..fb03c6b996048 100644
--- a/stdlib/LibUV_jll/Project.toml
+++ b/stdlib/LibUV_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUV_jll"
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+15"
+version = "2.0.1+18"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl
index 767f055eb019f..febc47f168ab9 100644
--- a/stdlib/LibUV_jll/src/LibUV_jll.jl
+++ b/stdlib/LibUV_jll/src/LibUV_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LibUV_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 # NOTE: This file is currently empty, as we link libuv statically for now.
 
diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml
index 0e8684a212944..03ccfcd1449d8 100644
--- a/stdlib/LibUnwind_jll/Project.toml
+++ b/stdlib/LibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUnwind_jll"
 uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
-version = "1.7.2+2"
+version = "1.8.1+1"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
index 12abeaf598151..f97b18443b6fd 100644
--- a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
+++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LibUnwind_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index da756f56e12b8..967ef8237f03d 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -609,7 +609,7 @@ LinearAlgebra.copy_transpose!
 ## Low-level matrix operations
 
 In many cases there are in-place versions of matrix operations that allow you to supply
-a pre-allocated output vector or matrix.  This is useful when optimizing critical code in order
+a pre-allocated output vector or matrix. This is useful when optimizing critical code in order
 to avoid the overhead of repeated allocations. These in-place operations are suffixed with `!`
 below (e.g. `mul!`) according to the usual Julia convention.
 
@@ -630,7 +630,7 @@ implementations of BLAS available for every computer architecture, and sometimes
 linear algebra routines it is useful to call the BLAS functions directly.
 
 `LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions
-that overwrite one of the input arrays have names ending in `'!'`.  Usually, a BLAS function has
+that overwrite one of the input arrays have names ending in `'!'`. Usually, a BLAS function has
 four methods defined, for [`Float32`](@ref), [`Float64`](@ref), [`ComplexF32`](@ref Complex),
 and [`ComplexF64`](@ref Complex) arrays.
 
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index 9b9aaf0d9581d..27d4255fb656b 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -18,7 +18,7 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as
     typed_hcat, vec, view, zero
 using Base: IndexLinear, promote_eltype, promote_op, print_matrix,
     @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
-    splat
+    splat, BitInteger
 using Base.Broadcast: Broadcasted, broadcasted
 using Base.PermutedDimsArrays: CommutativeOps
 using OpenBLAS_jll
@@ -190,6 +190,7 @@ end
 abstract type Algorithm end
 struct DivideAndConquer <: Algorithm end
 struct QRIteration <: Algorithm end
+struct RobustRepresentations <: Algorithm end
 
 # Pivoting strategies for matrix factorization algorithms.
 abstract type PivotingStrategy end
@@ -197,9 +198,11 @@ abstract type PivotingStrategy end
 """
     NoPivot
 
-Pivoting is not performed. Matrix factorizations such as the LU factorization
-may fail without pivoting, and may also be numerically unstable for floating-point matrices in the face of roundoff error.
-This pivot strategy is mainly useful for pedagogical purposes.
+Pivoting is not performed. This is the default strategy for [`cholesky`](@ref) and
+[`qr`](@ref) factorizations. Note, however, that other matrix factorizations such as the LU
+factorization may fail without pivoting, and may also be numerically unstable for
+floating-point matrices in the face of roundoff error. In such cases, this pivot strategy
+is mainly useful for pedagogical purposes.
 """
 struct NoPivot <: PivotingStrategy end
 
@@ -208,10 +211,11 @@ struct NoPivot <: PivotingStrategy end
 
 First non-zero element in the remaining rows is chosen as the pivot element.
 
-Beware that for floating-point matrices, the resulting LU algorithm is numerically unstable — this strategy
-is mainly useful for comparison to hand calculations (which typically use this strategy) or for other
-algebraic types (e.g. rational numbers) not susceptible to roundoff errors.   Otherwise, the default
-`RowMaximum` pivoting strategy should be generally preferred in Gaussian elimination.
+Beware that for floating-point matrices, the resulting LU algorithm is numerically unstable
+— this strategy is mainly useful for comparison to hand calculations (which typically use
+this strategy) or for other algebraic types (e.g. rational numbers) not susceptible to
+roundoff errors. Otherwise, the default `RowMaximum` pivoting strategy should be generally
+preferred in Gaussian elimination.
 
 Note that the [element type](@ref eltype) of the matrix must admit an [`iszero`](@ref)
 method.
@@ -221,26 +225,36 @@ struct RowNonZero <: PivotingStrategy end
 """
     RowMaximum
 
-The maximum-magnitude element in the remaining rows is chosen as the pivot element.
-This is the default strategy for LU factorization of floating-point matrices, and is sometimes
-referred to as the "partial pivoting" algorithm.
+A row (and potentially also column) pivot is chosen based on a maximum property.
+This is the default strategy for LU factorization and for pivoted Cholesky factorization
+(though [`NoPivot`] is the default for [`cholesky`](@ref)).
 
-Note that the [element type](@ref eltype) of the matrix must admit an [`abs`](@ref) method,
-whose result type must admit a [`<`](@ref) method.
+In the LU case, the maximum-magnitude element within the current column in the remaining
+rows is chosen as the pivot element. This is sometimes referred to as the "partial
+pivoting" algorithm. In this case, the [element type](@ref eltype) of the matrix must admit
+an [`abs`](@ref) method, whose result type must admit a [`<`](@ref) method.
+
+In the Cholesky case, the maximal element among the remaining diagonal elements is
+chosen as the pivot element. This is sometimes referred to as the "diagonal pivoting"
+algorithm, and leads to _complete pivoting_ (i.e., of both rows and columns by the same
+permutation). In this case, the (real part of the) [element type](@ref eltype) of the
+matrix must admit a [`<`](@ref) method.
 """
 struct RowMaximum <: PivotingStrategy end
 
 """
     ColumnNorm
 
-The column with the maximum norm is used for subsequent computation.  This
-is used for pivoted QR factorization.
+The column with the maximum norm is used for subsequent computation. This is used for
+pivoted QR factorization.
 
 Note that the [element type](@ref eltype) of the matrix must admit [`norm`](@ref) and
 [`abs`](@ref) methods, whose respective result types must admit a [`<`](@ref) method.
 """
 struct ColumnNorm <: PivotingStrategy end
 
+using Base: DimOrInd
+
 # Check that stride of matrix/vector is 1
 # Writing like this to avoid splatting penalty when called with multiple arguments,
 # see PR 16416
@@ -279,6 +293,28 @@ stride1(x::DenseArray) = stride(x, 1)::Int
 @noinline _chkstride1(ok::Bool) = ok || error("matrix does not have contiguous columns")
 @inline _chkstride1(ok::Bool, A, B...) = _chkstride1(ok & (stride1(A) == 1), B...)
 
+# Subtypes of StridedArrays that satisfy certain properties on their strides
+# Similar to Base.RangeIndex, but only include range types where the step is statically known to be non-zero
+const IncreasingRangeIndex = Union{BitInteger, AbstractUnitRange{<:BitInteger}}
+const NonConstRangeIndex = Union{IncreasingRangeIndex, StepRange{<:BitInteger, <:BitInteger}}
+# StridedArray subtypes for which _fullstride2(::T) === true is known from the type
+DenseOrStridedReshapedReinterpreted{T,N} =
+    Union{DenseArray{T,N}, Base.StridedReshapedArray{T,N}, Base.StridedReinterpretArray{T,N}}
+# Similar to Base.StridedSubArray, except with a NonConstRangeIndex instead of a RangeIndex
+StridedSubArrayStandard{T,N,A<:DenseOrStridedReshapedReinterpreted,
+    I<:Tuple{Vararg{Union{NonConstRangeIndex, Base.ReshapedUnitRange, Base.AbstractCartesianIndex}}}} = Base.StridedSubArray{T,N,A,I}
+StridedArrayStdSubArray{T,N} = Union{DenseOrStridedReshapedReinterpreted{T,N},StridedSubArrayStandard{T,N}}
+# Similar to Base.StridedSubArray, except with a IncreasingRangeIndex instead of a RangeIndex
+StridedSubArrayIncr{T,N,A<:DenseOrStridedReshapedReinterpreted,
+    I<:Tuple{Vararg{Union{IncreasingRangeIndex, Base.ReshapedUnitRange, Base.AbstractCartesianIndex}}}} = Base.StridedSubArray{T,N,A,I}
+StridedArrayStdSubArrayIncr{T,N} = Union{DenseOrStridedReshapedReinterpreted{T,N},StridedSubArrayIncr{T,N}}
+# These subarrays have a stride of 1 along the first dimension
+StridedSubArrayAUR{T,N,A<:DenseOrStridedReshapedReinterpreted,
+    I<:Tuple{AbstractUnitRange{<:BitInteger}}} = Base.StridedSubArray{T,N,A,I}
+StridedArrayStride1{T,N} = Union{DenseOrStridedReshapedReinterpreted{T,N},StridedSubArrayIncr{T,N}}
+# StridedMatrixStride1 may typically be forwarded to LAPACK methods
+StridedMatrixStride1{T} = StridedArrayStride1{T,2}
+
 """
     LinearAlgebra.checksquare(A)
 
@@ -297,14 +333,14 @@ julia> LinearAlgebra.checksquare(A, B)
 """
 function checksquare(A)
     m,n = size(A)
-    m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))"))
+    m == n || throw(DimensionMismatch(lazy"matrix is not square: dimensions are $(size(A))"))
     m
 end
 
 function checksquare(A...)
     sizes = Int[]
     for a in A
-        size(a,1)==size(a,2) || throw(DimensionMismatch("matrix is not square: dimensions are $(size(a))"))
+        size(a,1)==size(a,2) || throw(DimensionMismatch(lazy"matrix is not square: dimensions are $(size(a))"))
         push!(sizes, size(a,1))
     end
     return sizes
@@ -474,6 +510,33 @@ See also: `copymutable_oftype`.
 """
 copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)
 
+"""
+    BandIndex(band, index)
+
+Represent a Cartesian index as a linear index along a band.
+This type is primarily meant to index into a specific band without branches,
+so, for best performance, `band` should be a compile-time constant.
+"""
+struct BandIndex
+    band :: Int
+    index :: Int
+end
+function _cartinds(b::BandIndex)
+    (; band, index) = b
+    bandg0 = max(band,0)
+    row = index - band + bandg0
+    col = index + bandg0
+    CartesianIndex(row, col)
+end
+function Base.to_indices(A, inds, t::Tuple{BandIndex, Vararg{Any}})
+    to_indices(A, inds, (_cartinds(first(t)), Base.tail(t)...))
+end
+function Base.checkbounds(::Type{Bool}, A::AbstractMatrix, b::BandIndex)
+    checkbounds(Bool, A, _cartinds(b))
+end
+function Base.checkbounds(A::Broadcasted, b::BandIndex)
+    checkbounds(A, _cartinds(b))
+end
 
 include("adjtrans.jl")
 include("transpose.jl")
@@ -516,32 +579,60 @@ const ⋅ = dot
 const × = cross
 export ⋅, ×
 
+# Separate the char corresponding to the wrapper from that corresponding to the uplo
+# In most cases, the former may be constant-propagated, while the latter usually can't be.
+# This improves type-inference in wrap for Symmetric/Hermitian matrices
+# A WrapperChar is equivalent to `isuppertri ? uppercase(wrapperchar) : lowercase(wrapperchar)`
+struct WrapperChar <: AbstractChar
+    wrapperchar :: Char
+    isuppertri :: Bool
+end
+function Base.Char(w::WrapperChar)
+    T = w.wrapperchar
+    if T ∈ ('N', 'T', 'C') # known cases where isuppertri is true
+        T
+    else
+        _isuppertri(w) ? uppercase(T) : lowercase(T)
+    end
+end
+Base.codepoint(w::WrapperChar) = codepoint(Char(w))
+WrapperChar(n::UInt32) = WrapperChar(Char(n))
+WrapperChar(c::Char) = WrapperChar(c, isuppercase(c))
+# We extract the wrapperchar so that the result may be constant-propagated
+# This doesn't return a value of the same type on purpose
+Base.uppercase(w::WrapperChar) = uppercase(w.wrapperchar)
+Base.lowercase(w::WrapperChar) = lowercase(w.wrapperchar)
+_isuppertri(w::WrapperChar) = w.isuppertri
+_isuppertri(x::AbstractChar) = isuppercase(x) # compatibility with earlier Char-based implementation
+_uplosym(x) = _isuppertri(x) ? (:U) : (:L)
+
 wrapper_char(::AbstractArray) = 'N'
 wrapper_char(::Adjoint) = 'C'
 wrapper_char(::Adjoint{<:Real}) = 'T'
 wrapper_char(::Transpose) = 'T'
-wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h'
-wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's'
-wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's'
+wrapper_char(A::Hermitian) =  WrapperChar('H', A.uplo == 'U')
+wrapper_char(A::Hermitian{<:Real}) = WrapperChar('S', A.uplo == 'U')
+wrapper_char(A::Symmetric) = WrapperChar('S', A.uplo == 'U')
+
+wrapper_char_NTC(A::AbstractArray) = uppercase(wrapper_char(A)) == 'N'
+wrapper_char_NTC(A::Union{StridedArray, Adjoint, Transpose}) = true
+wrapper_char_NTC(A::Union{Symmetric, Hermitian}) = false
 
 Base.@constprop :aggressive function wrap(A::AbstractVecOrMat, tA::AbstractChar)
     # merge the result of this before return, so that we can type-assert the return such
     # that even if the tmerge is inaccurate, inference can still identify that the
     # `_generic_matmatmul` signature still matches and doesn't require missing backedges
-    B = if tA == 'N'
+    tA_uc = uppercase(tA)
+    B = if tA_uc == 'N'
         A
-    elseif tA == 'T'
+    elseif tA_uc == 'T'
         transpose(A)
-    elseif tA == 'C'
+    elseif tA_uc == 'C'
         adjoint(A)
-    elseif tA == 'H'
-        Hermitian(A, :U)
-    elseif tA == 'h'
-        Hermitian(A, :L)
-    elseif tA == 'S'
-        Symmetric(A, :U)
-    else # tA == 's'
-        Symmetric(A, :L)
+    elseif tA_uc == 'H'
+        Hermitian(A, _uplosym(tA))
+    elseif tA_uc == 'S'
+        Symmetric(A, _uplosym(tA))
     end
     return B::AbstractVecOrMat
 end
@@ -564,10 +655,6 @@ _evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2]
 _zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
 _zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
 
-# convert to Vector, if necessary
-_makevector(x::Vector) = x
-_makevector(x::AbstractVector) = Vector(x)
-
 # append a zero element / drop the last element
 _pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B)
 _droplast!(A) = deleteat!(A, lastindex(A))
@@ -576,31 +663,21 @@ _droplast!(A) = deleteat!(A, lastindex(A))
 matprod_dest(A::StructuredMatrix, B::StructuredMatrix, TS) = similar(B, TS, size(B))
 matprod_dest(A, B::StructuredMatrix, TS) = similar(A, TS, size(A))
 matprod_dest(A::StructuredMatrix, B, TS) = similar(B, TS, size(B))
-matprod_dest(A::StructuredMatrix, B::Diagonal, TS) = similar(A, TS)
-matprod_dest(A::Diagonal, B::StructuredMatrix, TS) = similar(B, TS)
-matprod_dest(A::Diagonal, B::Diagonal, TS) = similar(B, TS)
-matprod_dest(A::HermOrSym, B::Diagonal, TS) = similar(A, TS, size(A))
-matprod_dest(A::Diagonal, B::HermOrSym, TS) = similar(B, TS, size(B))
-
-# TODO: remove once not used anymore in SparseArrays.jl
-# some trait like this would be cool
-# onedefined(::Type{T}) where {T} = hasmethod(one, (T,))
-# but we are actually asking for oneunit(T), that is, however, defined for generic T as
-# `T(one(T))`, so the question is equivalent for whether one(T) is defined
-onedefined(::Type) = false
-onedefined(::Type{<:Number}) = true
-
-# initialize return array for op(A, B)
-_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} =
-    (onedefined(TA) && onedefined(TB)) ?
-        typeof(matprod(oneunit(TA), oneunit(TB))) :
-        promote_op(matprod, TA, TB)
-_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
-    (onedefined(TA) && onedefined(TB)) ?
-        typeof(op(oneunit(TA), oneunit(TB))) :
-        promote_op(op, TA, TB)
-_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
-    similar(C, _init_eltype(op, TA, TB), size(C))
+# diagonal is special, as it does not change the structure of the other matrix
+# we call similar without a size to preserve the type of the matrix wherever possible
+# reroute through _matprod_dest_diag to allow speicalizing on the type of the StructuredMatrix
+# without defining methods for both the orderings
+matprod_dest(A::StructuredMatrix, B::Diagonal, TS) = _matprod_dest_diag(A, TS)
+matprod_dest(A::Diagonal, B::StructuredMatrix, TS) = _matprod_dest_diag(B, TS)
+matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS)
+_matprod_dest_diag(A, TS) = similar(A, TS)
+function _matprod_dest_diag(A::SymTridiagonal, TS)
+    n = size(A, 1)
+    Tridiagonal(similar(A, TS, n-1), similar(A, TS, n), similar(A, TS, n-1))
+end
+
+# Special handling for adj/trans vec
+matprod_dest(A::Diagonal, B::AdjOrTransAbsVec, TS) = similar(B, TS)
 
 # General fallback definition for handling under- and overdetermined system as well as square problems
 # While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
@@ -691,7 +768,7 @@ function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3
     end
 
     if parallel
-        let Distributed = Base.require_stdlib(Base.PkgId(
+        let Distributed = Base.require(Base.PkgId(
                 Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
             nworkers = @invokelatest Distributed.nworkers()
             results = @invokelatest Distributed.pmap(peakflops, fill(n, nworkers))
@@ -766,9 +843,9 @@ function __init__()
     # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
     if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
         @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS))
+            BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint)))
         else
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2))
+            BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint) ÷ 2))
         end
     end
 end
diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl
index b0d53320f4aa3..101fb2eb75735 100644
--- a/stdlib/LinearAlgebra/src/abstractq.jl
+++ b/stdlib/LinearAlgebra/src/abstractq.jl
@@ -18,6 +18,10 @@ transpose(Q::AbstractQ{<:Real}) = AdjointQ(Q)
 transpose(Q::AbstractQ) = error("transpose not implemented for $(typeof(Q)). Consider using adjoint instead of transpose.")
 adjoint(adjQ::AdjointQ) = adjQ.Q
 
+(^)(Q::AbstractQ, p::Integer) = p < 0 ? power_by_squaring(inv(Q), -p) : power_by_squaring(Q, p)
+@inline Base.literal_pow(::typeof(^), Q::AbstractQ, ::Val{1}) = Q
+@inline Base.literal_pow(::typeof(^), Q::AbstractQ, ::Val{-1}) = inv(Q)
+
 # promotion with AbstractMatrix, at least for equal eltypes
 promote_rule(::Type{<:AbstractMatrix{T}}, ::Type{<:AbstractQ{T}}) where {T} =
     (@inline; Union{AbstractMatrix{T},AbstractQ{T}})
@@ -74,6 +78,14 @@ axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1)
 copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
 copy(Q::AbstractQ) = copymutable(Q)
 
+# legacy compatibility
+similar(Q::AbstractQ) = similar(Q, eltype(Q), size(Q))
+similar(Q::AbstractQ, ::Type{T}) where {T} = similar(Q, T, size(Q))
+similar(Q::AbstractQ, size::DimOrInd...) = similar(Q, eltype(Q), size...)
+similar(Q::AbstractQ, ::Type{T}, size::DimOrInd...) where {T} = similar(Q, T, Base.to_shape(size))
+similar(Q::AbstractQ, size::Tuple{Vararg{DimOrInd}}) = similar(Q, eltype(Q), Base.to_shape(size))
+similar(Q::AbstractQ, ::Type{T}, size::NTuple{N,Integer}) where {T,N} = Array{T,N}(undef, size)
+
 # getindex
 @inline function getindex(Q::AbstractQ, inds...)
     @boundscheck Base.checkbounds_indices(Bool, axes(Q), inds) || Base.throw_boundserror(Q, inds)
@@ -149,13 +161,13 @@ end
 # generically, treat AbstractQ like a matrix with its definite size
 qsize_check(Q::AbstractQ, B::AbstractVecOrMat) =
     size(Q, 2) == size(B, 1) ||
-        throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))"))
+        throw(DimensionMismatch(lazy"second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))"))
 qsize_check(A::AbstractVecOrMat, Q::AbstractQ) =
     size(A, 2) == size(Q, 1) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))"))
+        throw(DimensionMismatch(lazy"second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))"))
 qsize_check(Q::AbstractQ, P::AbstractQ) =
     size(Q, 2) == size(P, 1) ||
-        throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))"))
+        throw(DimensionMismatch(lazy"second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))"))
 
 # mimic the AbstractArray fallback
 *(Q::AbstractQ{<:Number}) = Q
@@ -231,6 +243,7 @@ end
 ### division
 \(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A
 /(A::AbstractVecOrMat, Q::AbstractQ) = A*Q'
+/(Q::AbstractQ, A::AbstractVecOrMat) = Matrix(Q) / A
 ldiv!(Q::AbstractQ, A::AbstractVecOrMat) = lmul!(Q', A)
 ldiv!(C::AbstractVecOrMat, Q::AbstractQ, A::AbstractVecOrMat) = mul!(C, Q', A)
 rdiv!(A::AbstractVecOrMat, Q::AbstractQ) = rmul!(A, Q')
@@ -317,7 +330,7 @@ function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
     mA, nA = size(A.factors)
     mB, nB = size(B,1), size(B,2)
     if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
     end
     Afactors = A.factors
     @inbounds begin
@@ -353,7 +366,7 @@ function lmul!(adjA::AdjointQ{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
     mA, nA = size(A.factors)
     mB, nB = size(B,1), size(B,2)
     if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
     end
     Afactors = A.factors
     @inbounds begin
@@ -384,7 +397,7 @@ function rmul!(A::AbstractVecOrMat, Q::QRPackedQ)
     mQ, nQ = size(Q.factors)
     mA, nA = size(A,1), size(A,2)
     if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
     end
     Qfactors = Q.factors
     @inbounds begin
@@ -420,7 +433,7 @@ function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ})
     mQ, nQ = size(Q.factors)
     mA, nA = size(A,1), size(A,2)
     if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
     end
     Qfactors = Q.factors
     @inbounds begin
@@ -518,13 +531,34 @@ rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q
 lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
 rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
 
+# division by a matrix
+function /(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat)
+    size(B, 2) in size(Q.factors) ||
+            throw(DimensionMismatch(lazy"second dimension of B, $(size(B,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+    if size(B, 2) == size(Q.factors, 2)
+        return Matrix(Q) / B
+    else
+        return collect(Q) / B
+    end
+end
+function \(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}})
+    Q = adjQ.Q
+    size(A, 1) in size(Q.factors) ||
+            throw(DimensionMismatch(lazy"first dimension of A, $(size(A,1)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+    if size(A, 1) == size(Q.factors, 2)
+        return A \ Matrix(Q)'
+    else
+        return A \ collect(Q)'
+    end
+end
+
 # flexible left-multiplication (and adjoint right-multiplication)
 qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) =
     size(B, 1) in size(Q.factors) ||
-        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+        throw(DimensionMismatch(lazy"first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))"))
 qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) =
     (Q = adjQ.Q; size(A, 2) in size(Q.factors) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")))
+        throw(DimensionMismatch(lazy"second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")))
 
 det(Q::HessenbergQ) = _det_tau(Q.τ)
 
@@ -560,10 +594,10 @@ size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n)
 
 qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) =
     size(B, 1) in size(adjQ.Q.factors) ||
-        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))"))
+        throw(DimensionMismatch(lazy"first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))"))
 qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) =
     size(A, 2) in size(Q.factors) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+        throw(DimensionMismatch(lazy"second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))
 
 # in-place right-application of LQPackedQs
 # these methods require that the applied-to matrix's (A's) number of columns
@@ -584,6 +618,27 @@ lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:Bla
 lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
     (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
 
+# division by a matrix
+function /(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat)
+    Q = adjQ.Q
+    size(B, 2) in size(Q.factors) ||
+            throw(DimensionMismatch(lazy"second dimension of B, $(size(B,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+    if size(B, 2) == size(Q.factors, 1)
+        return Matrix(Q)' / B
+    else
+        return collect(Q)' / B
+    end
+end
+function \(A::AbstractVecOrMat, Q::LQPackedQ)
+    size(A, 1) in size(Q.factors) ||
+            throw(DimensionMismatch(lazy"first dimension of A, $(size(A,1)), must equal one of the dimensions of Q, $(size(Q.factors))"))
+    if size(A, 1) == size(Q.factors, 1)
+        return A \ Matrix(Q)
+    else
+        return A \ collect(Q)
+    end
+end
+
 # In LQ factorization, `Q` is expressed as the product of the adjoint of the
 # reflectors.  Thus, `det` has to be conjugated.
 det(Q::LQPackedQ) = conj(_det_tau(Q.τ))
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index 24ad7960f00b4..b722e49bb2c3d 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -302,6 +302,16 @@ function Base.showarg(io::IO, v::Transpose, toplevel)
     toplevel && print(io, " with eltype ", eltype(v))
     return nothing
 end
+function Base.show(io::IO, v::Adjoint{<:Real, <:AbstractVector})
+    print(io, "adjoint(")
+    show(io, parent(v))
+    print(io, ")")
+end
+function Base.show(io::IO, v::Transpose{<:Number, <:AbstractVector})
+    print(io, "transpose(")
+    show(io, parent(v))
+    print(io, ")")
+end
 
 # some aliases for internal convenience use
 const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S}
@@ -327,7 +337,6 @@ size(A::AdjOrTransAbsMat) = reverse(size(A.parent))
 axes(v::AdjOrTransAbsVec) = (axes(v.parent,2), axes(v.parent)...)
 axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent))
 IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear()
-IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian()
 @propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1]))
 @propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i)
 @propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T
@@ -465,7 +474,7 @@ tr(A::Transpose) = transpose(tr(parent(A)))
 function _dot_nonrecursive(u, v)
     lu = length(u)
     if lu != length(v)
-        throw(DimensionMismatch("first array has length $(lu) which does not match the length of the second, $(length(v))."))
+        throw(DimensionMismatch(lazy"first array has length $(lu) which does not match the length of the second, $(length(v))."))
     end
     if lu == 0
         zero(eltype(u)) * zero(eltype(v))
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 00c972f7f990b..d86bad7e41435 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -8,7 +8,7 @@ struct Bidiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
     function Bidiagonal{T,V}(dv, ev, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
         require_one_based_indexing(dv, ev)
         if length(ev) != max(length(dv)-1, 0)
-            throw(DimensionMismatch("length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))"))
+            throw(DimensionMismatch(lazy"length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))"))
         end
         (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,V}(dv, ev, uplo)
@@ -130,14 +130,14 @@ function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
     end
 end
 
+_offdiagind(uplo) = uplo == 'U' ? 1 : -1
+
 @inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int)
     @boundscheck checkbounds(Bool, A, i, j) || return false
     if i == j
         return @inbounds isassigned(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds isassigned(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds isassigned(A.ev, j)
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds isassigned(A.ev, A.uplo == 'U' ? i : j)
     else
         return true
     end
@@ -147,67 +147,66 @@ end
     @boundscheck checkbounds(A, i, j)
     if i == j
         return @inbounds Base.isstored(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds Base.isstored(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds Base.isstored(A.ev, j)
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds Base.isstored(A.ev, A.uplo == 'U' ? i : j)
     else
         return false
     end
 end
 
-@inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
+@inline function getindex(A::Bidiagonal{T}, i::Int, j::Int) where T
     @boundscheck checkbounds(A, i, j)
     if i == j
         return @inbounds A.dv[i]
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds A.ev[i]
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds A.ev[j]
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds A.ev[A.uplo == 'U' ? i : j]
     else
         return bidiagzero(A, i, j)
     end
 end
 
+@inline function getindex(A::Bidiagonal{T}, b::BandIndex) where T
+    @boundscheck checkbounds(A, _cartinds(b))
+    if b.band == 0
+        return @inbounds A.dv[b.index]
+    elseif b.band == _offdiagind(A.uplo)
+        return @inbounds A.ev[b.index]
+    else
+        return bidiagzero(A, Tuple(_cartinds(b))...)
+    end
+end
+
 @inline function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
-    elseif A.uplo == 'U' && (i == j - 1)
-        @inbounds A.ev[i] = x
-    elseif A.uplo == 'L' && (i == j + 1)
-        @inbounds A.ev[j] = x
+    elseif i == j - _offdiagind(A.uplo)
+        @inbounds A.ev[A.uplo == 'U' ? i : j] = x
     elseif !iszero(x)
-        throw(ArgumentError(string("cannot set entry ($i, $j) off the ",
-            "$(istriu(A) ? "upper" : "lower") bidiagonal band to a nonzero value ($x)")))
+        throw(ArgumentError(LazyString(lazy"cannot set entry ($i, $j) off the ",
+            A.uplo == 'U' ? "upper" : "lower", " bidiagonal band to a nonzero value ", x)))
     end
-    return x
+    return A
 end
 
+Base._reverse(A::Bidiagonal, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::Bidiagonal, ::Colon) = Bidiagonal(reverse(A.dv), reverse(A.ev), A.uplo == 'U' ? :L : :U)
+
 ## structured matrix methods ##
 function Base.replace_in_print_matrix(A::Bidiagonal,i::Integer,j::Integer,s::AbstractString)
-    if A.uplo == 'U'
-        i==j || i==j-1 ? s : Base.replace_with_centered_mark(s)
-    else
-        i==j || i==j+1 ? s : Base.replace_with_centered_mark(s)
-    end
+    i==j || i==j-_offdiagind(A.uplo) ? s : Base.replace_with_centered_mark(s)
 end
 
 #Converting from Bidiagonal to dense Matrix
 function Matrix{T}(A::Bidiagonal) where T
-    n = size(A, 1)
-    B = Matrix{T}(undef, n, n)
-    n == 0 && return B
-    n > 1 && fill!(B, zero(T))
-    @inbounds for i = 1:n - 1
-        B[i,i] = A.dv[i]
-        if A.uplo == 'U'
-            B[i,i+1] = A.ev[i]
-        else
-            B[i+1,i] = A.ev[i]
-        end
-    end
-    B[n,n] = A.dv[n]
+    B = Matrix{T}(undef, size(A))
+    if haszero(T) # optimized path for types with zero(T) defined
+        size(B,1) > 1 && fill!(B, zero(T))
+        copyto!(view(B, diagind(B)), A.dv)
+        copyto!(view(B, diagind(B, _offdiagind(A.uplo))), A.ev)
+    else
+        copyto!(B, A)
+    end
     return B
 end
 Matrix(A::Bidiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(A)
@@ -220,7 +219,8 @@ promote_rule(::Type{<:Matrix}, ::Type{<:Bidiagonal}) = Matrix
 function Tridiagonal{T}(A::Bidiagonal) where T
     dv = convert(AbstractVector{T}, A.dv)
     ev = convert(AbstractVector{T}, A.ev)
-    z = fill!(similar(ev), zero(T))
+    # ensure that the types are identical, even if zero returns a different type
+    z = oftype(ev, zero(ev))
     A.uplo == 'U' ? Tridiagonal(z, dv, ev) : Tridiagonal(ev, dv, z)
 end
 promote_rule(::Type{<:Tridiagonal{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} =
@@ -240,8 +240,8 @@ tr(B::Bidiagonal) = sum(B.dv)
 
 function kron(A::Diagonal, B::Bidiagonal)
     # `_droplast!` is only guaranteed to work with `Vector`
-    kdv = _makevector(kron(diag(A), B.dv))
-    kev = _droplast!(_makevector(kron(diag(A), _pushzero(B.ev))))
+    kdv = convert(Vector, kron(diag(A), B.dv))
+    kev = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.ev))))
     Bidiagonal(kdv, kev, B.uplo)
 end
 
@@ -264,12 +264,13 @@ end
 ####################
 
 function show(io::IO, M::Bidiagonal)
-    # TODO: make this readable and one-line
-    summary(io, M)
-    print(io, ":\n diag:")
-    print_matrix(io, (M.dv)')
-    print(io, M.uplo == 'U' ? "\n super:" : "\n sub:")
-    print_matrix(io, (M.ev)')
+    print(io, "Bidiagonal(")
+    show(io, M.dv)
+    print(io, ", ")
+    show(io, M.ev)
+    print(io, ", ")
+    show(io, sym_uplo(M.uplo))
+    print(io, ")")
 end
 
 size(M::Bidiagonal) = (n = length(M.dv); (n, n))
@@ -280,13 +281,13 @@ for func in (:conj, :copy, :real, :imag)
     @eval ($func)(M::Bidiagonal) = Bidiagonal(($func)(M.dv), ($func)(M.ev), M.uplo)
 end
 
-adjoint(B::Bidiagonal) = Adjoint(B)
-transpose(B::Bidiagonal) = Transpose(B)
-adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(conj(B.dv), conj(B.ev), B.uplo == 'U' ? :L : :U)
+adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(vec(adjoint(B.dv)), vec(adjoint(B.ev)), B.uplo == 'U' ? :L : :U)
+adjoint(B::Bidiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
+    Bidiagonal(adjoint(parent(B.dv)), adjoint(parent(B.ev)), B.uplo == 'U' ? :L : :U)
 transpose(B::Bidiagonal{<:Number}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U)
 permutedims(B::Bidiagonal) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? 'L' : 'U')
 function permutedims(B::Bidiagonal, perm)
-    Base.checkdims_perm(B, B, perm)
+    Base.checkdims_perm(axes(B), axes(B), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(B) : B
 end
 function Base.copy(aB::Adjoint{<:Any,<:Bidiagonal})
@@ -298,9 +299,29 @@ function Base.copy(tB::Transpose{<:Any,<:Bidiagonal})
     return Bidiagonal(map(x -> copy.(transpose.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U)
 end
 
+@noinline function throw_zeroband_error(A)
+    uplo = A.uplo
+    zeroband = uplo == 'U' ? "lower" : "upper"
+    throw(ArgumentError(LazyString("cannot set the ",
+        zeroband, " bidiagonal band to a nonzero value for uplo=:", uplo)))
+end
+
+# copyto! for matching axes
+function _copyto_banded!(A::Bidiagonal, B::Bidiagonal)
+    A.dv .= B.dv
+    if A.uplo == B.uplo
+        A.ev .= B.ev
+    elseif iszero(B.ev) # diagonal source
+        A.ev .= B.ev
+    else
+        throw_zeroband_error(A)
+    end
+    return A
+end
+
 iszero(M::Bidiagonal) = iszero(M.dv) && iszero(M.ev)
 isone(M::Bidiagonal) = all(isone, M.dv) && iszero(M.ev)
-function istriu(M::Bidiagonal, k::Integer=0)
+Base.@constprop :aggressive function istriu(M::Bidiagonal, k::Integer=0)
     if M.uplo == 'U'
         if k <= 0
             return true
@@ -319,7 +340,7 @@ function istriu(M::Bidiagonal, k::Integer=0)
         end
     end
 end
-function istril(M::Bidiagonal, k::Integer=0)
+Base.@constprop :aggressive function istril(M::Bidiagonal, k::Integer=0)
     if M.uplo == 'U'
         if k >= 1
             return true
@@ -339,12 +360,14 @@ function istril(M::Bidiagonal, k::Integer=0)
     end
 end
 isdiag(M::Bidiagonal) = iszero(M.ev)
+issymmetric(M::Bidiagonal) = isdiag(M) && all(issymmetric, M.dv)
+ishermitian(M::Bidiagonal) = isdiag(M) && all(ishermitian, M.dv)
 
 function tril!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'U' && k < 0
         fill!(M.dv, zero(T))
         fill!(M.ev, zero(T))
@@ -362,8 +385,8 @@ end
 function triu!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least",
+            lazy"$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'L' && k > 0
         fill!(M.dv, zero(T))
         fill!(M.ev, zero(T))
@@ -386,10 +409,14 @@ function diag(M::Bidiagonal{T}, n::Integer=0) where T
     elseif (n == 1 && M.uplo == 'U') ||  (n == -1 && M.uplo == 'L')
         return copyto!(similar(M.ev, length(M.ev)), M.ev)
     elseif -size(M,1) <= n <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T))
+        v = similar(M.dv, size(M,1)-abs(n))
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
+        throw(ArgumentError(LazyString(lazy"requested diagonal, $n, must be at least $(-size(M, 1)) ",
+            lazy"and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
     end
 end
 
@@ -429,25 +456,101 @@ const BandedMatrix = Union{Bidiagonal,Diagonal,Tridiagonal,SymTridiagonal} # or
 const BiTriSym = Union{Bidiagonal,Tridiagonal,SymTridiagonal}
 const TriSym = Union{Tridiagonal,SymTridiagonal}
 const BiTri = Union{Bidiagonal,Tridiagonal}
-@inline _mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline _mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-
-lmul!(A::Bidiagonal, B::AbstractVecOrMat) = @inline _mul!(B, A, B, MulAddMul())
-rmul!(B::AbstractMatrix, A::Bidiagonal) = @inline _mul!(B, B, A, MulAddMul())
-
-function check_A_mul_B!_sizes(C, A, B)
-    mA, nA = size(A)
-    mB, nB = size(B)
-    mC, nC = size(C)
+@inline _mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) =
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) =
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) =
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline _mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) =
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline _mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) =
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
+
+# B .= A * B
+function lmul!(A::Bidiagonal, B::AbstractVecOrMat)
+    _muldiag_size_check(size(A), size(B))
+    (; dv, ev) = A
+    if A.uplo == 'U'
+        for k in axes(B,2)
+            for i in axes(ev,1)
+                B[i,k] = dv[i] * B[i,k] + ev[i] * B[i+1,k]
+            end
+            B[end,k] = dv[end] * B[end,k]
+        end
+    else
+        for k in axes(B,2)
+            for i in reverse(axes(dv,1)[2:end])
+                B[i,k] = dv[i] * B[i,k] + ev[i-1] * B[i-1,k]
+            end
+            B[1,k] = dv[1] * B[1,k]
+        end
+    end
+    return B
+end
+# B .= D * B
+function lmul!(D::Diagonal, B::Bidiagonal)
+    _muldiag_size_check(size(D), size(B))
+    (; dv, ev) = B
+    isL = B.uplo == 'L'
+    dv[1] = D.diag[1] * dv[1]
+    for i in axes(ev,1)
+        ev[i] = D.diag[i + isL] * ev[i]
+        dv[i+1] = D.diag[i+1] * dv[i+1]
+    end
+    return B
+end
+# B .= B * A
+function rmul!(B::AbstractMatrix, A::Bidiagonal)
+    _muldiag_size_check(size(A), size(B))
+    (; dv, ev) = A
+    if A.uplo == 'U'
+        for k in reverse(axes(dv,1)[2:end])
+            for i in axes(B,1)
+                B[i,k] = B[i,k] * dv[k] + B[i,k-1] * ev[k-1]
+            end
+        end
+        for i in axes(B,1)
+            B[i,1] *= dv[1]
+        end
+    else
+        for k in axes(ev,1)
+            for i in axes(B,1)
+                B[i,k] = B[i,k] * dv[k] + B[i,k+1] * ev[k]
+            end
+        end
+        for i in axes(B,1)
+            B[i,end] *= dv[end]
+        end
+    end
+    return B
+end
+# B .= B * D
+function rmul!(B::Bidiagonal, D::Diagonal)
+    _muldiag_size_check(size(B), size(D))
+    (; dv, ev) = B
+    isU = B.uplo == 'U'
+    dv[1] *= D.diag[1]
+    for i in axes(ev,1)
+        ev[i] *= D.diag[i + isU]
+        dv[i+1] *= D.diag[i+1]
+    end
+    return B
+end
+
+@noinline function check_A_mul_B!_sizes((mC, nC)::NTuple{2,Integer}, (mA, nA)::NTuple{2,Integer}, (mB, nB)::NTuple{2,Integer})
+    # check for matching sizes in one column of B and C
+    check_A_mul_B!_sizes((mC,), (mA, nA), (mB,))
+    # ensure that the number of columns in B and C match
+    if nB != nC
+        throw(DimensionMismatch(lazy"second dimension of output C, $nC, and second dimension of B, $nB, must match"))
+    end
+end
+@noinline function check_A_mul_B!_sizes((mC,)::Tuple{Integer}, (mA, nA)::NTuple{2,Integer}, (mB,)::Tuple{Integer})
     if mA != mC
-        throw(DimensionMismatch("first dimension of A, $mA, and first dimension of output C, $mC, must match"))
+        throw(DimensionMismatch(lazy"first dimension of A, $mA, and first dimension of output C, $mC, must match"))
     elseif nA != mB
-        throw(DimensionMismatch("second dimension of A, $nA, and first dimension of B, $mB, must match"))
-    elseif nB != nC
-        throw(DimensionMismatch("second dimension of output C, $nC, and second dimension of B, $nB, must match"))
+        throw(DimensionMismatch(lazy"second dimension of A, $nA, and first dimension of B, $mB, must match"))
     end
 end
 
@@ -458,7 +561,7 @@ _diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev
 function _diag(A::Bidiagonal, k)
     if k == 0
         return A.dv
-    elseif (A.uplo == 'L' && k == -1) || (A.uplo == 'U' && k == 1)
+    elseif k == _offdiagind(A.uplo)
         return A.ev
     else
         return diag(A, k)
@@ -470,8 +573,10 @@ _mul!(C::AbstractMatrix, A::BiTriSym, B::TriSym, _add::MulAddMul) =
 _mul!(C::AbstractMatrix, A::BiTriSym, B::Bidiagonal, _add::MulAddMul) =
     _bibimul!(C, A, B, _add)
 function _bibimul!(C, A, B, _add)
-    check_A_mul_B!_sizes(C, A, B)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
+    iszero(n) && return C
     n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     # We use `_rmul_or_fill!` instead of `_modify!` here since using
     # `_modify!` in the following loop will not update the
@@ -528,10 +633,9 @@ end
 
 function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul)
     require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
     iszero(n) && return C
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     _rmul_or_fill!(C, _add.beta)  # see the same use above
     iszero(_add.alpha) && return C
     Al = _diag(A, -1)
@@ -540,39 +644,105 @@ function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul)
     Bd = B.diag
     @inbounds begin
         # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1])
-        C[1,2] += _add(A[1,2]*B[2,2])
+        for j in 1:min(2, n)
+            C[1,j] += _add(A[1,j]*B[j,j])
+        end
         # second row of C
-        C[2,1] += _add(A[2,1]*B[1,1])
-        C[2,2] += _add(A[2,2]*B[2,2])
-        C[2,3] += _add(A[2,3]*B[3,3])
+        if n > 1
+            for j in 1:min(3, n)
+                C[2,j] += _add(A[2,j]*B[j,j])
+            end
+        end
         for j in 3:n-2
             C[j, j-1] += _add(Al[j-1]*Bd[j-1])
             C[j, j  ] += _add(Ad[j  ]*Bd[j  ])
             C[j, j+1] += _add(Au[j  ]*Bd[j+1])
         end
-        # row before last of C
-        C[n-1,n-2] += _add(A[n-1,n-2]*B[n-2,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1])
-        C[n-1,n  ] += _add(A[n-1,  n]*B[n  ,n  ])
+        if n > 3
+            # row before last of C
+            for j in n-2:n
+                C[n-1,j] += _add(A[n-1,j]*B[j,j])
+            end
+        end
         # last row of C
-        C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1])
-        C[n,n  ] += _add(A[n,n  ]*B[n,  n  ])
+        if n > 2
+            for j in n-1:n
+                C[n,j] += _add(A[n,j]*B[j,j])
+            end
+        end
     end # inbounds
     C
 end
 
+function _mul!(C::AbstractMatrix, A::Bidiagonal, B::Diagonal, _add::MulAddMul)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    _rmul_or_fill!(C, _add.beta)  # see the same use above
+    iszero(_add.alpha) && return C
+    (; dv, ev) = A
+    Bd = B.diag
+    rowshift = A.uplo == 'U' ? -1 : 1
+    evshift = Int(A.uplo == 'U')
+    @inbounds begin
+        # first row of C
+        C[1,1] += _add(dv[1]*Bd[1])
+        if n > 1
+            if A.uplo == 'L'
+                C[2,1] += _add(ev[1]*Bd[1])
+            end
+            for col in 2:n-1
+                C[col+rowshift, col] += _add(ev[col - evshift]*Bd[col])
+                C[col, col] += _add(dv[col]*Bd[col])
+            end
+            if A.uplo == 'U'
+                C[n-1,n] += _add(ev[n-1]*Bd[n])
+            end
+            C[n, n] += _add(dv[n]*Bd[n])
+        end
+    end # inbounds
+    C
+end
+
+function _mul!(C::Bidiagonal, A::Bidiagonal, B::Diagonal, _add::MulAddMul)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    Adv, Aev = A.dv, A.ev
+    Cdv, Cev = C.dv, C.ev
+    Bd = B.diag
+    shift = Int(A.uplo == 'U')
+    if C.uplo == A.uplo
+        @inbounds begin
+            _modify!(_add, Adv[1]*Bd[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Aev, Cev)
+                _modify!(_add, Aev[j]*Bd[j+shift], Cev, j)
+                _modify!(_add, Adv[j+1]*Bd[j+1], Cdv, j+1)
+            end
+        end # inbounds
+    else
+        @inbounds begin
+            _modify!(_add, Adv[1]*Bd[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Aev, Cev)
+                _modify!(_add, Adv[j+1]*Bd[j+1], Cdv, j+1)
+                # this branch will error unless the value is zero
+                _modify!(_add, Aev[j]*Bd[j+shift], C, (j+1-shift, j+shift))
+                # zeros of the correct type
+                _modify!(_add, A[j+shift, j+1-shift]*Bd[j+1-shift], Cev, j)
+            end
+        end
+    end
+    C
+end
+
 function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulAddMul)
     require_one_based_indexing(C, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     nA = size(A,1)
     nB = size(B,2)
-    if !(size(C,1) == size(B,1) == nA)
-        throw(DimensionMismatch("A has first dimension $nA, B has $(size(B,1)), C has $(size(C,1)) but all must match"))
-    end
-    if size(C,2) != nB
-        throw(DimensionMismatch("A has second dimension $nA, B has $(size(B,2)), C has $(size(C,2)) but all must match"))
-    end
-    iszero(nA) && return C
+    (iszero(nA) || iszero(nB)) && return C
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     l = _diag(A, -1)
@@ -594,10 +764,11 @@ end
 
 function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::TriSym, _add::MulAddMul)
     require_one_based_indexing(C, A)
-    check_A_mul_B!_sizes(C, A, B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
     m = size(B,2)
+    (iszero(m) || iszero(n)) && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     if n <= 3 || m <= 1
         return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     end
@@ -629,12 +800,13 @@ end
 
 function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal, _add::MulAddMul)
     require_one_based_indexing(C, A)
-    check_A_mul_B!_sizes(C, A, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    m, n = size(A)
+    (iszero(m) || iszero(n)) && return C
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     if size(A, 1) <= 3 || size(B, 2) <= 1
         return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     end
-    m, n = size(A)
     @inbounds if B.uplo == 'U'
         for i in 1:m
             for j in n:-1:2
@@ -659,8 +831,9 @@ _mul!(C::AbstractMatrix, A::Diagonal, B::TriSym, _add::MulAddMul) =
     _dibimul!(C, A, B, _add)
 function _dibimul!(C, A, B, _add)
     require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
+    iszero(n) && return C
     n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     _rmul_or_fill!(C, _add.beta)  # see the same use above
     iszero(_add.alpha) && return C
@@ -692,6 +865,68 @@ function _dibimul!(C, A, B, _add)
     end # inbounds
     C
 end
+function _dibimul!(C::AbstractMatrix, A::Diagonal, B::Bidiagonal, _add)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    _rmul_or_fill!(C, _add.beta)  # see the same use above
+    iszero(_add.alpha) && return C
+    Ad = A.diag
+    Bdv, Bev = B.dv, B.ev
+    rowshift = B.uplo == 'U' ? -1 : 1
+    evshift = Int(B.uplo == 'U')
+    @inbounds begin
+        # first row of C
+        C[1,1] += _add(Ad[1]*Bdv[1])
+        if n > 1
+            if B.uplo == 'L'
+                C[2,1] += _add(Ad[2]*Bev[1])
+            end
+            for col in 2:n-1
+                evrow = col+rowshift
+                C[evrow, col] += _add(Ad[evrow]*Bev[col - evshift])
+                C[col, col] += _add(Ad[col]*Bdv[col])
+            end
+            if B.uplo == 'U'
+                C[n-1,n] += _add(Ad[n-1]*Bev[n-1])
+            end
+            C[n, n] += _add(Ad[n]*Bdv[n])
+        end
+    end # inbounds
+    C
+end
+function _dibimul!(C::Bidiagonal, A::Diagonal, B::Bidiagonal, _add)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    n == 0 && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    Ad = A.diag
+    Bdv, Bev = B.dv, B.ev
+    Cdv, Cev = C.dv, C.ev
+    shift = Int(B.uplo == 'L')
+    if C.uplo == B.uplo
+        @inbounds begin
+            _modify!(_add, Ad[1]*Bdv[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Bev, Cev)
+                _modify!(_add, Ad[j+shift]*Bev[j], Cev, j)
+                _modify!(_add, Ad[j+1]*Bdv[j+1], Cdv, j+1)
+            end
+        end # inbounds
+    else
+        @inbounds begin
+            _modify!(_add, Ad[1]*Bdv[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Bev, Cev)
+                _modify!(_add, Ad[j+1]*Bdv[j+1], Cdv, j+1)
+                # this branch will error unless the value is zero
+                _modify!(_add, Ad[j+shift]*Bev[j], C, (j+shift, j+1-shift))
+                # zeros of the correct type
+                _modify!(_add, Ad[j+1-shift]*B[j+1-shift,j+shift], Cev, j)
+            end
+        end
+    end
+    C
+end
 
 function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
@@ -717,18 +952,6 @@ function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
     return A.uplo == 'L' ? LowerTriangular(C) : C
 end
 
-function *(A::Diagonal, B::SymTridiagonal)
-    TS = promote_op(*, eltype(A), eltype(B))
-    out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1))
-    mul!(out, A, B)
-end
-
-function *(A::SymTridiagonal, B::Diagonal)
-    TS = promote_op(*, eltype(A), eltype(B))
-    out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1))
-    mul!(out, A, B)
-end
-
 function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector)
     require_one_based_indexing(x, y)
     nx, ny = length(x), length(y)
@@ -768,11 +991,11 @@ function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat)
     N = size(A, 2)
     mb, nb = size(b, 1), size(b, 2)
     if N != mb
-        throw(DimensionMismatch("second dimension of A, $N, does not match first dimension of b, $mb"))
+        throw(DimensionMismatch(lazy"second dimension of A, $N, does not match first dimension of b, $mb"))
     end
     mc, nc = size(c, 1), size(c, 2)
     if mc != mb || nc != nb
-        throw(DimensionMismatch("size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)"))
+        throw(DimensionMismatch(lazy"size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)"))
     end
 
     if N == 0
@@ -838,11 +1061,11 @@ function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal)
     require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
     mc, nc = size(C)
     if mc != m || nc != n
-        throw(DimensionMismatch("expect output to have size ($m, $n), but got ($mc, $nc)"))
+        throw(DimensionMismatch(lazy"expect output to have size ($m, $n), but got ($mc, $nc)"))
     end
 
     zi = findfirst(iszero, B.dv)
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 7bf7f8bf95100..413b7866c5444 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -159,16 +159,15 @@ function check()
     interface = USE_BLAS64 ? :ilp64 : :lp64
     if !any(lib.interface == interface for lib in config.loaded_libs)
         interfacestr = uppercase(string(interface))
-        @error("No loaded BLAS libraries were built with $(interfacestr) support")
-        println("Quitting.")
-        exit()
+        @error("No loaded BLAS libraries were built with $interfacestr support.")
+        exit(1)
     end
 end
 
 "Check that upper/lower (for special matrices) is correctly specified"
 function chkuplo(uplo::AbstractChar)
     if !(uplo == 'U' || uplo == 'L')
-        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
+        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got '$uplo'"))
     end
     uplo
 end
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
index 8d1ded9bf8111..a44f1a1c99094 100644
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl
@@ -127,6 +127,9 @@ function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check:
     end
 end
 
+bkcopy_oftype(A, S) = eigencopy_oftype(A, S)
+bkcopy_oftype(A::Symmetric{<:Complex}, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+
 """
     bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman
 
@@ -206,7 +209,7 @@ julia> S.L*S.D*S.L' - A[S.p, S.p]
 ```
 """
 bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
-    bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
+    bunchkaufman!(bkcopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
 
 BunchKaufman{T}(B::BunchKaufman) where {T} =
     BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
@@ -297,6 +300,17 @@ end
 Base.propertynames(B::BunchKaufman, private::Bool=false) =
     (:p, :P, :L, :U, :D, (private ? fieldnames(typeof(B)) : ())...)
 
+function Base.:(==)(B1::BunchKaufman, B2::BunchKaufman)
+    # check for the equality between properties instead of fields
+    B1.p == B2.p || return false
+    if B1.uplo == 'L'
+        B1.L == B2.L || return false
+    else
+        B1.U == B2.U || return false
+    end
+    return (B1.D == B2.D)
+end
+
 function getproperties!(B::BunchKaufman{T,<:StridedMatrix}) where {T<:BlasFloat}
     # NOTE: Unlike in the 'getproperty' function, in this function L/U and D are computed in place.
     if B.rook
@@ -1529,7 +1543,7 @@ function bunchkaufman(A::AbstractMatrix{TS},
     rook::Bool = false;
     check::Bool = true
     ) where TS <: ClosedScalar{TR} where TR <: ClosedReal
-    return bunchkaufman!(eigencopy_oftype(A, TS), rook; check)
+    return bunchkaufman!(bkcopy_oftype(A, TS), rook; check)
 end
 
 function bunchkaufman(A::AbstractMatrix{TS},
@@ -1551,15 +1565,15 @@ function bunchkaufman(A::AbstractMatrix{TS},
     # We promote input to BigInt to avoid overflow problems
     if TA == Nothing
         if TS <: Integer
-            M = Rational{BigInt}.(eigencopy_oftype(A, TS))
+            M = Rational{BigInt}.(bkcopy_oftype(A, TS))
         else
-            M = Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS))
+            M = Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS))
         end
     else
         if TS <: Integer
-            M = TA(Rational{BigInt}.(eigencopy_oftype(A, TS)), Symbol(A.uplo))
+            M = TA(Rational{BigInt}.(bkcopy_oftype(A, TS)), Symbol(A.uplo))
         else
-            M = TA(Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS)),
+            M = TA(Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS)),
                 Symbol(A.uplo))
         end
     end
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index 528eca5c3d8a3..545d92ec1704d 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -211,7 +211,7 @@ function _chol!(A::AbstractMatrix, ::Type{UpperTriangular})
             A[k,k] = Akk
             AkkInv = inv(copy(Akk'))
             for j = k + 1:n
-                for i = 1:k - 1
+                @simd for i = 1:k - 1
                     A[k,j] -= A[i,k]'A[i,j]
                 end
                 A[k,j] = AkkInv*A[k,j]
@@ -236,14 +236,15 @@ function _chol!(A::AbstractMatrix, ::Type{LowerTriangular})
                 return LowerTriangular(A), convert(BlasInt, k)
             end
             A[k,k] = Akk
-            AkkInv = inv(Akk)
+            AkkInv = inv(copy(Akk'))
             for j = 1:k - 1
+                Akjc = A[k,j]'
                 @simd for i = k + 1:n
-                    A[i,k] -= A[i,j]*A[k,j]'
+                    A[i,k] -= A[i,j]*Akjc
                 end
             end
-            for i = k + 1:n
-                A[i,k] *= AkkInv'
+            @simd for i = k + 1:n
+                A[i,k] *= AkkInv
             end
         end
      end
@@ -255,16 +256,164 @@ function _chol!(x::Number, _)
     rx = real(x)
     iszero(rx) && return (rx, convert(BlasInt, 1))
     rxr = sqrt(abs(rx))
-    rval =  convert(promote_type(typeof(x), typeof(rxr)), rxr)
+    rval = convert(promote_type(typeof(x), typeof(rxr)), rxr)
     return (rval, convert(BlasInt, rx != abs(x)))
 end
 
-## for StridedMatrices, check that matrix is symmetric/Hermitian
+# _cholpivoted!. Internal methods for calling pivoted Cholesky
+Base.@propagate_inbounds function _swap_rowcols!(A, ::Type{UpperTriangular}, n, j, q)
+    j == q && return
+    @assert j < q
+    # swap rows and cols without touching the possibly undef-ed triangle
+    A[q, q] = A[j, j]
+    for k in 1:j-1 # initial vertical segments
+        A[k,j], A[k,q] = A[k,q], A[k,j]
+    end
+    for k in j+1:q-1 # intermediate segments
+        A[j,k], A[k,q] = conj(A[k,q]), conj(A[j,k])
+    end
+    A[j,q] = conj(A[j,q]) # corner case
+    for k in q+1:n # final horizontal segments
+        A[j,k], A[q,k] = A[q,k], A[j,k]
+    end
+    return
+end
+Base.@propagate_inbounds function _swap_rowcols!(A, ::Type{LowerTriangular}, n, j, q)
+    j == q && return
+    @assert j < q
+    # swap rows and cols without touching the possibly undef-ed triangle
+    A[q, q] = A[j, j]
+    for k in 1:j-1 # initial horizontal segments
+        A[j,k], A[q,k] = A[q,k], A[j,k]
+    end
+    for k in j+1:q-1 # intermediate segments
+        A[k,j], A[q,k] = conj(A[q,k]), conj(A[k,j])
+    end
+    A[q,j] = conj(A[q,j]) # corner case
+    for k in q+1:n # final vertical segments
+        A[k,j], A[k,q] = A[k,q], A[k,j]
+    end
+    return
+end
+### BLAS/LAPACK element types
+_cholpivoted!(A::StridedMatrix{<:BlasFloat}, ::Type{UpperTriangular}, tol::Real, check::Bool) =
+    LAPACK.pstrf!('U', A, tol)
+_cholpivoted!(A::StridedMatrix{<:BlasFloat}, ::Type{LowerTriangular}, tol::Real, check::Bool) =
+    LAPACK.pstrf!('L', A, tol)
+## Non BLAS/LAPACK element types (generic)
+function _cholpivoted!(A::AbstractMatrix, ::Type{UpperTriangular}, tol::Real, check::Bool)
+    rTA = real(eltype(A))
+    # checks
+    Base.require_one_based_indexing(A)
+    n = LinearAlgebra.checksquare(A)
+    # initialization
+    piv = collect(1:n)
+    dots = zeros(rTA, n)
+    temp = similar(dots)
+
+    @inbounds begin
+        # first step
+        Akk, q = findmax(i -> real(A[i,i]), 1:n)
+        stop = tol < 0 ? eps(rTA)*n*abs(Akk) : tol
+        Akk ≤ stop && return A, piv, convert(BlasInt, 0), convert(BlasInt, 1)
+        # swap
+        _swap_rowcols!(A, UpperTriangular, n, 1, q)
+        piv[1], piv[q] = piv[q], piv[1]
+        A[1,1] = Akk = sqrt(Akk)
+        AkkInv = inv(copy(Akk'))
+        @simd for j in 2:n
+            A[1, j] *= AkkInv
+        end
+
+        for k in 2:n
+            @simd for j in k:n
+                dots[j] += abs2(A[k-1, j])
+                temp[j] = real(A[j,j]) - dots[j]
+            end
+            Akk, q = findmax(j -> temp[j], k:n)
+            Akk ≤ stop && return A, piv, convert(BlasInt, k - 1), convert(BlasInt, 1)
+            q += k - 1
+            # swap
+            _swap_rowcols!(A, UpperTriangular, n, k, q)
+            dots[k], dots[q] = dots[q], dots[k]
+            piv[k], piv[q] = piv[q], piv[k]
+            # update
+            A[k,k] = Akk = sqrt(Akk)
+            AkkInv = inv(copy(Akk'))
+            for j in (k+1):n
+                @simd for i in 1:(k-1)
+                    A[k,j] -= A[i,k]'A[i,j]
+                end
+                A[k,j] = AkkInv * A[k,j]
+            end
+        end
+        return A, piv, convert(BlasInt, n), convert(BlasInt, 0)
+    end
+end
+function _cholpivoted!(A::AbstractMatrix, ::Type{LowerTriangular}, tol::Real, check::Bool)
+    rTA = real(eltype(A))
+    # checks
+    Base.require_one_based_indexing(A)
+    n = LinearAlgebra.checksquare(A)
+    # initialization
+    piv = collect(1:n)
+    dots = zeros(rTA, n)
+    temp = similar(dots)
+
+    @inbounds begin
+        # first step
+        Akk, q = findmax(i -> real(A[i,i]), 1:n)
+        stop = tol < 0 ? eps(rTA)*n*abs(Akk) : tol
+        Akk ≤ stop && return A, piv, convert(BlasInt, 0), convert(BlasInt, 1)
+        # swap
+        _swap_rowcols!(A, LowerTriangular, n, 1, q)
+        piv[1], piv[q] = piv[q], piv[1]
+        A[1,1] = Akk = sqrt(Akk)
+        AkkInv = inv(copy(Akk'))
+        @simd for i in 2:n
+            A[i,1] *= AkkInv
+        end
+
+        for k in 2:n
+            @simd for j in k:n
+                dots[j] += abs2(A[j, k-1])
+                temp[j] = real(A[j,j]) - dots[j]
+            end
+            Akk, q = findmax(i -> temp[i], k:n)
+            Akk ≤ stop && return A, piv, convert(BlasInt, k-1), convert(BlasInt, 1)
+            q += k - 1
+            # swap
+            _swap_rowcols!(A, LowerTriangular, n, k, q)
+            dots[k], dots[q] = dots[q], dots[k]
+            piv[k], piv[q] = piv[q], piv[k]
+            # update
+            A[k,k] = Akk = sqrt(Akk)
+            for j in 1:(k-1)
+                Akjc = A[k,j]'
+                @simd for i in (k+1):n
+                    A[i,k] -= A[i,j]*Akjc
+                end
+            end
+            AkkInv = inv(copy(Akk'))
+            @simd for i in (k+1):n
+                A[i, k] *= AkkInv
+            end
+        end
+        return A, piv, convert(BlasInt, n), convert(BlasInt, 0)
+    end
+end
+function _cholpivoted!(x::Number, tol)
+    rx = real(x)
+    iszero(rx) && return (rx, convert(BlasInt, 1))
+    rxr = sqrt(abs(rx))
+    rval = convert(promote_type(typeof(x), typeof(rxr)), rxr)
+    return (rval, convert(BlasInt, !(rx == abs(x) > tol)))
+end
 
 # cholesky!. Destructive methods for computing Cholesky factorization of real symmetric
 # or Hermitian matrix
 ## No pivoting (default)
-function cholesky!(A::RealHermSymComplexHerm, ::NoPivot = NoPivot(); check::Bool = true)
+function cholesky!(A::SelfAdjoint, ::NoPivot = NoPivot(); check::Bool = true)
     C, info = _chol!(A.data, A.uplo == 'U' ? UpperTriangular : LowerTriangular)
     check && checkpositivedefinite(info)
     return Cholesky(C.data, A.uplo, info)
@@ -295,7 +444,7 @@ Stacktrace:
 function cholesky!(A::AbstractMatrix, ::NoPivot = NoPivot(); check::Bool = true)
     checksquare(A)
     if !ishermitian(A) # return with info = -1 if not Hermitian
-        check && checkpositivedefinite(-1)
+        check && checkpositivedefinite(convert(BlasInt, -1))
         return Cholesky(A, 'U', convert(BlasInt, -1))
     else
         return cholesky!(Hermitian(A), NoPivot(); check = check)
@@ -305,23 +454,15 @@ end
 @deprecate cholesky!(A::RealHermSymComplexHerm, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false
 
 ## With pivoting
-### BLAS/LAPACK element types
-function cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix},
-                   ::RowMaximum; tol = 0.0, check::Bool = true)
-    AA, piv, rank, info = LAPACK.pstrf!(A.uplo, A.data, tol)
-    C = CholeskyPivoted{eltype(AA),typeof(AA),typeof(piv)}(AA, A.uplo, piv, rank, tol, info)
+### Non BLAS/LAPACK element types (generic).
+function cholesky!(A::SelfAdjoint, ::RowMaximum; tol = 0.0, check::Bool = true)
+    AA, piv, rank, info = _cholpivoted!(A.data, A.uplo == 'U' ? UpperTriangular : LowerTriangular, tol, check)
+    C = CholeskyPivoted(AA, A.uplo, piv, rank, tol, info)
     check && chkfullrank(C)
     return C
 end
-@deprecate cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
-
-### Non BLAS/LAPACK element types (generic). Since generic fallback for pivoted Cholesky
-### is not implemented yet we throw an error
-cholesky!(A::RealHermSymComplexHerm{<:Real}, ::RowMaximum; tol = 0.0, check::Bool = true) =
-    throw(ArgumentError("generic pivoted Cholesky factorization is not implemented yet"))
 @deprecate cholesky!(A::RealHermSymComplexHerm{<:Real}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
 
-### for AbstractMatrix, check that matrix is symmetric/Hermitian
 """
     cholesky!(A::AbstractMatrix, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
 
@@ -333,12 +474,12 @@ e.g. for integer types.
 function cholesky!(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true)
     checksquare(A)
     if !ishermitian(A)
-        C = CholeskyPivoted(A, 'U', Vector{BlasInt}(),convert(BlasInt, 1),
+        C = CholeskyPivoted(A, 'U', Vector{BlasInt}(), convert(BlasInt, 1),
                             tol, convert(BlasInt, -1))
-        check && chkfullrank(C)
+        check && checkpositivedefinite(convert(BlasInt, -1))
         return C
     else
-        return cholesky!(Hermitian(A), RowMaximum(); tol = tol, check = check)
+        return cholesky!(Hermitian(A), RowMaximum(); tol, check)
     end
 end
 @deprecate cholesky!(A::StridedMatrix, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
@@ -410,6 +551,9 @@ end
 # allow packages like SparseArrays.jl to hook into here and redirect to out-of-place `cholesky`
 _cholesky(A::AbstractMatrix, args...; kwargs...) = cholesky!(A, args...; kwargs...)
 
+# allow cholesky of cholesky
+cholesky(A::Cholesky) = A
+
 ## With pivoting
 """
     cholesky(A, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
@@ -427,7 +571,7 @@ The following functions are available for `CholeskyPivoted` objects:
 [`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
 
 The argument `tol` determines the tolerance for determining the rank.
-For negative values, the tolerance is the machine precision.
+For negative values, the tolerance is equal to `eps()*size(A,1)*maximum(diag(A))`.
 
 If you have a matrix `A` that is slightly non-Hermitian due to roundoff errors in its construction,
 wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it as perfectly Hermitian.
@@ -515,13 +659,28 @@ copy(C::CholeskyPivoted) = CholeskyPivoted(copy(C.factors), C.uplo, C.piv, C.ran
 size(C::Union{Cholesky, CholeskyPivoted}) = size(C.factors)
 size(C::Union{Cholesky, CholeskyPivoted}, d::Integer) = size(C.factors, d)
 
+function _choleskyUfactor(Cfactors, Cuplo)
+    if Cuplo === 'U'
+        return UpperTriangular(Cfactors)
+    else
+        return copy(LowerTriangular(Cfactors)')
+    end
+end
+function _choleskyLfactor(Cfactors, Cuplo)
+    if Cuplo === 'L'
+        return LowerTriangular(Cfactors)
+    else
+        return copy(UpperTriangular(Cfactors)')
+    end
+end
+
 function getproperty(C::Cholesky, d::Symbol)
     Cfactors = getfield(C, :factors)
     Cuplo    = getfield(C, :uplo)
     if d === :U
-        return UpperTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors'))
+        _choleskyUfactor(Cfactors, Cuplo)
     elseif d === :L
-        return LowerTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors'))
+        _choleskyLfactor(Cfactors, Cuplo)
     elseif d === :UL
         return (Cuplo === 'U' ? UpperTriangular(Cfactors) : LowerTriangular(Cfactors))
     else
@@ -531,13 +690,18 @@ end
 Base.propertynames(F::Cholesky, private::Bool=false) =
     (:U, :L, :UL, (private ? fieldnames(typeof(F)) : ())...)
 
+function Base.:(==)(C1::Cholesky, C2::Cholesky)
+    C1.uplo == C2.uplo || return false
+    C1.uplo == 'L' ? (C1.L == C2.L) : (C1.U == C2.U)
+end
+
 function getproperty(C::CholeskyPivoted{T}, d::Symbol) where {T}
     Cfactors = getfield(C, :factors)
     Cuplo    = getfield(C, :uplo)
     if d === :U
-        return UpperTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors'))
+        _choleskyUfactor(Cfactors, Cuplo)
     elseif d === :L
-        return LowerTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors'))
+        _choleskyLfactor(Cfactors, Cuplo)
     elseif d === :p
         return getfield(C, :piv)
     elseif d === :P
@@ -554,6 +718,11 @@ end
 Base.propertynames(F::CholeskyPivoted, private::Bool=false) =
     (:U, :L, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
 
+function Base.:(==)(C1::CholeskyPivoted, C2::CholeskyPivoted)
+    (C1.uplo == C2.uplo && C1.p == C2.p) || return false
+    C1.uplo == 'L' ? (C1.L == C2.L) : (C1.U == C2.U)
+end
+
 issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0
 
 adjoint(C::Union{Cholesky,CholeskyPivoted}) = C
@@ -713,7 +882,7 @@ end
 
 function chkfullrank(C::CholeskyPivoted)
     if C.rank < size(C.factors, 1)
-        throw(RankDeficientException(C.info))
+        throw(RankDeficientException(C.rank))
     end
 end
 
@@ -835,3 +1004,30 @@ then `CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses
 `O(n^2)` operations.
 """
 lowrankdowndate(C::Cholesky, v::AbstractVector) = lowrankdowndate!(copy(C), copy(v))
+
+function diag(C::Cholesky{T}, k::Int = 0) where {T}
+    N = size(C, 1)
+    absk = abs(k)
+    iabsk = N - absk
+    z = Vector{T}(undef, iabsk)
+    UL = C.factors
+    if C.uplo == 'U'
+        for i in 1:iabsk
+            z[i] = zero(T)
+            for j in 1:min(i, i+absk)
+                z[i] += UL[j, i]'UL[j, i+absk]
+            end
+        end
+    else
+        for i in 1:iabsk
+            z[i] = zero(T)
+            for j in 1:min(i, i+absk)
+                z[i] += UL[i, j]*UL[i+absk, j]'
+            end
+        end
+    end
+    if !(T <: Real) && k < 0
+        z .= adjoint.(z)
+    end
+    return z
+end
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index 91c38e7f0b09b..62096cbb172f2 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -14,6 +14,7 @@ const NRM2_CUTOFF = 32
 const ISONE_CUTOFF = 2^21 # 2M
 
 function isone(A::AbstractMatrix)
+    require_one_based_indexing(A)  # multiplication not defined yet among offset matrices
     m, n = size(A)
     m != n && return false # only square matrices can satisfy x == one(x)
     if sizeof(A) < ISONE_CUTOFF
@@ -216,13 +217,16 @@ function diagind(::IndexCartesian, m::Integer, n::Integer, k::Integer=0)
 end
 
 """
-    diagind(M::AbstractMatrix, [k::Integer=0,] indstyle::IndexStyle = IndexLinear())
+    diagind(M::AbstractMatrix, k::Integer = 0, indstyle::IndexStyle = IndexLinear())
+    diagind(M::AbstractMatrix, indstyle::IndexStyle = IndexLinear())
 
 An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`.
 Optionally, an index style may be specified which determines the type of the range returned.
 If `indstyle isa IndexLinear` (default), this returns an `AbstractRange{Integer}`.
 On the other hand, if `indstyle isa IndexCartesian`, this returns an `AbstractRange{CartesianIndex{2}}`.
 
+If `k` is not provided, it is assumed to be `0` (corresponding to the main diagonal).
+
 See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref).
 
 # Examples
@@ -233,9 +237,15 @@ julia> A = [1 2 3; 4 5 6; 7 8 9]
  4  5  6
  7  8  9
 
-julia> diagind(A,-1)
+julia> diagind(A, -1)
 2:4:6
+
+julia> diagind(A, IndexCartesian())
+StepRangeLen(CartesianIndex(1, 1), CartesianIndex(1, 1), 3)
 ```
+
+!!! compat "Julia 1.11"
+     Specifying an `IndexStyle` requires at least Julia 1.11.
 """
 function diagind(A::AbstractMatrix, k::Integer=0, indexstyle::IndexStyle = IndexLinear())
     require_one_based_indexing(A)
@@ -327,7 +337,7 @@ function diagm_size(size::Tuple{Int,Int}, kv::Pair{<:Integer,<:AbstractVector}..
     mmax = mapreduce(x -> length(x.second) - min(0,Int(x.first)), max, kv; init=0)
     nmax = mapreduce(x -> length(x.second) + max(0,Int(x.first)), max, kv; init=0)
     m, n = size
-    (m ≥ mmax && n ≥ nmax) || throw(DimensionMismatch("invalid size=$size"))
+    (m ≥ mmax && n ≥ nmax) || throw(DimensionMismatch(lazy"invalid size=$size"))
     return m, n
 end
 function diagm_container(size, kv::Pair{<:Integer,<:AbstractVector}...)
@@ -360,13 +370,10 @@ julia> diagm([1,2,3])
 diagm(v::AbstractVector) = diagm(0 => v)
 diagm(m::Integer, n::Integer, v::AbstractVector) = diagm(m, n, 0 => v)
 
-function tr(A::Matrix{T}) where T
-    n = checksquare(A)
-    t = zero(T)
-    @inbounds @simd for i in 1:n
-        t += A[i,i]
-    end
-    t
+function tr(A::StridedMatrix{T}) where T
+    checksquare(A)
+    isempty(A) && return zero(T)
+    reduce(+, (A[i] for i in diagind(A, IndexStyle(A))))
 end
 
 _kronsize(A::AbstractMatrix, B::AbstractMatrix) = map(*, size(A), size(B))
@@ -482,8 +489,8 @@ julia> reshape(kron(v,w), (length(w), length(v)))
 ```
 """
 function kron(A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}) where {T,S}
-    R = Matrix{promote_op(*,T,S)}(undef, _kronsize(A, B))
-    return kron!(R, A, B)
+    C = Matrix{promote_op(*,T,S)}(undef, _kronsize(A, B))
+    return kron!(C, A, B)
 end
 function kron(a::AbstractVector{T}, b::AbstractVector{S}) where {T,S}
     c = Vector{promote_op(*,T,S)}(undef, length(a)*length(b))
@@ -555,9 +562,6 @@ function (^)(A::AbstractMatrix{T}, p::Real) where T
     isinteger(p) && return integerpow(A, p)
 
     # If possible, use diagonalization
-    if issymmetric(A)
-        return (Symmetric(A)^p)
-    end
     if ishermitian(A)
         return (Hermitian(A)^p)
     end
@@ -695,22 +699,29 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P
         for k in 2:(div(length(C), 2) - 1)
             P *= A2
-            mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P
-            mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P
+            for ind in eachindex(P)
+                U[ind] += C[2k + 2] * P[ind]
+                V[ind] += C[2k + 1] * P[ind]
+            end
         end
 
         U = A * U
 
         # Padé approximant:  (V-U)\(V+U)
         tmp1, tmp2 = A, A2 # Reuse already allocated arrays
-        tmp1 .= V .- U
-        tmp2 .= V .+ U
+        for ind in eachindex(tmp1)
+            tmp1[ind] = V[ind] - U[ind]
+            tmp2[ind] = V[ind] + U[ind]
+        end
         X = LAPACK.gesv!(tmp1, tmp2)[1]
     else
         s  = log2(nA/5.4)               # power of 2 later reversed by squaring
         if s > 0
             si = ceil(Int,s)
-            A ./= convert(T,2^si)
+            twopowsi = convert(T,2^si)
+            for ind in eachindex(A)
+                A[ind] /= twopowsi
+            end
         end
         CC = T[64764752532480000.,32382376266240000.,7771770303897600.,
                 1187353796428800.,  129060195264000.,  10559470521600.,
@@ -725,8 +736,10 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         # Allocation economical version of:
         # U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
         #           CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I)
-        tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2
-        tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2
+        for ind in eachindex(tmp1)
+            tmp1[ind] = CC[14]*A6[ind] + CC[12]*A4[ind] + CC[10]*A2[ind]
+            tmp2[ind] = CC[8]*A6[ind] + CC[6]*A4[ind] + CC[4]*A2[ind]
+        end
         mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I
         U = mul!(tmp2, A6, tmp1, true, true)
         U, tmp1 = mul!(tmp1, A, U), A # U = A * U0
@@ -734,13 +747,17 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         # Allocation economical version of:
         # V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
         #           CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I
-        tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2
-        tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2
+        for ind in eachindex(tmp1)
+            tmp1[ind] = CC[13]*A6[ind] + CC[11]*A4[ind] + CC[9]*A2[ind]
+            tmp2[ind] = CC[7]*A6[ind] + CC[5]*A4[ind] + CC[3]*A2[ind]
+        end
         mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I
         V = mul!(tmp2, A6, tmp1, true, true)
 
-        tmp1 .= V .+ U
-        tmp2 .= V .- U # tmp2 already contained V but this seems more readable
+        for ind in eachindex(tmp1)
+            tmp1[ind] = V[ind] + U[ind]
+            tmp2[ind] = V[ind] - U[ind] # tmp2 already contained V but this seems more readable
+        end
         X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008
 
         if s > 0
@@ -1364,15 +1381,14 @@ end
     factorize(A)
 
 Compute a convenient factorization of `A`, based upon the type of the input matrix.
-`factorize` checks `A` to see if it is symmetric/triangular/etc. if `A` is passed
-as a generic matrix. `factorize` checks every element of `A` to verify/rule out
-each property. It will short-circuit as soon as it can rule out symmetry/triangular
-structure. The return value can be reused for efficient solving of multiple
-systems. For example: `A=factorize(A); x=A\\b; y=A\\C`.
+If `A` is passed as a generic matrix, `factorize` checks to see if it is
+symmetric/triangular/etc. To this end, `factorize` may check every element of `A` to
+verify/rule out each property. It will short-circuit as soon as it can rule out
+symmetry/triangular structure. The return value can be reused for efficient solving
+of multiple systems. For example: `A=factorize(A); x=A\\b; y=A\\C`.
 
 | Properties of `A`          | type of factorization                          |
 |:---------------------------|:-----------------------------------------------|
-| Positive-definite          | Cholesky (see [`cholesky`](@ref))  |
 | Dense Symmetric/Hermitian  | Bunch-Kaufman (see [`bunchkaufman`](@ref)) |
 | Sparse Symmetric/Hermitian | LDLt (see [`ldlt`](@ref))      |
 | Triangular                 | Triangular                                     |
@@ -1383,9 +1399,6 @@ systems. For example: `A=factorize(A); x=A\\b; y=A\\C`.
 | General square             | LU (see [`lu`](@ref))            |
 | General non-square         | QR (see [`qr`](@ref))            |
 
-If `factorize` is called on a Hermitian positive-definite matrix, for instance, then `factorize`
-will return a Cholesky factorization.
-
 # Examples
 ```jldoctest
 julia> A = Array(Bidiagonal(fill(1.0, (5, 5)), :U))
@@ -1404,8 +1417,9 @@ julia> factorize(A) # factorize will check to see that A is already factorized
   ⋅    ⋅    ⋅   1.0  1.0
   ⋅    ⋅    ⋅    ⋅   1.0
 ```
-This returns a `5×5 Bidiagonal{Float64}`, which can now be passed to other linear algebra functions
-(e.g. eigensolvers) which will use specialized methods for `Bidiagonal` types.
+
+This returns a `5×5 Bidiagonal{Float64}`, which can now be passed to other linear algebra
+functions (e.g. eigensolvers) which will use specialized methods for `Bidiagonal` types.
 """
 function factorize(A::AbstractMatrix{T}) where T
     m, n = size(A)
@@ -1467,12 +1481,7 @@ function factorize(A::AbstractMatrix{T}) where T
             return UpperTriangular(A)
         end
         if herm
-            cf = cholesky(A; check = false)
-            if cf.info == 0
-                return cf
-            else
-                return factorize(Hermitian(A))
-            end
+            return factorize(Hermitian(A))
         end
         if sym
             return factorize(Symmetric(A))
@@ -1636,7 +1645,7 @@ function cond(A::AbstractMatrix, p::Real=2)
             end
         end
     end
-    throw(ArgumentError("p-norm must be 1, 2 or Inf, got $p"))
+    throw(ArgumentError(lazy"p-norm must be 1, 2 or Inf, got $p"))
 end
 
 ## Lyapunov and Sylvester equation
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index 063dde619bd1a..23d2422d13654 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -114,13 +114,15 @@ Diagonal{T}(D::Diagonal) where {T} = Diagonal{T}(D.diag)
 AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D)
 AbstractMatrix{T}(D::Diagonal{T}) where {T} = copy(D)
 Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D)
+Matrix(D::Diagonal{Any}) = Matrix{Any}(D)
 Array(D::Diagonal{T}) where {T} = Matrix(D)
 function Matrix{T}(D::Diagonal) where {T}
-    n = size(D, 1)
-    B = Matrix{T}(undef, n, n)
-    n > 1 && fill!(B, zero(T))
-    @inbounds for i in 1:n
-        B[i,i] = D.diag[i]
+    B = Matrix{T}(undef, size(D))
+    if haszero(T) # optimized path for types with zero(T) defined
+        size(B,1) > 1 && fill!(B, zero(T))
+        copyto!(view(B, diagind(B)), D.diag)
+    else
+        copyto!(B, D)
     end
     return B
 end
@@ -135,7 +137,8 @@ Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef,
 similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T))
 similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(D.diag, T, dims)
 
-copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1)
+# copyto! for matching axes
+_copyto_banded!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1)
 
 size(D::Diagonal) = (n = length(D.diag); (n,n))
 
@@ -185,14 +188,24 @@ end
 diagzero(::Diagonal{T}, i, j) where {T} = zero(T)
 diagzero(D::Diagonal{<:AbstractMatrix{T}}, i, j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2))
 
+@inline function getindex(D::Diagonal, b::BandIndex)
+    @boundscheck checkbounds(D, b)
+    if b.band == 0
+        @inbounds r = D.diag[b.index]
+    else
+        r = diagzero(D, Tuple(_cartinds(b))...)
+    end
+    r
+end
+
 function setindex!(D::Diagonal, v, i::Int, j::Int)
     @boundscheck checkbounds(D, i, j)
     if i == j
         @inbounds D.diag[i] = v
     elseif !iszero(v)
-        throw(ArgumentError("cannot set off-diagonal entry ($i, $j) to a nonzero value ($v)"))
+        throw(ArgumentError(lazy"cannot set off-diagonal entry ($i, $j) to a nonzero value ($v)"))
     end
-    return v
+    return D
 end
 
 
@@ -200,11 +213,20 @@ end
 function Base.replace_in_print_matrix(A::Diagonal,i::Integer,j::Integer,s::AbstractString)
     i==j ? s : Base.replace_with_centered_mark(s)
 end
+function Base.show(io::IO, A::Diagonal)
+    print(io, "Diagonal(")
+    show(io, A.diag)
+    print(io, ")")
+end
 
 parent(D::Diagonal) = D.diag
 
 copy(D::Diagonal) = Diagonal(copy(D.diag))
 
+Base._reverse(A::Diagonal, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::Diagonal, ::Colon) = Diagonal(reverse(A.diag))
+Base._reverse!(A::Diagonal, ::Colon) = (reverse!(A.diag); A)
+
 ishermitian(D::Diagonal{<:Real}) = true
 ishermitian(D::Diagonal{<:Number}) = isreal(D.diag)
 ishermitian(D::Diagonal) = all(ishermitian, D.diag)
@@ -221,8 +243,8 @@ iszero(D::Diagonal) = all(iszero, D.diag)
 isone(D::Diagonal) = all(isone, D.diag)
 isdiag(D::Diagonal) = all(isdiag, D.diag)
 isdiag(D::Diagonal{<:Number}) = true
-istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false
-istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? true : false
+Base.@constprop :aggressive istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false
+Base.@constprop :aggressive istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? true : false
 function triu!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n + 1 <= k <= n + 1)
@@ -237,8 +259,8 @@ end
 function tril!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < 0
         fill!(D.diag, zero(T))
     end
@@ -250,21 +272,6 @@ end
 (+)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag + Db.diag)
 (-)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag - Db.diag)
 
-for f in (:+, :-)
-    @eval function $f(D::Diagonal, S::Symmetric)
-        return Symmetric($f(D, S.data), sym_uplo(S.uplo))
-    end
-    @eval function $f(S::Symmetric, D::Diagonal)
-        return Symmetric($f(S.data, D), sym_uplo(S.uplo))
-    end
-    @eval function $f(D::Diagonal{<:Real}, H::Hermitian)
-        return Hermitian($f(D, H.data), sym_uplo(H.uplo))
-    end
-    @eval function $f(H::Hermitian, D::Diagonal{<:Real})
-        return Hermitian($f(H.data, D), sym_uplo(H.uplo))
-    end
-end
-
 (*)(x::Number, D::Diagonal) = Diagonal(x * D.diag)
 (*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
 (/)(D::Diagonal, x::Number) = Diagonal(D.diag / x)
@@ -276,42 +283,80 @@ Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
     Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
 Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
 
-function _muldiag_size_check(A, B)
-    nA = size(A, 2)
-    mB = size(B, 1)
-    @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch("second dimension of A, $nA, does not match first dimension of B, $mB"))
-    @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch("second dimension of D, $nA, does not match length of V, $mB"))
-    nA == mB || throw_dimerr(B, nA, mB)
+function _muldiag_size_check(szA::NTuple{2,Integer}, szB::Tuple{Integer,Vararg{Integer}})
+    nA = szA[2]
+    mB = szB[1]
+    @noinline throw_dimerr(szB::NTuple{2}, nA, mB) = throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match first dimension of B, $mB"))
+    @noinline throw_dimerr(szB::NTuple{1}, nA, mB) = throw(DimensionMismatch(lazy"second dimension of D, $nA, does not match length of V, $mB"))
+    nA == mB || throw_dimerr(szB, nA, mB)
     return nothing
 end
 # the output matrix should have the same size as the non-diagonal input matrix or vector
-@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch("output matrix has size: $szC, but should have size $szA"))
-_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A)
-_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A)
-_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A)
-function _size_check_out(C, A)
-    szA = size(A)
-    szC = size(C)
-    szA == szC || throw_dimerr(szC, szA)
-    return nothing
+@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch(lazy"output matrix has size: $szC, but should have size $szA"))
+function _size_check_out(szC::NTuple{2}, szA::NTuple{2}, szB::NTuple{2})
+    (szC[1] == szA[1] && szC[2] == szB[2]) || throw_dimerr(szC, (szA[1], szB[2]))
+end
+function _size_check_out(szC::NTuple{1}, szA::NTuple{2}, szB::NTuple{1})
+    szC[1] == szA[1] || throw_dimerr(szC, (szA[1],))
 end
-function _muldiag_size_check(C, A, B)
-    _muldiag_size_check(A, B)
-    _size_check_out(C, A, B)
+function _muldiag_size_check(szC::Tuple{Vararg{Integer}}, szA::Tuple{Vararg{Integer}}, szB::Tuple{Vararg{Integer}})
+    _muldiag_size_check(szA, szB)
+   _size_check_out(szC, szA, szB)
 end
 
 function (*)(Da::Diagonal, Db::Diagonal)
-    _muldiag_size_check(Da, Db)
+    _muldiag_size_check(size(Da), size(Db))
     return Diagonal(Da.diag .* Db.diag)
 end
 
 function (*)(D::Diagonal, V::AbstractVector)
-    _muldiag_size_check(D, V)
+    _muldiag_size_check(size(D), size(V))
     return D.diag .* V
 end
 
-rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D)
-lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B)
+function rmul!(A::AbstractMatrix, D::Diagonal)
+    _muldiag_size_check(size(A), size(D))
+    for I in CartesianIndices(A)
+        row, col = Tuple(I)
+        @inbounds A[row, col] *= D.diag[col]
+    end
+    return A
+end
+# T .= T * D
+function rmul!(T::Tridiagonal, D::Diagonal)
+    _muldiag_size_check(size(T), size(D))
+    (; dl, d, du) = T
+    d[1] *= D.diag[1]
+    for i in axes(dl,1)
+        dl[i] *= D.diag[i]
+        du[i] *= D.diag[i+1]
+        d[i+1] *= D.diag[i+1]
+    end
+    return T
+end
+
+function lmul!(D::Diagonal, B::AbstractVecOrMat)
+    _muldiag_size_check(size(D), size(B))
+    for I in CartesianIndices(B)
+        row = I[1]
+        @inbounds B[I] = D.diag[row] * B[I]
+    end
+    return B
+end
+
+# in-place multiplication with a diagonal
+# T .= D * T
+function lmul!(D::Diagonal, T::Tridiagonal)
+    _muldiag_size_check(size(D), size(T))
+    (; dl, d, du) = T
+    d[1] = D.diag[1] * d[1]
+    for i in axes(dl,1)
+        dl[i] = D.diag[i+1] * dl[i]
+        du[i] = D.diag[i] * du[i]
+        d[i+1] = D.diag[i+1] * d[i+1]
+    end
+    return T
+end
 
 function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
     require_one_based_indexing(out, B)
@@ -394,7 +439,7 @@ function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0})
 end
 
 function _mul_diag!(out, A, B, _add)
-    _muldiag_size_check(out, A, B)
+    _muldiag_size_check(size(out), size(A), size(B))
     __muldiag!(out, A, B, _add)
     return out
 end
@@ -411,19 +456,18 @@ _mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) =
     _mul_diag!(C, Da, Db, _add)
 
 function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal)
-    _muldiag_size_check(Da, A)
-    _muldiag_size_check(A, Db)
+    _muldiag_size_check(size(Da), size(A))
+    _muldiag_size_check(size(A), size(Db))
     return broadcast(*, Da.diag, A, permutedims(Db.diag))
 end
 
 function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal)
-    _muldiag_size_check(Da, Db)
-    _muldiag_size_check(Db, Dc)
+    _muldiag_size_check(size(Da), size(Db))
+    _muldiag_size_check(size(Db), size(Dc))
     return Diagonal(Da.diag .* Db.diag .* Dc.diag)
 end
 
 /(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(matprod_dest(A, D, promote_op(/, eltype(A), eltype(D))), A, D)
-/(A::HermOrSym, D::Diagonal) = _rdiv!(matprod_dest(A, D, promote_op(/, eltype(A), eltype(D))), A, D)
 
 rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D)
 # avoid copy when possible via internal 3-arg backend
@@ -432,7 +476,7 @@ function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal)
     dd = D.diag
     m, n = size(A, 1), size(A, 2)
     if (k = length(dd)) != n
-        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+        throw(DimensionMismatch(lazy"left hand side has $n columns but D is $k by $k"))
     end
     @inbounds for j in 1:n
         ddj = dd[j]
@@ -450,7 +494,6 @@ function \(D::Diagonal, B::AbstractVector)
     return D.diag .\ B
 end
 \(D::Diagonal, B::AbstractMatrix) = ldiv!(matprod_dest(D, B, promote_op(\, eltype(D), eltype(B))), D, B)
-\(D::Diagonal, B::HermOrSym) = ldiv!(matprod_dest(D, B, promote_op(\, eltype(D), eltype(B))), D, B)
 
 ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B)
 function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
@@ -459,8 +502,8 @@ function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
     d = length(dd)
     m, n = size(A, 1), size(A, 2)
     m′, n′ = size(B, 1), size(B, 2)
-    m == d || throw(DimensionMismatch("right hand side has $m rows but D is $d by $d"))
-    (m, n) == (m′, n′) || throw(DimensionMismatch("expect output to be $m by $n, but got $m′ by $n′"))
+    m == d || throw(DimensionMismatch(lazy"right hand side has $m rows but D is $d by $d"))
+    (m, n) == (m′, n′) || throw(DimensionMismatch(lazy"expect output to be $m by $n, but got $m′ by $n′"))
     j = findfirst(iszero, D.diag)
     isnothing(j) || throw(SingularException(j))
     @inbounds for j = 1:n, i = 1:m
@@ -469,12 +512,9 @@ function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
     B
 end
 
-# Optimizations for \, / between Diagonals
-\(D::Diagonal, B::Diagonal) = ldiv!(matprod_dest(D, B, promote_op(\, eltype(D), eltype(B))), D, B)
-/(A::Diagonal, D::Diagonal) = _rdiv!(matprod_dest(A, D, promote_op(/, eltype(A), eltype(D))), A, D)
 function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal)
     n, k = length(Db.diag), length(Da.diag)
-    n == k || throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+    n == k || throw(DimensionMismatch(lazy"left hand side has $n columns but D is $k by $k"))
     j = findfirst(iszero, Da.diag)
     isnothing(j) || throw(SingularException(j))
     Dc.diag .= Db.diag ./ Da.diag
@@ -502,10 +542,10 @@ function ldiv!(T::Tridiagonal, D::Diagonal, S::Union{SymTridiagonal,Tridiagonal}
     m = size(S, 1)
     dd = D.diag
     if (k = length(dd)) != m
-        throw(DimensionMismatch("diagonal matrix is $k by $k but right hand side has $m rows"))
+        throw(DimensionMismatch(lazy"diagonal matrix is $k by $k but right hand side has $m rows"))
     end
     if length(T.d) != m
-        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+        throw(DimensionMismatch(lazy"target matrix size $(size(T)) does not match input matrix size $(size(S))"))
     end
     m == 0 && return T
     j = findfirst(iszero, dd)
@@ -539,10 +579,10 @@ function _rdiv!(T::Tridiagonal, S::Union{SymTridiagonal,Tridiagonal}, D::Diagona
     n = size(S, 2)
     dd = D.diag
     if (k = length(dd)) != n
-        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+        throw(DimensionMismatch(lazy"left hand side has $n columns but D is $k by $k"))
     end
     if length(T.d) != n
-        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+        throw(DimensionMismatch(lazy"target matrix size $(size(T)) does not match input matrix size $(size(S))"))
     end
     n == 0 && return T
     j = findfirst(iszero, dd)
@@ -612,7 +652,7 @@ end
     valB = B.diag; nB = length(valB)
     nC = checksquare(C)
     @boundscheck nC == nA*nB ||
-        throw(DimensionMismatch("expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)"))
+        throw(DimensionMismatch(lazy"expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)"))
     isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     @inbounds for i = 1:nA, j = 1:nB
         idx = (i-1)*nB+j
@@ -631,9 +671,9 @@ function kron(A::Diagonal, B::SymTridiagonal)
 end
 function kron(A::Diagonal, B::Tridiagonal)
     # `_droplast!` is only guaranteed to work with `Vector`
-    kd = _makevector(kron(diag(A), B.d))
-    kdl = _droplast!(_makevector(kron(diag(A), _pushzero(B.dl))))
-    kdu = _droplast!(_makevector(kron(diag(A), _pushzero(B.du))))
+    kd = convert(Vector, kron(diag(A), B.d))
+    kdl = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.dl))))
+    kdu = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.du))))
     Tridiagonal(kdl, kd, kdu)
 end
 
@@ -643,7 +683,7 @@ end
     (mB, nB) = size(B)
     (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
-        throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+        throw(DimensionMismatch(lazy"expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
     isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
@@ -666,7 +706,7 @@ end
     (mB, nB) = size(B)
     (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
-        throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+        throw(DimensionMismatch(lazy"expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
     isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
@@ -690,7 +730,7 @@ adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag)))
 adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag)))
 adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
 permutedims(D::Diagonal) = D
-permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
+permutedims(D::Diagonal, perm) = (Base.checkdims_perm(axes(D), axes(D), perm); D)
 
 function diag(D::Diagonal{T}, k::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
@@ -698,10 +738,14 @@ function diag(D::Diagonal{T}, k::Integer=0) where T
     if k == 0
         return copyto!(similar(D.diag, length(D.diag)), D.diag)
     elseif -size(D,1) <= k <= size(D,1)
-        return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T))
+        v = similar(D.diag, size(D,1)-abs(k))
+        for i in eachindex(v)
+            v[i] = D[BandIndex(k, i)]
+        end
+        return v
     else
-        throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ",
-            "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
+        throw(ArgumentError(LazyString(lazy"requested diagonal, $k, must be at least $(-size(D, 1)) ",
+            lazy"and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
     end
 end
 tr(D::Diagonal) = sum(tr, D.diag)
@@ -808,7 +852,7 @@ function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{
             evecs[p[i],i] = one(Td)
         end
     else
-        evecs = Matrix{Td}(I, size(D))
+        evecs = Diagonal(ones(Td, length(λ)))
     end
     Eigen(λ, evecs)
 end
@@ -879,7 +923,7 @@ dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x,
 
 dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag)
 function dot(D::Diagonal, B::AbstractMatrix)
-    size(D) == size(B) || throw(DimensionMismatch("Matrix sizes $(size(D)) and $(size(B)) differ"))
+    size(D) == size(B) || throw(DimensionMismatch(lazy"Matrix sizes $(size(D)) and $(size(B)) differ"))
     return dot(D.diag, view(B, diagind(B, IndexStyle(B))))
 end
 
@@ -887,7 +931,7 @@ dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A))
 
 function _mapreduce_prod(f, x, D::Diagonal, y)
     if !(length(x) == length(D.diag) == length(y))
-        throw(DimensionMismatch("x has length $(length(x)), D has size $(size(D)), and y has $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), D has size $(size(D)), and y has $(length(y))"))
     end
     if isempty(x) && isempty(D) && isempty(y)
         return zero(promote_op(f, eltype(x), eltype(D), eltype(y)))
@@ -913,6 +957,36 @@ end
 @deprecate cholesky!(A::Diagonal, ::Val{false}; check::Bool = true) cholesky!(A::Diagonal, NoPivot(); check) false
 @deprecate cholesky(A::Diagonal, ::Val{false}; check::Bool = true) cholesky(A::Diagonal, NoPivot(); check) false
 
+function cholesky!(A::Diagonal, ::RowMaximum; tol=0.0, check=true)
+    if !ishermitian(A)
+        C = CholeskyPivoted(A, 'U', Vector{BlasInt}(), convert(BlasInt, 1),
+                            tol, convert(BlasInt, -1))
+        check && checkpositivedefinite(convert(BlasInt, -1))
+    else
+        d = A.diag
+        n = length(d)
+        info = 0
+        rank = n
+        p = sortperm(d, rev = true, by = real)
+        tol = tol < 0 ? n*eps(eltype(A))*real(d[p[1]]) : tol # LAPACK behavior
+        permute!(d, p)
+        @inbounds for i in eachindex(d)
+            di = d[i]
+            rootdi, j = _cholpivoted!(di, tol)
+            if j == 0
+                d[i] = rootdi
+            else
+                rank = i - 1
+                info = 1
+                break
+            end
+        end
+        C = CholeskyPivoted(A, 'U', p, convert(BlasInt, rank), tol, convert(BlasInt, info))
+        check && chkfullrank(C)
+    end
+    return C
+end
+
 inv(C::Cholesky{<:Any,<:Diagonal}) = Diagonal(map(inv∘abs2, C.factors.diag))
 
 cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A))
@@ -950,3 +1024,6 @@ end
 function Base.muladd(A::Diagonal, B::Diagonal, z::Diagonal)
     Diagonal(A.diag .* B.diag .+ z.diag)
 end
+
+uppertriangular(D::Diagonal) = D
+lowertriangular(D::Diagonal) = D
diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl
index d1d2a156b1ed4..e0124f2e9d870 100644
--- a/stdlib/LinearAlgebra/src/eigen.jl
+++ b/stdlib/LinearAlgebra/src/eigen.jl
@@ -658,14 +658,19 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{Eigen,Generaliz
     show(io, mime, F.vectors)
 end
 
-function Base.hash(F::Eigen, h::UInt)
-    return hash(F.values, hash(F.vectors, hash(Eigen, h)))
-end
-function Base.:(==)(A::Eigen, B::Eigen)
-    return A.values == B.values && A.vectors == B.vectors
-end
-function Base.isequal(A::Eigen, B::Eigen)
-    return isequal(A.values, B.values) && isequal(A.vectors, B.vectors)
+_equalcheck(f, Avalues, Avectors, Bvalues, Bvectors) = f(Avalues, Bvalues) && f(Avectors, Bvectors)
+for T in (Eigen, GeneralizedEigen)
+    @eval begin
+        function Base.hash(F::$T, h::UInt)
+            return hash(F.values, hash(F.vectors, hash($T, h)))
+        end
+        function Base.:(==)(A::$T, B::$T)
+            return _equalcheck(==, A..., B...)
+        end
+        function Base.isequal(A::$T, B::$T)
+            return _equalcheck(isequal, A..., B...)
+        end
+    end
 end
 
 # Conversion methods
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index d0a985f3b6e51..e5f23b4981616 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -49,6 +49,72 @@ end
     end
 end
 
+"""
+    @stable_muladdmul
+
+Replaces a function call, that has a `MulAddMul(alpha, beta)` constructor as an
+argument, with a branch over possible values of `isone(alpha)` and `iszero(beta)`
+and constructs `MulAddMul{isone(alpha), iszero(beta)}` explicitly in each branch.
+For example, 'f(x, y, MulAddMul(alpha, beta))` is transformed into
+```
+if isone(alpha)
+    if iszero(beta)
+        f(x, y, MulAddMul{true, true, typeof(alpha), typeof(beta)}(alpha, beta))
+    else
+        f(x, y, MulAddMul{true, false, typeof(alpha), typeof(beta)}(alpha, beta))
+    end
+else
+    if iszero(beta)
+        f(x, y, MulAddMul{false, true, typeof(alpha), typeof(beta)}(alpha, beta))
+    else
+        f(x, y, MulAddMul{false, false, typeof(alpha), typeof(beta)}(alpha, beta))
+    end
+end
+```
+This avoids the type instability of the `MulAddMul(alpha, beta)` constructor,
+which causes runtime dispatch in case alpha and zero are not constants.
+"""
+macro stable_muladdmul(expr)
+    expr.head == :call || throw(ArgumentError("Can only handle function calls."))
+    for (i, e) in enumerate(expr.args)
+        e isa Expr || continue
+        if e.head == :call && e.args[1] == :MulAddMul && length(e.args) == 3
+            local asym = e.args[2]
+            local bsym = e.args[3]
+
+            local e_sub11 = copy(expr)
+            e_sub11.args[i] = :(MulAddMul{true, true, typeof($asym), typeof($bsym)}($asym, $bsym))
+
+            local e_sub10 = copy(expr)
+            e_sub10.args[i] = :(MulAddMul{true, false, typeof($asym), typeof($bsym)}($asym, $bsym))
+
+            local e_sub01 = copy(expr)
+            e_sub01.args[i] = :(MulAddMul{false, true, typeof($asym), typeof($bsym)}($asym, $bsym))
+
+            local e_sub00 = copy(expr)
+            e_sub00.args[i] = :(MulAddMul{false, false, typeof($asym), typeof($bsym)}($asym, $bsym))
+
+            local e_out = quote
+                if isone($asym)
+                    if iszero($bsym)
+                        $e_sub11
+                    else
+                        $e_sub10
+                    end
+                else
+                    if iszero($bsym)
+                        $e_sub01
+                    else
+                        $e_sub00
+                    end
+                end
+            end
+            return esc(e_out)
+        end
+    end
+    throw(ArgumentError("No valid MulAddMul expression found."))
+end
+
 MulAddMul() = MulAddMul{true,true,Bool,Bool}(true, false)
 
 @inline (::MulAddMul{true})(x) = x
@@ -110,7 +176,7 @@ end
 
 function generic_mul!(C::AbstractArray, X::AbstractArray, s::Number, _add::MulAddMul)
     if length(C) != length(X)
-        throw(DimensionMismatch("first array has length $(length(C)) which does not match the length of the second, $(length(X))."))
+        throw(DimensionMismatch(lazy"first array has length $(length(C)) which does not match the length of the second, $(length(X))."))
     end
     for (IC, IX) in zip(eachindex(C), eachindex(X))
         @inbounds _modify!(_add, X[IX] * s, C, IC)
@@ -120,7 +186,7 @@ end
 
 function generic_mul!(C::AbstractArray, s::Number, X::AbstractArray, _add::MulAddMul)
     if length(C) != length(X)
-        throw(DimensionMismatch("first array has length $(length(C)) which does not
+        throw(DimensionMismatch(lazy"first array has length $(length(C)) which does not
 match the length of the second, $(length(X))."))
     end
     for (IC, IX) in zip(eachindex(C), eachindex(X))
@@ -598,8 +664,11 @@ julia> norm(hcat(v,v), Inf) == norm(vcat(v,v), Inf) != norm([v,v], Inf)
 true
 ```
 """
-function norm(itr, p::Real=2)
+Base.@constprop :aggressive function norm(itr, p::Real=2)
     isempty(itr) && return float(norm(zero(eltype(itr))))
+    v, s = iterate(itr)
+    !isnothing(s) && !ismissing(v) && v == itr && throw(ArgumentError(
+        "cannot evaluate norm recursively if the type of the initial element is identical to that of the container"))
     if p == 2
         return norm2(itr)
     elseif p == 1
@@ -740,7 +809,7 @@ julia> opnorm(A, 1)
 5.0
 ```
 """
-function opnorm(A::AbstractMatrix, p::Real=2)
+Base.@constprop :aggressive function opnorm(A::AbstractMatrix, p::Real=2)
     if p == 2
         return opnorm2(A)
     elseif p == 1
@@ -748,7 +817,7 @@ function opnorm(A::AbstractMatrix, p::Real=2)
     elseif p == Inf
         return opnormInf(A)
     else
-        throw(ArgumentError("invalid p-norm p=$p. Valid: 1, 2, Inf"))
+        throw(ArgumentError(lazy"invalid p-norm p=$p. Valid: 1, 2, Inf"))
     end
 end
 
@@ -886,7 +955,7 @@ dot(x::Number, y::Number) = conj(x) * y
 function dot(x::AbstractArray, y::AbstractArray)
     lx = length(x)
     if lx != length(y)
-        throw(DimensionMismatch("first array has length $(lx) which does not match the length of the second, $(length(y))."))
+        throw(DimensionMismatch(lazy"first array has length $(lx) which does not match the length of the second, $(length(y))."))
     end
     if lx == 0
         return dot(zero(eltype(x)), zero(eltype(y)))
@@ -1018,7 +1087,7 @@ julia> tr(A)
 5
 ```
 """
-function tr(A::AbstractMatrix)
+function tr(A)
     checksquare(A)
     sum(diag(A))
 end
@@ -1464,7 +1533,7 @@ julia> axpy!(2, x, y)
 function axpy!(α, x::AbstractArray, y::AbstractArray)
     n = length(x)
     if n != length(y)
-        throw(DimensionMismatch("x has length $n, but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $n, but y has length $(length(y))"))
     end
     iszero(α) && return y
     for (IY, IX) in zip(eachindex(y), eachindex(x))
@@ -1475,7 +1544,7 @@ end
 
 function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractArray, ry::AbstractArray{<:Integer})
     if length(rx) != length(ry)
-        throw(DimensionMismatch("rx has length $(length(rx)), but ry has length $(length(ry))"))
+        throw(DimensionMismatch(lazy"rx has length $(length(rx)), but ry has length $(length(ry))"))
     elseif !checkindex(Bool, eachindex(IndexLinear(), x), rx)
         throw(BoundsError(x, rx))
     elseif !checkindex(Bool, eachindex(IndexLinear(), y), ry)
@@ -1509,7 +1578,7 @@ julia> axpby!(2, x, 2, y)
 """
 function axpby!(α, x::AbstractArray, β, y::AbstractArray)
     if length(x) != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
     iszero(α) && isone(β) && return y
     for (IX, IY) in zip(eachindex(x), eachindex(y))
@@ -1549,7 +1618,7 @@ function rotate!(x::AbstractVector, y::AbstractVector, c, s)
     require_one_based_indexing(x, y)
     n = length(x)
     if n != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
     @inbounds for i = 1:n
         xi, yi = x[i], y[i]
@@ -1572,7 +1641,7 @@ function reflect!(x::AbstractVector, y::AbstractVector, c, s)
     require_one_based_indexing(x, y)
     n = length(x)
     if n != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
     @inbounds for i = 1:n
         xi, yi = x[i], y[i]
@@ -1607,13 +1676,13 @@ end
 """
     reflectorApply!(x, τ, A)
 
-Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`.
+Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - conj(τ)*[1; x[2:end]]*[1; x[2:end]]')*A`.
 """
 @inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat)
     require_one_based_indexing(x)
     m, n = size(A, 1), size(A, 2)
     if length(x) != m
-        throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
+        throw(DimensionMismatch(lazy"reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
     end
     m == 0 && return A
     @inbounds for j = 1:n
@@ -1642,6 +1711,15 @@ julia> M = [1 0; 2 2]
 julia> det(M)
 2.0
 ```
+Note that, in general, `det` computes a floating-point approximation of the
+determinant, even for integer matrices, typically via Gaussian elimination.
+Julia includes an exact algorithm for integer determinants (the Bareiss algorithm),
+but only uses it by default for `BigInt` matrices (since determinants quickly
+overflow any fixed integer precision):
+```jldoctest
+julia> det(BigInt[1 0; 2 2]) # exact integer determinant
+2
+```
 """
 function det(A::AbstractMatrix{T}) where {T}
     if istriu(A) || istril(A)
@@ -1934,19 +2012,21 @@ function copytrito!(B::AbstractMatrix, A::AbstractMatrix, uplo::AbstractChar)
     BLAS.chkuplo(uplo)
     m,n = size(A)
     m1,n1 = size(B)
-    (m1 < m || n1 < n) && throw(DimensionMismatch("B of size ($m1,$n1) should have at least the same number of rows and columns than A of size ($m,$n)"))
+    A = Base.unalias(B, A)
     if uplo == 'U'
-        for j=1:n
-            for i=1:min(j,m)
-                @inbounds B[i,j] = A[i,j]
-            end
+        LAPACK.lacpy_size_check((m1, n1), (n < m ? n : m, n))
+        for j in 1:n, i in 1:min(j,m)
+            @inbounds B[i,j] = A[i,j]
         end
-    else  # uplo == 'L'
-        for j=1:n
-            for i=j:m
-                @inbounds B[i,j] = A[i,j]
-            end
+    else # uplo == 'L'
+        LAPACK.lacpy_size_check((m1, n1), (m, m < n ? m : n))
+        for j in 1:n, i in j:m
+            @inbounds B[i,j] = A[i,j]
         end
     end
     return B
 end
+# Forward LAPACK-compatible strided matrices to lacpy
+function copytrito!(B::StridedMatrixStride1{T}, A::StridedMatrixStride1{T}, uplo::AbstractChar) where {T<:BlasFloat}
+    LAPACK.lacpy!(B, A, uplo)
+end
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index 3be41baf24b24..bbaca3c878293 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -66,12 +66,15 @@ similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.da
 AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H)
 AbstractMatrix{T}(H::UpperHessenberg{T}) where {T} = copy(H)
 
+Base.dataids(A::UpperHessenberg) = Base.dataids(parent(A))
+Base.unaliascopy(A::UpperHessenberg) = UpperHessenberg(Base.unaliascopy(parent(A)))
+
 copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data))
 real(H::UpperHessenberg{<:Real}) = H
 real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1))
 imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1))
 
-function istriu(A::UpperHessenberg, k::Integer=0)
+Base.@constprop :aggressive function istriu(A::UpperHessenberg, k::Integer=0)
     k <= -1 && return true
     return _istriu(A, k)
 end
@@ -84,13 +87,15 @@ end
 Base.isassigned(H::UpperHessenberg, i::Int, j::Int) =
     i <= j+1 ? isassigned(H.data, i, j) : true
 
-Base.@propagate_inbounds getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} =
+Base.@propagate_inbounds getindex(H::UpperHessenberg{T}, i::Int, j::Int) where {T} =
     i <= j+1 ? convert(T, H.data[i,j]) : zero(T)
 
+Base._reverse(A::UpperHessenberg, dims) = reverse!(Matrix(A); dims)
+
 Base.@propagate_inbounds function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer)
     if i > j+1
         x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)"))
+            lazy"($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)"))
     else
         A.data[i,j] = x
     end
@@ -180,7 +185,7 @@ end
 function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false)
     checksquare(F)
     m = size(F,1)
-    m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m"))
+    m != size(B,1) && throw(DimensionMismatch(lazy"wrong right-hand-side # rows != $m"))
     require_one_based_indexing(B)
     n = size(B,2)
     H = F.data
@@ -230,7 +235,7 @@ end
 function rdiv!(B::AbstractMatrix, F::UpperHessenberg; shift::Number=false)
     checksquare(F)
     m = size(F,1)
-    m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m"))
+    m != size(B,2) && throw(DimensionMismatch(lazy"wrong right-hand-side # cols != $m"))
     require_one_based_indexing(B)
     n = size(B,1)
     H = F.data
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index a85978435bf46..97dff0031329b 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -26,7 +26,7 @@ Handle only negative LAPACK error codes
 """
 function chkargsok(ret::BlasInt)
     if ret < 0
-        throw(ArgumentError("invalid argument #$(-ret) to LAPACK call"))
+        throw(ArgumentError(lazy"invalid argument #$(-ret) to LAPACK call"))
     end
 end
 
@@ -35,7 +35,7 @@ function chklapackerror(ret::BlasInt, f...)
     if ret == 0
         return
     elseif ret < 0
-        throw(ArgumentError("invalid argument #$(-ret) to LAPACK call"))
+        throw(ArgumentError(lazy"invalid argument #$(-ret) to LAPACK call"))
     else # ret > 0
         chklapackerror_positive(ret, f...)
     end
@@ -55,10 +55,27 @@ function chkposdef(ret::BlasInt)
     end
 end
 
+# Generic fallback function to assert that parameters are valid
+# In specific cases, the following functions may be more useful
+macro chkvalidparam(position::Int, param, validvalues)
+    :(chkvalidparam($position, $(string(param)), $(esc(param)), $validvalues))
+end
+function chkvalidparam(position::Int, var::String, val, validvals)
+    # mimic `repr` for chars without explicitly calling it
+    # This is because `repr` introduces dynamic dispatch
+    _repr(c::AbstractChar) = "'$c'"
+    _repr(c) = c
+    if val ∉ validvals
+        throw(ArgumentError(
+            lazy"argument #$position: $var must be one of $validvals, but $(_repr(val)) was passed"))
+    end
+    return val
+end
+
 "Check that {c}transpose is correctly specified"
 function chktrans(trans::AbstractChar)
     if !(trans == 'N' || trans == 'C' || trans == 'T')
-        throw(ArgumentError("trans argument must be 'N' (no transpose), 'T' (transpose), or 'C' (conjugate transpose), got $trans"))
+        throw(ArgumentError(lazy"trans argument must be 'N' (no transpose), 'T' (transpose), or 'C' (conjugate transpose), got '$trans'"))
     end
     trans
 end
@@ -66,7 +83,7 @@ end
 "Check that left/right hand side multiply is correctly specified"
 function chkside(side::AbstractChar)
     if !(side == 'L' || side == 'R')
-        throw(ArgumentError("side argument must be 'L' (left hand multiply) or 'R' (right hand multiply), got $side"))
+        throw(ArgumentError(lazy"side argument must be 'L' (left hand multiply) or 'R' (right hand multiply), got '$side'"))
     end
     side
 end
@@ -74,7 +91,7 @@ end
 "Check that unit diagonal flag is correctly specified"
 function chkdiag(diag::AbstractChar)
     if !(diag == 'U' || diag =='N')
-        throw(ArgumentError("diag argument must be 'U' (unit diagonal) or 'N' (non-unit diagonal), got $diag"))
+        throw(ArgumentError(lazy"diag argument must be 'U' (unit diagonal) or 'N' (non-unit diagonal), got '$diag'"))
     end
     diag
 end
@@ -93,6 +110,7 @@ end
 
 function chkuplofinite(A::AbstractMatrix, uplo::AbstractChar)
     require_one_based_indexing(A)
+    chkuplo(uplo)
     m, n = size(A)
     if uplo == 'U'
         @inbounds for j in 1:n, i in 1:j
@@ -164,7 +182,7 @@ for (gbtrf, gbtrs, elty) in
             info = Ref{BlasInt}()
             n    = size(AB,2)
             if m != n || m != size(B,1)
-                throw(DimensionMismatch("matrix AB has dimensions $(size(AB)), but right hand side matrix B has dimensions $(size(B))"))
+                throw(DimensionMismatch(lazy"matrix AB has dimensions $(size(AB)), but right hand side matrix B has dimensions $(size(B))"))
             end
             ccall((@blasfunc($gbtrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
@@ -213,7 +231,9 @@ for (gebal, gebak, elty, relty) in
         #     .. Array Arguments ..
         #      DOUBLE PRECISION   A( LDA, * ), SCALE( * )
         function gebal!(job::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
+            @chkvalidparam 1 job ('N', 'P', 'S', 'B')
             n = checksquare(A)
             chkfinite(A) # balancing routines don't support NaNs and Infs
             ihi = Ref{BlasInt}()
@@ -238,6 +258,7 @@ for (gebal, gebak, elty, relty) in
                         ilo::BlasInt, ihi::BlasInt, scale::AbstractVector{$relty},
                         V::AbstractMatrix{$elty})
             require_one_based_indexing(scale, V)
+            @chkvalidparam 1 job ('N', 'P', 'S', 'B')
             chkstride1(scale, V)
             chkside(side)
             chkfinite(V) # balancing routines don't support NaNs and Infs
@@ -338,7 +359,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             n     = BlasInt(size(A, 2))
             lda   = BlasInt(max(1,stride(A, 2)))
             if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), but needs length $(min(m,n))"))
             end
             lwork = BlasInt(-1)
             work  = Vector{$elty}(undef, 1)
@@ -369,7 +390,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             n     = BlasInt(size(A, 2))
             lda   = BlasInt(max(1,stride(A, 2)))
             if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), but needs length $(min(m,n))"))
             end
             lwork = BlasInt(-1)
             work  = Vector{$elty}(undef, 1)
@@ -399,10 +420,10 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             chkstride1(A,jpvt,tau)
             m,n = size(A)
             if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), but needs length $(min(m,n))"))
             end
             if length(jpvt) != n
-                throw(DimensionMismatch("jpvt has length $(length(jpvt)), but needs length $n"))
+                throw(DimensionMismatch(lazy"jpvt has length $(length(jpvt)), but needs length $n"))
             end
             lda = stride(A,2)
             if lda == 0
@@ -449,11 +470,11 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             minmn = min(m, n)
             nb = size(T, 1)
             if nb > minmn
-                throw(ArgumentError("block size $nb > $minmn too large"))
+                throw(ArgumentError(lazy"block size $nb > $minmn too large"))
             end
             lda = max(1, stride(A,2))
             work = Vector{$elty}(undef, nb*n)
-            if n > 0
+            if minmn > 0
                 info = Ref{BlasInt}()
                 ccall((@blasfunc($geqrt), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
@@ -474,12 +495,12 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             m, n = size(A)
             p, q = size(T)
             if m < n
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but should have more rows than columns"))
+                throw(DimensionMismatch(lazy"input matrix A has dimensions ($m,$n), but should have more rows than columns"))
             end
             if p != n || q != n
-                throw(DimensionMismatch("block reflector T has dimensions ($p,$q), but should have dimensions ($n,$n)"))
+                throw(DimensionMismatch(lazy"block reflector T has dimensions ($p,$q), but should have dimensions ($n,$n)"))
             end
-            if n > 0
+            if n > 0    # this implies `m > 0` because of `m >= n`
                 info = Ref{BlasInt}()
                 ccall((@blasfunc($geqrt3), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
@@ -502,7 +523,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             chkstride1(A,tau)
             m, n  = size(A)
             if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), but needs length $(min(m,n))"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -531,7 +552,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             chkstride1(A,tau)
             m, n  = size(A)
             if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), but needs length $(min(m,n))"))
             end
             lwork = BlasInt(-1)
             work  = Vector{$elty}(undef, 1)
@@ -624,11 +645,13 @@ does not equal zero then the `j`th column of `A` is permuted to the front of
 geqp3!(A::AbstractMatrix, jpvt::AbstractVector{BlasInt}, tau::AbstractVector)
 
 function geqp3!(A::AbstractMatrix{<:BlasFloat}, jpvt::AbstractVector{BlasInt})
+    require_one_based_indexing(A, jpvt)
     m, n = size(A)
     geqp3!(A, jpvt, similar(A, min(m, n)))
 end
 
 function geqp3!(A::AbstractMatrix{<:BlasFloat})
+    require_one_based_indexing(A)
     m, n = size(A)
     geqp3!(A, zeros(BlasInt, n), similar(A, min(m, n)))
 end
@@ -777,6 +800,7 @@ for (larfg, elty) in
         #        .. Array Arguments ..
         #        DOUBLE PRECISION   x( * )
         function larfg!(x::AbstractVector{$elty})
+            require_one_based_indexing(x)
             N    = BlasInt(length(x))
             α    = Ref{$elty}(x[1])
             incx = BlasInt(1)
@@ -805,6 +829,7 @@ for (larf, elty) in
         #        DOUBLE PRECISION   c( ldc, * ), v( * ), work( * )
         function larf!(side::AbstractChar, v::AbstractVector{$elty},
                        τ::$elty, C::AbstractMatrix{$elty}, work::AbstractVector{$elty})
+            require_one_based_indexing(v, C, work)
             m, n = size(C)
             chkside(side)
             ldc = max(1, stride(C, 2))
@@ -820,6 +845,7 @@ for (larf, elty) in
 
         function larf!(side::AbstractChar, v::AbstractVector{$elty},
                        τ::$elty, C::AbstractMatrix{$elty})
+            require_one_based_indexing(v, C)
             m, n = size(C)
             chkside(side)
             lwork = side == 'L' ? n : m
@@ -847,7 +873,7 @@ for (tzrzf, ormrz, elty) in
             chkstride1(A)
             m, n = size(A)
             if n < m
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
+                throw(DimensionMismatch(lazy"input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
             end
             lda = max(1, stride(A,2))
             tau = similar(A, $elty, m)
@@ -952,7 +978,7 @@ for (gels, gesv, getrs, getri, elty) in
             btrn  = trans == 'T'
             m, n  = size(A)
             if size(B,1) != (btrn ? n : m)
-                throw(DimensionMismatch("matrix A has dimensions ($m,$n), transposed: $btrn, but leading dimension of B is $(size(B,1))"))
+                throw(DimensionMismatch(lazy"matrix A has dimensions ($m,$n), transposed: $btrn, but leading dimension of B is $(size(B,1))"))
             end
             info  = Ref{BlasInt}()
             work  = Vector{$elty}(undef, 1)
@@ -995,7 +1021,7 @@ for (gels, gesv, getrs, getri, elty) in
             chkstride1(A, B)
             n = checksquare(A)
             if size(B,1) != n
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)), but needs $n"))
             end
             ipiv = similar(A, BlasInt, n)
             info = Ref{BlasInt}()
@@ -1020,10 +1046,10 @@ for (gels, gesv, getrs, getri, elty) in
             chkstride1(A, B, ipiv)
             n = checksquare(A)
             if n != size(B, 1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)), but needs $n"))
             end
             if n != length(ipiv)
-                throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+                throw(DimensionMismatch(lazy"ipiv has length $(length(ipiv)), but needs to be $n"))
             end
             nrhs = size(B, 2)
             info = Ref{BlasInt}()
@@ -1046,7 +1072,7 @@ for (gels, gesv, getrs, getri, elty) in
             chkstride1(A, ipiv)
             n = checksquare(A)
             if n != length(ipiv)
-                throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs $n"))
+                throw(DimensionMismatch(lazy"ipiv has length $(length(ipiv)), but needs $n"))
             end
             lda = max(1,stride(A, 2))
             lwork = BlasInt(-1)
@@ -1134,6 +1160,7 @@ for (gesvx, elty) in
                         AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar,
                         R::AbstractVector{$elty}, C::AbstractVector{$elty}, B::AbstractVecOrMat{$elty})
             require_one_based_indexing(A, AF, ipiv, R, C, B)
+            @chkvalidparam 1 fact ('F', 'N', 'E')
             chktrans(trans)
             chkstride1(ipiv, R, C, B)
             n    = checksquare(A)
@@ -1168,6 +1195,7 @@ for (gesvx, elty) in
         end
 
         function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
+            require_one_based_indexing(A, B)
             n = size(A,1)
             X, equed, R, C, B, rcond, ferr, berr, rpgf =
                 gesvx!('N', 'N', A,
@@ -1204,6 +1232,7 @@ for (gesvx, elty, relty) in
                         AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar,
                         R::AbstractVector{$relty}, C::AbstractVector{$relty}, B::AbstractVecOrMat{$elty})
             require_one_based_indexing(A, AF, ipiv, R, C, B)
+            @chkvalidparam 1 fact ('F', 'N', 'E')
             chktrans(trans)
             chkstride1(A, AF, ipiv, R, C, B)
             n   = checksquare(A)
@@ -1239,6 +1268,7 @@ for (gesvx, elty, relty) in
 
         #Wrapper for the no-equilibration, no-transpose calculation
         function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
+            require_one_based_indexing(A, B)
             n = size(A,1)
             X, equed, R, C, B, rcond, ferr, berr, rpgf =
                 gesvx!('N', 'N', A,
@@ -1305,7 +1335,7 @@ for (gelsd, gelsy, elty) in
             chkstride1(A, B)
             m, n  = size(A)
             if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)) but needs $m"))
             end
             newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
             s     = similar(A, $elty, min(m, n))
@@ -1350,7 +1380,7 @@ for (gelsd, gelsy, elty) in
             n = size(A, 2)
             nrhs = size(B, 2)
             if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)) but needs $m"))
             end
             newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
             lda = max(1, stride(A,2))
@@ -1400,7 +1430,7 @@ for (gelsd, gelsy, elty, relty) in
             chkstride1(A, B)
             m, n  = size(A)
             if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)) but needs $m"))
             end
             newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
             s     = similar(A, $relty, min(m, n))
@@ -1447,7 +1477,7 @@ for (gelsd, gelsy, elty, relty) in
             m, n = size(A)
             nrhs = size(B, 2)
             if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)) but needs $m"))
             end
             newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
             lda = max(1, m)
@@ -1521,13 +1551,13 @@ for (gglse, elty) in ((:dgglse_, :Float64),
             m, n = size(A)
             p = size(B, 1)
             if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)), needs $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)), needs $n"))
             end
             if length(c) != m
-                throw(DimensionMismatch("c has length $(length(c)), needs $m"))
+                throw(DimensionMismatch(lazy"c has length $(length(c)), needs $m"))
             end
             if length(d) != p
-                throw(DimensionMismatch("d has length $(length(d)), needs $p"))
+                throw(DimensionMismatch(lazy"d has length $(length(d)), needs $p"))
             end
             X = zeros($elty, n)
             info  = Ref{BlasInt}()
@@ -1577,8 +1607,11 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
         #       DOUBLE PRECISION   A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
         #      $                   WI( * ), WORK( * ), WR( * )
         function geev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
+            @chkvalidparam 1 jobvl ('N', 'V')
+            @chkvalidparam 2 jobvr ('N', 'V')
             chkfinite(A) # balancing routines don't support NaNs and Infs
             lvecs = jobvl == 'V'
             rvecs = jobvr == 'V'
@@ -1635,6 +1668,7 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
         function gesdd!(job::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             chkstride1(A)
+            @chkvalidparam 1 job ('A', 'S', 'O', 'N')
             m, n   = size(A)
             minmn  = min(m, n)
             if job == 'A'
@@ -1716,6 +1750,9 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
         function gesvd!(jobu::AbstractChar, jobvt::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             chkstride1(A)
+            @chkvalidparam 1 jobu ('A', 'S', 'O', 'N')
+            @chkvalidparam 2 jobvt ('A', 'S', 'O', 'N')
+            (jobu == jobvt == 'O') && throw(ArgumentError("jobu and jobvt cannot both be O"))
             m, n   = size(A)
             minmn  = min(m, n)
             S      = similar(A, $relty, minmn)
@@ -1785,9 +1822,12 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
         function ggsvd!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
             require_one_based_indexing(A, B)
             chkstride1(A, B)
+            @chkvalidparam 1 jobu ('U', 'N')
+            @chkvalidparam 2 jobv ('V', 'N')
+            @chkvalidparam 3 jobq ('Q', 'N')
             m, n = size(A)
             if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs $n"))
             end
             p = size(B, 1)
             k = Vector{BlasInt}(undef, 1)
@@ -1912,9 +1952,12 @@ for (f, elty) in ((:dggsvd3_, :Float64),
         function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
             require_one_based_indexing(A, B)
             chkstride1(A, B)
+            @chkvalidparam 1 jobu ('U', 'N')
+            @chkvalidparam 2 jobv ('V', 'N')
+            @chkvalidparam 3 jobq ('Q', 'N')
             m, n = size(A)
             if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs $n"))
             end
             p = size(B, 1)
             k = Ref{BlasInt}()
@@ -1971,9 +2014,12 @@ for (f, elty, relty) in ((:zggsvd3_, :ComplexF64, :Float64),
         function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
             require_one_based_indexing(A, B)
             chkstride1(A, B)
+            @chkvalidparam 1 jobu ('U', 'N')
+            @chkvalidparam 2 jobv ('V', 'N')
+            @chkvalidparam 3 jobq ('Q', 'N')
             m, n = size(A)
             if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs $n"))
             end
             p = size(B, 1)
             k = Vector{BlasInt}(undef, 1)
@@ -2057,31 +2103,34 @@ for (geevx, ggev, ggev3, elty) in
         #      $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ),
         #      $                   WI( * ), WORK( * ), WR( * )
         function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty})
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            lda = max(1,stride(A,2))
-            wr = similar(A, $elty, n)
-            wi = similar(A, $elty, n)
-            if balanc ∉ ['N', 'P', 'S', 'B']
-                throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed"))
+            require_one_based_indexing(A)
+            @chkvalidparam 1 balanc ('N', 'P', 'S', 'B')
+            @chkvalidparam 4 sense ('N', 'E', 'V', 'B')
+            if sense ∈ ('E', 'B') && !(jobvl == jobvr == 'V')
+                throw(ArgumentError(lazy"sense = '$sense' requires jobvl = 'V' and jobvr = 'V'"))
             end
+            n = checksquare(A)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 0
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            VL = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 0
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            chkfinite(A) # balancing routines don't support NaNs and Infs
+            lda = max(1,stride(A,2))
+            wr = similar(A, $elty, n)
+            wi = similar(A, $elty, n)
+            VL = similar(A, $elty, ldvl, n)
             VR = similar(A, $elty, ldvr, n)
             ilo = Ref{BlasInt}()
             ihi = Ref{BlasInt}()
@@ -2097,7 +2146,7 @@ for (geevx, ggev, ggev3, elty) in
             elseif sense == 'V' || sense == 'B'
                 iworksize = 2*n - 2
             else
-                throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
+                throw(ArgumentError(lazy"sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
             end
             iwork = Vector{BlasInt}(undef, iworksize)
             info = Ref{BlasInt}()
@@ -2141,30 +2190,30 @@ for (geevx, ggev, ggev3, elty) in
             chkstride1(A,B)
             n, m = checksquare(A,B)
             if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
             end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 1
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            vl = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 1
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alphar = similar(A, $elty, n)
+            alphai = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            vl = similar(A, $elty, ldvl, n)
             vr = similar(A, $elty, ldvr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2205,30 +2254,30 @@ for (geevx, ggev, ggev3, elty) in
             chkstride1(A,B)
             n, m = checksquare(A,B)
             if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
             end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 1
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            vl = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 1
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alphar = similar(A, $elty, n)
+            alphai = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            vl = similar(A, $elty, ldvl, n)
             vr = similar(A, $elty, ldvr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2275,33 +2324,37 @@ for (geevx, ggev, ggev3, elty, relty) in
         #       COMPLEX*16         A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
         #      $                   W( * ), WORK( * )
         function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty})
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            lda = max(1,stride(A,2))
-            w = similar(A, $elty, n)
-            if balanc ∉ ['N', 'P', 'S', 'B']
-                throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed"))
+            require_one_based_indexing(A)
+            if balanc ∉ ('N', 'P', 'S', 'B')
+                throw(ArgumentError(lazy"balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed"))
+            end
+            if sense ∉ ('N','E','V','B')
+                throw(ArgumentError(lazy"sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
             end
+            if sense ∈ ('E', 'B') && !(jobvl == jobvr == 'V')
+                throw(ArgumentError(lazy"sense = '$sense' requires jobvl = 'V' and jobvr = 'V'"))
+            end
+            n = checksquare(A)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 0
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            VL = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 0
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            if sense ∉ ['N','E','V','B']
-                throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            chkfinite(A) # balancing routines don't support NaNs and Infs
+            lda = max(1,stride(A,2))
+            w = similar(A, $elty, n)
+            VL = similar(A, $elty, ldvl, n)
             VR = similar(A, $elty, ldvr, n)
             ilo = Ref{BlasInt}()
             ihi = Ref{BlasInt}()
@@ -2352,29 +2405,29 @@ for (geevx, ggev, ggev3, elty, relty) in
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
             end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 1
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            vl = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 1
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alpha = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            vl = similar(A, $elty, ldvl, n)
             vr = similar(A, $elty, ldvr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2417,29 +2470,29 @@ for (geevx, ggev, ggev3, elty, relty) in
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
             end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
             ldvl = 0
             if jobvl == 'V'
                 ldvl = n
             elseif jobvl == 'N'
                 ldvl = 1
             else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+                throw(ArgumentError(lazy"jobvl must be 'V' or 'N', but $jobvl was passed"))
             end
-            vl = similar(A, $elty, ldvl, n)
             ldvr = 0
             if jobvr == 'V'
                 ldvr = n
             elseif jobvr == 'N'
                 ldvr = 1
             else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+                throw(ArgumentError(lazy"jobvr must be 'V' or 'N', but $jobvr was passed"))
             end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alpha = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            vl = similar(A, $elty, ldvl, n)
             vr = similar(A, $elty, ldvr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2524,9 +2577,10 @@ for (laic1, elty) in
         function laic1!(job::Integer, x::AbstractVector{$elty},
                         sest::$elty, w::AbstractVector{$elty}, gamma::$elty)
             require_one_based_indexing(x, w)
+            @chkvalidparam 1 job (1,2)
             j = length(x)
             if j != length(w)
-                throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))"))
+                throw(DimensionMismatch(lazy"vectors must have same length, but length of x is $j and length of w is $(length(w))"))
             end
             sestpr = Ref{$elty}()
             s = Ref{$elty}()
@@ -2558,9 +2612,10 @@ for (laic1, elty, relty) in
         function laic1!(job::Integer, x::AbstractVector{$elty},
                         sest::$relty, w::AbstractVector{$elty}, gamma::$elty)
             require_one_based_indexing(x, w)
+            @chkvalidparam 1 job (1,2)
             j = length(x)
             if j != length(w)
-                throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))"))
+                throw(DimensionMismatch(lazy"vectors must have same length, but length of x is $j and length of w is $(length(w))"))
             end
             sestpr = Ref{$relty}()
             s = Ref{$elty}()
@@ -2595,13 +2650,13 @@ for (gtsv, gttrf, gttrs, elty) in
             chkstride1(B, dl, d, du)
             n = length(d)
             if !(n >= length(dl) >= n - 1)
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $n or $(n - 1)"))
+                throw(DimensionMismatch(lazy"subdiagonal has length $(length(dl)), but should be $n or $(n - 1)"))
             end
             if !(n >= length(du) >= n - 1)
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $n or $(n - 1)"))
+                throw(DimensionMismatch(lazy"superdiagonal has length $(length(du)), but should be $n or $(n - 1)"))
             end
             if n != size(B,1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)), but should have $n"))
             end
             if n == 0
                 return B # Early exit if possible
@@ -2626,10 +2681,10 @@ for (gtsv, gttrf, gttrs, elty) in
             chkstride1(dl,d,du)
             n    = length(d)
             if length(dl) != n - 1
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)"))
+                throw(DimensionMismatch(lazy"subdiagonal has length $(length(dl)), but should be $(n - 1)"))
             end
             if length(du) != n - 1
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)"))
+                throw(DimensionMismatch(lazy"superdiagonal has length $(length(du)), but should be $(n - 1)"))
             end
             du2  = similar(d, $elty, n-2)
             ipiv = similar(d, BlasInt, n)
@@ -2657,13 +2712,13 @@ for (gtsv, gttrf, gttrs, elty) in
             chkstride1(B, ipiv, dl, d, du, du2)
             n = length(d)
             if length(dl) != n - 1
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)"))
+                throw(DimensionMismatch(lazy"subdiagonal has length $(length(dl)), but should be $(n - 1)"))
             end
             if length(du) != n - 1
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)"))
+                throw(DimensionMismatch(lazy"superdiagonal has length $(length(du)), but should be $(n - 1)"))
             end
             if n != size(B,1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n"))
+                throw(DimensionMismatch(lazy"B has leading dimension $(size(B,1)), but should have $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($gttrs), libblastrampoline), Cvoid,
@@ -2727,7 +2782,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             n = size(A, 2)
             m = min(n, size(A, 1))
             if k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= m = $m"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2761,7 +2816,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             m = size(A, 1)
             n = min(m, size(A, 2))
             if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2797,7 +2852,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             m = size(A, 1)
             n = min(m, size(A, 2))
             if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2832,10 +2887,10 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             chkstride1(A,tau)
             m, n = size(A)
             if n < m
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
+                throw(DimensionMismatch(lazy"input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
             end
             if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2873,16 +2928,16 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             nA = size(A, 2)
             k   = length(tau)
             if side == 'L' && m != nA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
+                throw(DimensionMismatch(lazy"for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
             end
             if side == 'R' && n != nA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $n, must equal the second dimension of A, $nA"))
+                throw(DimensionMismatch(lazy"for a right-sided multiplication, the second dimension of C, $n, must equal the second dimension of A, $nA"))
             end
             if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= m = $m"))
             end
             if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2920,16 +2975,16 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             mA  = size(A, 1)
             k   = length(tau)
             if side == 'L' && m != mA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
+                throw(DimensionMismatch(lazy"for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
             end
             if side == 'R' && n != mA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
+                throw(DimensionMismatch(lazy"for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
             end
             if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= m = $m"))
             end
             if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -2970,16 +3025,16 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             mA  = size(A, 1)
             k   = length(tau)
             if side == 'L' && m != mA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
+                throw(DimensionMismatch(lazy"for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
             end
             if side == 'R' && n != mA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
+                throw(DimensionMismatch(lazy"for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
             end
             if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= m = $m"))
             end
             if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -3020,16 +3075,16 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             nA  = size(A, 2)
             k   = length(tau)
             if side == 'L' && m != nA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
+                throw(DimensionMismatch(lazy"for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
             end
             if side == 'R' && n != nA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $nA"))
+                throw(DimensionMismatch(lazy"for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $nA"))
             end
             if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= m = $m"))
             end
             if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
+                throw(DimensionMismatch(lazy"invalid number of reflectors: k = $k should be <= n = $n"))
             end
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -3062,31 +3117,31 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             end
             if side == 'L'
                 if !(0 <= k <= m)
-                    throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $m"))
+                    throw(DimensionMismatch(lazy"wrong value for k = $k: must be between 0 and $m"))
                 end
                 if m != size(V,1)
-                    throw(DimensionMismatch("first dimensions of C, $m, and V, $(size(V,1)) must match"))
+                    throw(DimensionMismatch(lazy"first dimensions of C, $m, and V, $(size(V,1)) must match"))
                 end
                 ldv = stride(V,2)
                 if ldv < max(1, m)
-                    throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small"))
+                    throw(DimensionMismatch(lazy"Q and C don't fit! The stride of V, $ldv, is too small"))
                 end
                 wss = n*k
             elseif side == 'R'
                 if !(0 <= k <= n)
-                    throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $n"))
+                    throw(DimensionMismatch(lazy"wrong value for k = $k: must be between 0 and $n"))
                 end
                 if n != size(V,1)
-                    throw(DimensionMismatch("second dimension of C, $n, and first dimension of V, $(size(V,1)) must match"))
+                    throw(DimensionMismatch(lazy"second dimension of C, $n, and first dimension of V, $(size(V,1)) must match"))
                 end
                 ldv = stride(V,2)
                 if ldv < max(1, n)
-                    throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small"))
+                    throw(DimensionMismatch(lazy"Q and C don't fit! The stride of V, $ldv, is too small"))
                 end
                 wss = m*k
             end
             if !(1 <= nb <= k)
-                throw(DimensionMismatch("wrong value for nb = $nb, which must be between 1 and $k"))
+                throw(DimensionMismatch(lazy"wrong value for nb = $nb, which must be between 1 and $k"))
             end
             ldc = stride(C, 2)
             work = Vector{$elty}(undef, wss)
@@ -3207,7 +3262,7 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
             n = checksquare(A)
             chkuplo(uplo)
             if size(B,1) != n
-                throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
+                throw(DimensionMismatch(lazy"first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($posv), libblastrampoline), Cvoid,
@@ -3277,7 +3332,7 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
             chkuplo(uplo)
             nrhs = size(B,2)
             if size(B,1) != n
-                throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
+                throw(DimensionMismatch(lazy"first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
             end
             lda = max(1,stride(A,2))
             if lda == 0 || nrhs == 0
@@ -3303,6 +3358,7 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
         #       DOUBLE PRECISION   A( LDA, * ), WORK( 2*N )
         #       INTEGER            PIV( N )
         function pstrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tol::Real)
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -3393,10 +3449,10 @@ for (ptsv, pttrf, elty, relty) in
             chkstride1(B, D, E)
             n = length(D)
             if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"E has length $(length(E)), but needs $(n - 1)"))
             end
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($ptsv), libblastrampoline), Cvoid,
@@ -3417,7 +3473,7 @@ for (ptsv, pttrf, elty, relty) in
             chkstride1(D, E)
             n = length(D)
             if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"E has length $(length(E)), but needs $(n - 1)"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($pttrf), libblastrampoline), Cvoid,
@@ -3461,10 +3517,10 @@ for (pttrs, elty, relty) in
             chkstride1(B, D, E)
             n = length(D)
             if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"E has length $(length(E)), but needs $(n - 1)"))
             end
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
@@ -3495,10 +3551,10 @@ for (pttrs, elty, relty) in
             chkuplo(uplo)
             n = length(D)
             if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"E has length $(length(E)), but needs $(n - 1)"))
             end
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
@@ -3534,6 +3590,7 @@ for (trtri, trtrs, elty) in
         #     .. Array Arguments ..
         #      DOUBLE PRECISION   A( LDA, * )
         function trtri!(uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -3563,7 +3620,7 @@ for (trtri, trtrs, elty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($trtrs), libblastrampoline), Cvoid,
@@ -3616,10 +3673,12 @@ for (trcon, trevc, trrfs, elty) in
         # INTEGER            IWORK( * )
         # DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             chkdiag(diag)
             n = checksquare(A)
             chkuplo(uplo)
+            @chkvalidparam 1 norm ('O', '1', 'I')
             rcond = Ref{$elty}()
             work  = Vector{$elty}(undef, 3n)
             iwork = Vector{BlasInt}(undef, n)
@@ -3646,14 +3705,15 @@ for (trcon, trevc, trrfs, elty) in
         # LOGICAL            SELECT( * )
         # DOUBLE PRECISION   T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
         #$                   WORK( * )
-        function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty},
+        Base.@constprop :aggressive function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty},
                         VL::AbstractMatrix{$elty} = similar(T),
                         VR::AbstractMatrix{$elty} = similar(T))
             require_one_based_indexing(select, T, VL, VR)
             # Extract
-            if side ∉ ['L','R','B']
-                throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
+            if side ∉ ('L','R','B')
+                throw(ArgumentError(lazy"side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
             end
+            @chkvalidparam 2 howmny ('A', 'B', 'S')
             n, mm = checksquare(T), size(VL, 2)
             ldt, ldvl, ldvr = stride(T, 2), stride(VL, 2), stride(VR, 2)
 
@@ -3717,7 +3777,7 @@ for (trcon, trevc, trrfs, elty) in
             n = size(A,2)
             nrhs = size(B,2)
             if nrhs != size(X,2)
-                throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
+                throw(DimensionMismatch(lazy"second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
             end
             work = Vector{$elty}(undef, 3n)
             iwork = Vector{BlasInt}(undef, n)
@@ -3749,8 +3809,10 @@ for (trcon, trevc, trrfs, elty, relty) in
         # DOUBLE PRECISION   RWORK( * )
         # COMPLEX*16         A( LDA, * ), WORK( * )
         function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
+            @chkvalidparam 1 norm ('O', '1', 'I')
             chkuplo(uplo)
             chkdiag(diag)
             rcond = Ref{$relty}(1)
@@ -3790,9 +3852,10 @@ for (trcon, trevc, trrfs, elty, relty) in
 
             # Check
             chkstride1(T, select, VL, VR)
-            if side ∉ ['L','R','B']
-                throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
+            if side ∉ ('L','R','B')
+                throw(ArgumentError(lazy"side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
             end
+            @chkvalidparam 2 howmny ('A', 'B', 'S')
 
             # Allocate
             m = Ref{BlasInt}()
@@ -3851,7 +3914,7 @@ for (trcon, trevc, trrfs, elty, relty) in
             n = size(A,2)
             nrhs = size(B,2)
             if nrhs != size(X,2)
-                throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
+                throw(DimensionMismatch(lazy"second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
             end
             work  = Vector{$elty}(undef, 2n)
             rwork = Vector{$relty}(undef, n)
@@ -3919,10 +3982,11 @@ for (stev, stebz, stegr, stein, elty) in
     @eval begin
         function stev!(job::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty})
             require_one_based_indexing(dv, ev)
+            @chkvalidparam 1 job ('N', 'V')
             chkstride1(dv, ev)
             n = length(dv)
             if length(ev) != n - 1 && length(ev) != n
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)"))
+                throw(DimensionMismatch(lazy"ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)"))
             end
             Zmat = similar(dv, $elty, (n, job != 'N' ? n : 0))
             work = Vector{$elty}(undef, max(1, 2n-2))
@@ -3941,10 +4005,12 @@ for (stev, stebz, stegr, stein, elty) in
         #*  eigenvalues.
         function stebz!(range::AbstractChar, order::AbstractChar, vl::$elty, vu::$elty, il::Integer, iu::Integer, abstol::Real, dv::AbstractVector{$elty}, ev::AbstractVector{$elty})
             require_one_based_indexing(dv, ev)
+            @chkvalidparam 1 range ('A', 'V', 'I')
+            @chkvalidparam 2 order ('B', 'E')
             chkstride1(dv, ev)
             n = length(dv)
             if length(ev) != n - 1
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)"))
+                throw(DimensionMismatch(lazy"ev has length $(length(ev)) but needs one less than dv's length, $n)"))
             end
             m = Ref{BlasInt}()
             nsplit = Vector{BlasInt}(undef, 1)
@@ -3972,6 +4038,8 @@ for (stev, stebz, stegr, stein, elty) in
 
         function stegr!(jobz::AbstractChar, range::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty}, vl::Real, vu::Real, il::Integer, iu::Integer)
             require_one_based_indexing(dv, ev)
+            @chkvalidparam 1 jobz ('N', 'V')
+            @chkvalidparam 2 range ('A', 'V', 'I')
             chkstride1(dv, ev)
             n = length(dv)
             ne = length(ev)
@@ -3981,7 +4049,7 @@ for (stev, stebz, stegr, stein, elty) in
                 eev = copy(ev)
                 eev[n] = zero($elty)
             else
-                throw(DimensionMismatch("ev has length $ne but needs one less than or equal to dv's length, $n)"))
+                throw(DimensionMismatch(lazy"ev has length $ne but needs one less than or equal to dv's length, $n)"))
             end
 
             abstol = Vector{$elty}(undef, 1)
@@ -4031,12 +4099,12 @@ for (stev, stebz, stegr, stein, elty) in
                 ev = copy(ev_in)
                 ev[n] = zero($elty)
             else
-                throw(DimensionMismatch("ev_in has length $ne but needs one less than or equal to dv's length, $n)"))
+                throw(DimensionMismatch(lazy"ev_in has length $ne but needs one less than or equal to dv's length, $n)"))
             end
             ldz = n #Leading dimension
             #Number of eigenvalues to find
             if !(1 <= length(w_in) <= n)
-                throw(DimensionMismatch("w_in has length $(length(w_in)), but needs to be between 1 and $n"))
+                throw(DimensionMismatch(lazy"w_in has length $(length(w_in)), but needs to be between 1 and $n"))
             end
             m = length(w_in)
             #If iblock and isplit are invalid input, assume worst-case block partitioning,
@@ -4144,6 +4212,7 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
         #       INTEGER            IPIV( * )
         #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4171,7 +4240,7 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4201,6 +4270,7 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
         #       INTEGER            IPIV( * )
         #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4225,6 +4295,8 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
         end
 
         function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            chkuplo(uplo)
             n = checksquare(A)
             ipiv = similar(A, BlasInt, n)
             sytrf!(uplo, A, ipiv)
@@ -4267,6 +4339,7 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
         #      INTEGER            IPIV( * )
         #      DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4296,7 +4369,7 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
@@ -4328,7 +4401,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4358,6 +4431,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
         #       INTEGER            IPIV( * )
         #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4390,6 +4464,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
         #      INTEGER            IPIV( * )
         #      DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4419,7 +4494,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
@@ -4453,10 +4528,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
                 throw(ArgumentError("way must be C or R"))
             end
             if length(ipiv) != n
-                throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n"))
+                throw(ArgumentError(lazy"length of pivot vector was $(length(ipiv)) but should have been $n"))
             end
             if length(e) != n
-                throw(ArgumentError("length of e vector was $(length(e)) but should have been $n"))
+                throw(ArgumentError(lazy"length of e vector was $(length(e)) but should have been $n"))
             end
 
             # allocate
@@ -4492,6 +4567,7 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
        #        INTEGER            IPIV( * )
        #        COMPLEX*16         A( LDA, * ), WORK( * )
         function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A,ipiv)
             chkstride1(A,ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4519,7 +4595,7 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4549,6 +4625,7 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4570,6 +4647,8 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
         end
 
         function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            chkuplo(uplo)
             n = checksquare(A)
             ipiv = similar(A, BlasInt, n)
             hetrf!(uplo, A, ipiv)
@@ -4614,6 +4693,7 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function hetri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4638,10 +4718,11 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
         function hetrs!(uplo::AbstractChar, A::AbstractMatrix{$elty},
                        ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
             require_one_based_indexing(A, ipiv, B)
+            chkuplo(uplo)
             chkstride1(A,B,ipiv)
             n = checksquare(A)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
@@ -4672,7 +4753,7 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4702,6 +4783,7 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function hetrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4732,6 +4814,7 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function hetri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A,ipiv)
             chkstride1(A,ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4757,9 +4840,10 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
                              ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
             require_one_based_indexing(A, ipiv, B)
             chkstride1(A,B,ipiv)
+            chkuplo(uplo)
             n = checksquare(A)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
@@ -4791,7 +4875,7 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4822,6 +4906,7 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4846,6 +4931,8 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
         end
 
         function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            chkuplo(uplo)
             n = checksquare(A)
             ipiv = similar(A, BlasInt, n)
             sytrf!(uplo, A, ipiv)
@@ -4889,6 +4976,7 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -4917,7 +5005,7 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
@@ -4949,7 +5037,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             ipiv  = similar(A, BlasInt, n)
             work  = Vector{$elty}(undef, 1)
@@ -4980,6 +5068,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkuplo(uplo)
@@ -5013,6 +5102,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
         #       INTEGER            IPIV( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
+            require_one_based_indexing(A, ipiv)
             chkstride1(A, ipiv)
             n = checksquare(A)
             chkuplo(uplo)
@@ -5041,7 +5131,7 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             n = checksquare(A)
             chkuplo(uplo)
             if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
+                throw(DimensionMismatch(lazy"B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
             ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
@@ -5074,13 +5164,13 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             # check
             chkuplo(uplo)
             if way != 'C' && way != 'R'
-                throw(ArgumentError("way must be 'C' or 'R'"))
+                throw(ArgumentError(lazy"way must be 'C' or 'R'"))
             end
             if length(ipiv) != n
-                throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n"))
+                throw(ArgumentError(lazy"length of pivot vector was $(length(ipiv)) but should have been $n"))
             end
             if length(e) != n
-                throw(ArgumentError("length of e vector was $(length(e)) but should have been $n"))
+                throw(ArgumentError(lazy"length of e vector was $(length(e)) but should have been $n"))
             end
 
             # allocate
@@ -5239,7 +5329,10 @@ for (syev, syevr, syevd, sygvd, elty) in
         #       INTEGER            INFO, LDA, LWORK, N
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
+            chkuplo(uplo)
             chkstride1(A)
             n = checksquare(A)
             W     = similar(A, $elty, n)
@@ -5273,15 +5366,18 @@ for (syev, syevr, syevd, sygvd, elty) in
         #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
         function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty},
                         vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
+            @chkvalidparam 2 range ('A', 'V', 'I')
             chkstride1(A)
             n = checksquare(A)
-            chkuplofinite(A, uplo)
             if range == 'I' && !(1 <= il <= iu <= n)
-                throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu = $iu), which must be between 1 and n = $n"))
+                throw(ArgumentError(lazy"illegal choice of eigenvalue indices (il = $il, iu = $iu), which must be between 1 and n = $n"))
             end
             if range == 'V' && vl >= vu
-                throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu"))
+                throw(ArgumentError(lazy"lower boundary, $vl, must be less than upper boundary, $vu"))
             end
+            chkuplofinite(A, uplo)
             lda = stride(A,2)
             m = Ref{BlasInt}()
             W = similar(A, $elty, n)
@@ -5333,7 +5429,9 @@ for (syev, syevr, syevd, sygvd, elty) in
         # *     .. Array Arguments ..
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
             n = checksquare(A)
             chkuplofinite(A, uplo)
@@ -5375,10 +5473,14 @@ for (syev, syevr, syevd, sygvd, elty) in
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
         function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 itype 1:3
+            @chkvalidparam 2 jobz ('N', 'V')
+            chkuplo(uplo)
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             lda = max(1, stride(A, 2))
             ldb = max(1, stride(B, 2))
@@ -5424,7 +5526,9 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION   RWORK( * ), W( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
             chkuplofinite(A, uplo)
             n = checksquare(A)
@@ -5464,14 +5568,17 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         #       COMPLEX*16         A( LDA, * ), WORK( * ), Z( LDZ, * )
         function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty},
                         vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
+            @chkvalidparam 2 range ('A', 'V', 'I')
             chkstride1(A)
             chkuplofinite(A, uplo)
             n = checksquare(A)
             if range == 'I' && !(1 <= il <= iu <= n)
-                throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n"))
+                throw(ArgumentError(lazy"illegal choice of eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n"))
             end
             if range == 'V' && vl >= vu
-                throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu"))
+                throw(ArgumentError(lazy"lower boundary, $vl, must be less than upper boundary, $vu"))
             end
             lda = max(1,stride(A,2))
             m = Ref{BlasInt}()
@@ -5532,7 +5639,9 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   RWORK( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
+            @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
             chkuplofinite(A, uplo)
             n = checksquare(A)
@@ -5578,12 +5687,15 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         #       DOUBLE PRECISION   RWORK( * ), W( * )
         #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
         function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 itype 1:3
+            @chkvalidparam 2 jobz ('N', 'V')
             chkstride1(A, B)
             chkuplofinite(A, uplo)
             chkuplofinite(B, uplo)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             lda = max(1, stride(A, 2))
             ldb = max(1, stride(B, 2))
@@ -5654,11 +5766,6 @@ syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractM
 Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
 (`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
 of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
-
-Use the divide-and-conquer method, instead of the QR iteration used by
-`syev!` or multiple relatively robust representations used by `syevr!`.
-See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
-a comparison of the accuracy and performatce of different methods.
 """
 syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix)
 
@@ -5694,19 +5801,19 @@ for (bdsqr, relty, elty) in
             # Do checks
             chkuplo(uplo)
             if length(e_) != n - 1
-                throw(DimensionMismatch("off-diagonal has length $(length(e_)) but should have length $(n - 1)"))
+                throw(DimensionMismatch(lazy"off-diagonal has length $(length(e_)) but should have length $(n - 1)"))
             end
             if ncvt > 0 && ldvt < n
-                throw(DimensionMismatch("leading dimension of Vt, $ldvt, must be at least $n"))
+                throw(DimensionMismatch(lazy"leading dimension of Vt, $ldvt, must be at least $n"))
             end
             if ldu < nru
-                throw(DimensionMismatch("leading dimension of U, $ldu, must be at least $nru"))
+                throw(DimensionMismatch(lazy"leading dimension of U, $ldu, must be at least $nru"))
             end
             if size(U, 2) != n
-                throw(DimensionMismatch("U must have $n columns but has $(size(U, 2))"))
+                throw(DimensionMismatch(lazy"U must have $n columns but has $(size(U, 2))"))
             end
             if ncc > 0 && ldc < n
-                throw(DimensionMismatch("leading dimension of C, $ldc, must be at least $n"))
+                throw(DimensionMismatch(lazy"leading dimension of C, $ldc, must be at least $n"))
             end
             # Allocate
             work = Vector{$relty}(undef, 4n)
@@ -5773,7 +5880,7 @@ for (bdsdc, elty) in
                 ldvt=ldu=max(1, n)
                 lwork=3*n^2 + 4*n
             else
-                throw(ArgumentError("COMPQ argument must be 'N', 'P' or 'I', got $(repr(compq))"))
+                throw(ArgumentError(lazy"COMPQ argument must be 'N', 'P' or 'I', got $(repr(compq))"))
             end
             u  = similar(d, $elty, (ldu,  n))
             vt = similar(d, $elty, (ldvt, n))
@@ -5827,6 +5934,8 @@ for (gecon, elty) in
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
         function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$elty)
+            require_one_based_indexing(A)
+            @chkvalidparam 1 normtype ('0', '1', 'I')
             chkstride1(A)
             n = checksquare(A)
             lda = max(1, stride(A, 2))
@@ -5861,6 +5970,8 @@ for (gecon, elty, relty) in
         #       DOUBLE PRECISION   RWORK( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
         function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$relty)
+            require_one_based_indexing(A)
+            @chkvalidparam 1 normtype ('0', '1', 'I')
             chkstride1(A)
             n = checksquare(A)
             lda = max(1, stride(A, 2))
@@ -5903,6 +6014,7 @@ for (gehrd, elty) in
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * )
         function gehrd!(ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
             chkfinite(A) # balancing routines don't support NaNs and Infs
@@ -5956,7 +6068,7 @@ for (orghr, elty) in
             chkstride1(A, tau)
             n = checksquare(A)
             if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), needs $(n - 1)"))
             end
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -6005,11 +6117,13 @@ for (ormhr, elty) in
 
             require_one_based_indexing(A, tau, C)
             chkstride1(A, tau, C)
+            chkside(side)
+            chktrans(trans)
             n = checksquare(A)
             mC, nC = size(C, 1), size(C, 2)
 
             if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), needs $(n - 1)"))
             end
             if (side == 'L' && mC != n) || (side == 'R' && nC != n)
                 throw(DimensionMismatch("A and C matrices are not conformable"))
@@ -6052,6 +6166,8 @@ for (hseqr, elty) in
         function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer,
                         H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
             require_one_based_indexing(H, Z)
+            @chkvalidparam 1 job ('E', 'S')
+            @chkvalidparam 2 compz ('N', 'I', 'V')
             chkstride1(H)
             n = checksquare(H)
             checksquare(Z) == n || throw(DimensionMismatch())
@@ -6094,6 +6210,8 @@ for (hseqr, elty) in
         function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer,
                         H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
             require_one_based_indexing(H, Z)
+            @chkvalidparam 1 job ('E', 'S')
+            @chkvalidparam 2 compz ('N', 'I', 'V')
             chkstride1(H)
             n = checksquare(H)
             checksquare(Z) == n || throw(DimensionMismatch())
@@ -6152,10 +6270,10 @@ for (hetrd, elty) in
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION  A( LDA, * ), D( * ), E( * ), TAU( * ), WORK( * )
         function hetrd!(uplo::AbstractChar, A::AbstractMatrix{$elty})
+            require_one_based_indexing(A)
             chkstride1(A)
             n = checksquare(A)
-            chkuplo(uplo)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
+            chkuplofinite(A, uplo) # balancing routines don't support NaNs and Infs
             tau = similar(A, $elty, max(0,n - 1))
             d = Vector{$relty}(undef, n)
             e = Vector{$relty}(undef, max(0,n - 1))
@@ -6206,7 +6324,7 @@ for (orgtr, elty) in
             chkstride1(A, tau)
             n = checksquare(A)
             if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), needs $(n - 1)"))
             end
             chkuplo(uplo)
             work = Vector{$elty}(undef, 1)
@@ -6257,14 +6375,16 @@ for (ormtr, elty) in
             require_one_based_indexing(A, tau, C)
             chkstride1(A, tau, C)
             n = checksquare(A)
+            chkside(side)
             chkuplo(uplo)
+            chktrans(trans)
             mC, nC = size(C, 1), size(C, 2)
 
             if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
+                throw(DimensionMismatch(lazy"tau has length $(length(tau)), needs $(n - 1)"))
             end
             if (side == 'L' && mC != n) || (side == 'R' && nC != n)
-                throw(DimensionMismatch("A and C matrices are not conformable"))
+                throw(DimensionMismatch(lazy"A and C matrices are not conformable"))
             end
 
             work = Vector{$elty}(undef, 1)
@@ -6305,6 +6425,7 @@ for (gees, gges, gges3, elty) in
         #    $                   WR( * )
         function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
+            @chkvalidparam 1 jobvs ('N', 'V')
             chkstride1(A)
             n     = checksquare(A)
             sdim  = Vector{BlasInt}(undef, 1)
@@ -6331,7 +6452,7 @@ for (gees, gges, gges3, elty) in
                     resize!(work, lwork)
                 end
             end
-            A, vs, iszero(wi) ? wr : complex.(wr, wi)
+            iszero(wi) ? (A, vs, wr) : (A, vs, complex.(wr, wi))
         end
 
         # *     .. Scalar Arguments ..
@@ -6344,10 +6465,13 @@ for (gees, gges, gges3, elty) in
         #      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
         #      $                   VSR( LDVSR, * ), WORK( * )
         function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 jobvsl ('N', 'V')
+            @chkvalidparam 2 jobvsr ('N', 'V')
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             sdim = BlasInt(0)
             alphar = similar(A, $elty, n)
@@ -6393,10 +6517,13 @@ for (gees, gges, gges3, elty) in
         #      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
         #      $                   VSR( LDVSR, * ), WORK( * )
         function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 jobvsl ('N', 'V')
+            @chkvalidparam 2 jobvsr ('N', 'V')
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             sdim = BlasInt(0)
             alphar = similar(A, $elty, n)
@@ -6448,6 +6575,7 @@ for (gees, gges, gges3, elty, relty) in
         #       COMPLEX*16         A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
         function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
+            @chkvalidparam 1 jobvs ('N', 'V')
             chkstride1(A)
             n     = checksquare(A)
             sort  = 'N'
@@ -6489,10 +6617,13 @@ for (gees, gges, gges3, elty, relty) in
         #      $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
         #      $                   WORK( * )
         function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 jobvsl ('N', 'V')
+            @chkvalidparam 2 jobvsr ('N', 'V')
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             sdim = BlasInt(0)
             alpha = similar(A, $elty, n)
@@ -6539,10 +6670,13 @@ for (gees, gges, gges3, elty, relty) in
         #      $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
         #      $                   WORK( * )
         function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            @chkvalidparam 1 jobvsl ('N', 'V')
+            @chkvalidparam 2 jobvsr ('N', 'V')
             chkstride1(A, B)
             n, m = checksquare(A, B)
             if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($n,$n), and B, ($m,$m), must match"))
             end
             sdim = BlasInt(0)
             alpha = similar(A, $elty, n)
@@ -6627,6 +6761,8 @@ for (trexc, trsen, tgsen, elty) in
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
         function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
+            require_one_based_indexing(T, Q)
+            @chkvalidparam 1 compq ('V', 'N')
             chkstride1(T, Q)
             n = checksquare(T)
             ldt = max(1, stride(T, 2))
@@ -6659,6 +6795,9 @@ for (trexc, trsen, tgsen, elty) in
         #       DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), WR( * )
         function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt},
                         T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
+            require_one_based_indexing(T, Q, select)
+            @chkvalidparam 1 job ('N', 'E', 'V', 'B')
+            @chkvalidparam 2 compq ('V', 'N')
             chkstride1(T, Q, select)
             n = checksquare(T)
             ldt = max(1, stride(T, 2))
@@ -6694,7 +6833,7 @@ for (trexc, trsen, tgsen, elty) in
                     resize!(iwork, liwork)
                 end
             end
-            T, Q, iszero(wi) ? wr : complex.(wr, wi), s[], sep[]
+            iszero(wi) ? (T, Q, wr, s[], sep[]) : (T, Q, complex.(wr, wi), s[], sep[])
         end
         trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
             trsen!('N', 'V', select, T, Q)
@@ -6714,16 +6853,17 @@ for (trexc, trsen, tgsen, elty) in
         #        ..
         function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty},
                         Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
+            require_one_based_indexing(select, S, T, Q, Z)
             chkstride1(select, S, T, Q, Z)
             n, nt, nq, nz = checksquare(S, T, Q, Z)
             if n != nt
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
             end
             if n != nq
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
             end
             if n != nz
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
             end
             lds = max(1, stride(S, 2))
             ldt = max(1, stride(T, 2))
@@ -6779,6 +6919,8 @@ for (trexc, trsen, tgsen, elty, relty) in
         #      .. Array Arguments ..
         #      DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
         function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
+            require_one_based_indexing(T, Q)
+            @chkvalidparam 1 compq ('V', 'N')
             chkstride1(T, Q)
             n = checksquare(T)
             ldt = max(1, stride(T, 2))
@@ -6809,6 +6951,9 @@ for (trexc, trsen, tgsen, elty, relty) in
         #      COMPLEX            Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * )
         function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt},
                         T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
+            require_one_based_indexing(select, T, Q)
+            @chkvalidparam 1 job ('N', 'E', 'V', 'B')
+            @chkvalidparam 2 compq ('N', 'V')
             chkstride1(select, T, Q)
             n = checksquare(T)
             ldt = max(1, stride(T, 2))
@@ -6859,16 +7004,17 @@ for (trexc, trsen, tgsen, elty, relty) in
         #        ..
         function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty},
                         Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
+            require_one_based_indexing(select, S, T, Q, Z)
             chkstride1(select, S, T, Q, Z)
             n, nt, nq, nz = checksquare(S, T, Q, Z)
             if n != nt
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
             end
             if n != nq
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
             end
             if n != nz
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
+                throw(DimensionMismatch(lazy"dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
             end
             lds = max(1, stride(S, 2))
             ldt = max(1, stride(T, 2))
@@ -6959,13 +7105,15 @@ for (fn, elty, relty) in ((:dtrsyl_, :Float64, :Float64),
         function trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix{$elty},
                         B::AbstractMatrix{$elty}, C::AbstractMatrix{$elty}, isgn::Int=1)
             require_one_based_indexing(A, B, C)
+            chktrans(transa)
+            chktrans(transb)
             chkstride1(A, B, C)
             m, n = checksquare(A), checksquare(B)
             lda = max(1, stride(A, 2))
             ldb = max(1, stride(B, 2))
             m1, n1 = size(C)
             if m != m1 || n != n1
-                throw(DimensionMismatch("dimensions of A, ($m,$n), and C, ($m1,$n1), must match"))
+                throw(DimensionMismatch(lazy"dimensions of A, ($m,$n), and C, ($m1,$n1), must match"))
             end
             ldc = max(1, stride(C, 2))
             scale = Ref{$relty}()
@@ -7013,9 +7161,15 @@ for (fn, elty) in ((:dlacpy_, :Float64),
         function lacpy!(B::AbstractMatrix{$elty}, A::AbstractMatrix{$elty}, uplo::AbstractChar)
             require_one_based_indexing(A, B)
             chkstride1(A, B)
-            m,n = size(A)
-            m1,n1 = size(B)
-            (m1 < m || n1 < n) && throw(DimensionMismatch("B of size ($m1,$n1) should have at least the same number of rows and columns than A of size ($m,$n)"))
+            m, n = size(A)
+            m1, n1 = size(B)
+            if uplo == 'U'
+                lacpy_size_check((m1, n1), (n < m ? n : m, n))
+            elseif uplo == 'L'
+                lacpy_size_check((m1, n1), (m, m < n ? m : n))
+            else
+                lacpy_size_check((m1, n1), (m, n))
+            end
             lda = max(1, stride(A, 2))
             ldb = max(1, stride(B, 2))
             ccall((@blasfunc($fn), libblastrampoline), Cvoid,
@@ -7027,6 +7181,9 @@ for (fn, elty) in ((:dlacpy_, :Float64),
     end
 end
 
+# The noinline annotation reduces latency
+@noinline lacpy_size_check((m1, n1), (m, n)) = (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
+
 """
     lacpy!(B, A, uplo) -> B
 
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index b133741611adc..606ddedbe1343 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -13,6 +13,19 @@ struct lbt_library_info_t
     f2c::Int32
     cblas::Int32
 end
+
+macro get_warn(map, key)
+    return quote
+        if !haskey($(esc(map)), $(esc(key)))
+            @warn(string("[LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown"))
+            # All the unknown values share a common value: `-1`
+            $(esc(map))[$(esc(LBT_INTERFACE_UNKNOWN))]
+        else
+            $(esc(map))[$(esc(key))]
+        end
+    end
+end
+
 const LBT_INTERFACE_LP64    = 32
 const LBT_INTERFACE_ILP64   = 64
 const LBT_INTERFACE_UNKNOWN = -1
@@ -35,10 +48,12 @@ const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
 
 const LBT_COMPLEX_RETSTYLE_NORMAL   =  0
 const LBT_COMPLEX_RETSTYLE_ARGUMENT =  1
+const LBT_COMPLEX_RETSTYLE_FNDA     =  2
 const LBT_COMPLEX_RETSTYLE_UNKNOWN  = -1
 const LBT_COMPLEX_RETSTYLE_MAP = Dict(
     LBT_COMPLEX_RETSTYLE_NORMAL   => :normal,
     LBT_COMPLEX_RETSTYLE_ARGUMENT => :argument,
+    LBT_COMPLEX_RETSTYLE_FNDA     => :float_normal_double_argument,
     LBT_COMPLEX_RETSTYLE_UNKNOWN  => :unknown,
 )
 const LBT_INV_COMPLEX_RETSTYLE_MAP = Dict(v => k for (k, v) in LBT_COMPLEX_RETSTYLE_MAP)
@@ -69,10 +84,10 @@ struct LBTLibraryInfo
             lib_info.handle,
             unsafe_string(lib_info.suffix),
             unsafe_wrap(Vector{UInt8}, lib_info.active_forwards, div(num_exported_symbols,8)+1),
-            LBT_INTERFACE_MAP[lib_info.interface],
-            LBT_COMPLEX_RETSTYLE_MAP[lib_info.complex_retstyle],
-            LBT_F2C_MAP[lib_info.f2c],
-            LBT_CBLAS_MAP[lib_info.cblas],
+            @get_warn(LBT_INTERFACE_MAP, lib_info.interface),
+            @get_warn(LBT_COMPLEX_RETSTYLE_MAP, lib_info.complex_retstyle),
+            @get_warn(LBT_F2C_MAP, lib_info.f2c),
+            @get_warn(LBT_CBLAS_MAP, lib_info.cblas),
         )
     end
 end
@@ -247,11 +262,11 @@ If the given `symbol_name` is not contained within the list of exported symbols,
 function lbt_find_backing_library(symbol_name, interface::Symbol;
                                   config::LBTConfig = lbt_get_config())
     if interface ∉ (:ilp64, :lp64)
-        throw(ArgumentError("Invalid interface specification: '$(interface)'"))
+        throw(ArgumentError(lazy"Invalid interface specification: '$(interface)'"))
     end
     symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
     if symbol_idx === nothing
-        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
+        throw(ArgumentError(lazy"Invalid exported symbol name '$(symbol_name)'"))
     end
     # Convert to zero-indexed
     symbol_idx -= 1
@@ -269,6 +284,25 @@ function lbt_find_backing_library(symbol_name, interface::Symbol;
 end
 
 
+"""
+    lbt_forwarded_funcs(config::LBTConfig, lib::LBTLibraryInfo)
+
+Given a backing library `lib`, return the list of all functions that are
+forwarded to that library, as a vector of `String`s.
+"""
+function lbt_forwarded_funcs(config::LBTConfig, lib::LBTLibraryInfo)
+    forwarded_funcs = String[]
+    for (symbol_idx, symbol) in enumerate(config.exported_symbols)
+        forward_byte_offset = div(symbol_idx - 1, 8)
+        forward_byte_mask = 1 << mod(symbol_idx - 1, 8)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            push!(forwarded_funcs, symbol)
+        end
+    end
+    return forwarded_funcs
+end
+
+
 ## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
 ## bizarre and complex setups to be created.  If you run into strange errors while using
 ## it, the first thing you should ask yourself is whether you've set things up properly.
diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl
index d3d6234961c44..89e57d0dd27eb 100644
--- a/stdlib/LinearAlgebra/src/ldlt.jl
+++ b/stdlib/LinearAlgebra/src/ldlt.jl
@@ -175,7 +175,7 @@ function ldiv!(S::LDLt{<:Any,<:SymTridiagonal}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     n, nrhs = size(B, 1), size(B, 2)
     if size(S,1) != n
-        throw(DimensionMismatch("Matrix has dimensions $(size(S)) but right hand side has first dimension $n"))
+        throw(DimensionMismatch(lazy"Matrix has dimensions $(size(S)) but right hand side has first dimension $n"))
     end
     d = S.data.dv
     l = S.data.ev
diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl
index be32f1e863ff1..d2e82af5d6409 100644
--- a/stdlib/LinearAlgebra/src/lu.jl
+++ b/stdlib/LinearAlgebra/src/lu.jl
@@ -95,6 +95,11 @@ end
 function lu!(A::HermOrSym{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T);
         check::Bool = true, allowsingular::Bool = false) where {T}
     copytri!(A.data, A.uplo, isa(A, Hermitian))
+    @inbounds if isa(A, Hermitian) # realify diagonal
+        for i in axes(A, 1)
+            A.data[i,i] = A[i,i]
+        end
+    end
     lu!(A.data, pivot; check, allowsingular)
 end
 # for backward compatibility
@@ -680,7 +685,7 @@ function ldiv!(A::LU{T,Tridiagonal{T,V}}, B::AbstractVecOrMat) where {T,V}
     require_one_based_indexing(B)
     n = size(A,1)
     if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
+        throw(DimensionMismatch(lazy"matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
     end
     nrhs = size(B,2)
     dl = A.factors.dl
@@ -713,7 +718,7 @@ function ldiv!(transA::TransposeFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B
     A = transA.parent
     n = size(A,1)
     if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
+        throw(DimensionMismatch(lazy"matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
     end
     nrhs = size(B,2)
     dl = A.factors.dl
@@ -750,7 +755,7 @@ function ldiv!(adjA::AdjointFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::Ab
     A = adjA.parent
     n = size(A,1)
     if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
+        throw(DimensionMismatch(lazy"matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
     end
     nrhs = size(B,2)
     dl = A.factors.dl
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index 95a24b0d798ea..9a232d3ad1e51 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -61,34 +61,43 @@ function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
 end
 
 # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
-(*)(a::AbstractVector, tB::TransposeAbsMat) = reshape(a, length(a), 1) * tB
-(*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB
-(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
+function (*)(a::AbstractVector, B::AbstractMatrix)
+    require_one_based_indexing(a)
+    reshape(a, length(a), 1) * B
+end
 
 # Add a level of indirection and specialize _mul! to avoid ambiguities in mul!
 @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
                 alpha::Number, beta::Number) = _mul!(y, A, x, alpha, beta)
 
-@inline _mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
+_mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
                 alpha::Number, beta::Number) =
-    generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta))
-
+    generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, alpha, beta)
 # BLAS cases
 # equal eltypes
-@inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} =
+generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T},
+                alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemv!(y, tA, A, x, alpha, beta)
+generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T},
+                _add::MulAddMul = MulAddMul()) where {T<:BlasFloat} =
     gemv!(y, tA, A, x, _add.alpha, _add.beta)
 # Real (possibly transposed) matrix times complex vector.
 # Multiply the matrix with the real and imaginary parts separately
-@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
+generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
+                alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemv!(y, tA, A, x, alpha, beta)
+generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
+                _add::MulAddMul = MulAddMul()) where {T<:BlasReal} =
     gemv!(y, tA, A, x, _add.alpha, _add.beta)
 # Complex matrix times real vector.
 # Reinterpret the matrix as a real matrix and do real matvec computation.
 # works only in cooperation with BLAS when A is untransposed (tA == 'N')
 # but that check is included in gemv! anyway
-@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
+generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
+                alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemv!(y, tA, A, x, alpha, beta)
+generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
+                _add::MulAddMul = MulAddMul()) where {T<:BlasReal} =
     gemv!(y, tA, A, x, _add.alpha, _add.beta)
 
 # Vector-Matrix multiplication
@@ -285,15 +294,24 @@ true
 @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) = _mul!(C, A, B, α, β)
 # Add a level of indirection and specialize _mul! to avoid ambiguities in mul!
 @inline _mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) =
-    generic_matmatmul!(
+    generic_matmatmul_wrapper!(
         C,
         wrapper_char(A),
         wrapper_char(B),
         _unwrap(A),
         _unwrap(B),
-        MulAddMul(α, β)
+        α, β,
+        Val(wrapper_char_NTC(A) & wrapper_char_NTC(B))
     )
 
+# this indirection allows is to specialize on the types of the wrappers of A and B to some extent,
+# even though the wrappers are stripped off in mul!
+# By default, we ignore the wrapper info and forward the arguments to generic_matmatmul!
+Base.@constprop :aggressive function generic_matmatmul_wrapper!(C, tA, tB, A, B, α, β, @nospecialize(val))
+    generic_matmatmul!(C, tA, tB, A, B, α, β)
+end
+
+
 """
     rmul!(A, B)
 
@@ -360,48 +378,106 @@ julia> lmul!(F.Q, B)
 """
 lmul!(A, B)
 
-# THE one big BLAS dispatch
-# aggressive constant propagation makes mul!(C, A, B) invoke gemm_wrapper! directly
-Base.@constprop :aggressive function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
-                                    _add::MulAddMul=MulAddMul()) where {T<:BlasFloat}
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        if tA == 'T' && tB == 'N' && A === B
-            return syrk_wrapper!(C, 'T', A, _add)
-        elseif tA == 'N' && tB == 'T' && A === B
-            return syrk_wrapper!(C, 'N', A, _add)
-        elseif tA == 'C' && tB == 'N' && A === B
-            return herk_wrapper!(C, 'C', A, _add)
-        elseif tA == 'N' && tB == 'C' && A === B
-            return herk_wrapper!(C, 'N', A, _add)
-        else
-            return gemm_wrapper!(C, tA, tB, A, B, _add)
+# We may inline the matmul2x2! and matmul3x3! calls for `α == true`
+# to simplify the @stable_muladdmul branches
+function matmul2x2or3x3_nonzeroalpha!(C, tA, tB, A, B, α, β)
+    if size(C) == size(A) == size(B) == (2,2)
+        matmul2x2!(C, tA, tB, A, B, α, β)
+        return true
+    end
+    if size(C) == size(A) == size(B) == (3,3)
+        matmul3x3!(C, tA, tB, A, B, α, β)
+        return true
+    end
+    return false
+end
+function matmul2x2or3x3_nonzeroalpha!(C, tA, tB, A, B, α::Bool, β)
+    if size(C) == size(A) == size(B) == (2,2)
+        Aelements, Belements = _matmul2x2_elements(C, tA, tB, A, B)
+        @stable_muladdmul _modify2x2!(Aelements, Belements, C, MulAddMul(true, β))
+        return true
+    end
+    if size(C) == size(A) == size(B) == (3,3)
+        Aelements, Belements = _matmul3x3_elements(C, tA, tB, A, B)
+        @stable_muladdmul _modify3x3!(Aelements, Belements, C, MulAddMul(true, β))
+        return true
+    end
+    return false
+end
+
+# THE one big BLAS dispatch. This is split into two methods to improve latency
+Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
+                                    α::Number, β::Number, ::Val{true}) where {T<:BlasFloat}
+    mA, nA = lapack_size(tA, A)
+    mB, nB = lapack_size(tB, B)
+    if any(iszero, size(A)) || any(iszero, size(B)) || iszero(α)
+        if size(C) != (mA, nB)
+            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
+        end
+        return _rmul_or_fill!(C, β)
+    end
+    matmul2x2or3x3_nonzeroalpha!(C, tA, tB, A, B, α, β) && return C
+    # We convert the chars to uppercase to potentially unwrap a WrapperChar,
+    # and extract the char corresponding to the wrapper type
+    tA_uc, tB_uc = uppercase(tA), uppercase(tB)
+    # the map in all ensures constprop by acting on tA and tB individually, instead of looping over them.
+    if tA_uc == 'T' && tB_uc == 'N' && A === B
+        return syrk_wrapper!(C, 'T', A, α, β)
+    elseif tA_uc == 'N' && tB_uc == 'T' && A === B
+        return syrk_wrapper!(C, 'N', A, α, β)
+    elseif tA_uc == 'C' && tB_uc == 'N' && A === B
+        return herk_wrapper!(C, 'C', A, α, β)
+    elseif tA_uc == 'N' && tB_uc == 'C' && A === B
+        return herk_wrapper!(C, 'N', A, α, β)
+    else
+        return gemm_wrapper!(C, tA, tB, A, B, α, β)
+    end
+end
+Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
+                                    α::Number, β::Number, ::Val{false}) where {T<:BlasFloat}
+    mA, nA = lapack_size(tA, A)
+    mB, nB = lapack_size(tB, B)
+    if any(iszero, size(A)) || any(iszero, size(B)) || iszero(α)
+        if size(C) != (mA, nB)
+            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
         end
+        return _rmul_or_fill!(C, β)
     end
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+    matmul2x2or3x3_nonzeroalpha!(C, tA, tB, A, B, α, β) && return C
+    # We convert the chars to uppercase to potentially unwrap a WrapperChar,
+    # and extract the char corresponding to the wrapper type
+    tA_uc, tB_uc = uppercase(tA), uppercase(tB)
+    alpha, beta = promote(α, β, zero(T))
     if alpha isa Union{Bool,T} && beta isa Union{Bool,T}
-        if (tA == 'S' || tA == 's') && tB == 'N'
+        if tA_uc == 'S' && tB_uc == 'N'
             return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C)
-        elseif (tB == 'S' || tB == 's') && tA == 'N'
+        elseif tA_uc == 'N' && tB_uc == 'S'
             return BLAS.symm!('R', tB == 'S' ? 'U' : 'L', alpha, B, A, beta, C)
-        elseif (tA == 'H' || tA == 'h') && tB == 'N'
+        elseif tA_uc == 'H' && tB_uc == 'N'
             return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C)
-        elseif (tB == 'H' || tB == 'h') && tA == 'N'
+        elseif tA_uc == 'N' && tB_uc == 'H'
             return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C)
         end
     end
-    return _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
+    return _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(α, β))
 end
-
-# Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency.
-Base.@constprop :aggressive function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
-                    _add::MulAddMul=MulAddMul()) where {T<:BlasReal}
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        gemm_wrapper!(C, tA, tB, A, B, _add)
-    else
-        _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
-    end
+# legacy method
+Base.@constprop :aggressive generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
+        _add::MulAddMul = MulAddMul()) where {T<:BlasFloat} =
+    generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)
+
+function generic_matmatmul_wrapper!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+                    α::Number, β::Number, ::Val{true}) where {T<:BlasReal}
+    gemm_wrapper!(C, tA, tB, A, B, α, β)
 end
-
+Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+                    α::Number, β::Number, ::Val{false}) where {T<:BlasReal}
+    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(α, β))
+end
+# legacy method
+Base.@constprop :aggressive generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+        _add::MulAddMul = MulAddMul()) where {T<:BlasReal} =
+    generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)
 
 # Supporting functions for matrix multiplication
 
@@ -423,6 +499,12 @@ end
     A
 end
 
+_fullstride2(A, f=identity) = f(stride(A, 2)) >= size(A, 1)
+# for some standard StridedArrays, the _fullstride2 condition is known to hold at compile-time
+# We specialize the function for certain StridedArray subtypes
+_fullstride2(A::StridedArrayStdSubArray, ::typeof(abs)) = true
+_fullstride2(A::StridedArrayStdSubArrayIncr, ::typeof(identity)) = true
+
 Base.@constprop :aggressive function gemv!(y::StridedVector{T}, tA::AbstractChar,
                 A::StridedVecOrMat{T}, x::StridedVector{T},
                α::Number=true, β::Number=false) where {T<:BlasFloat}
@@ -434,23 +516,24 @@ Base.@constprop :aggressive function gemv!(y::StridedVector{T}, tA::AbstractChar
     mA == 0 && return y
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
     if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        stride(A, 1) == 1 && _fullstride2(A, abs) &&
         !iszero(stride(x, 1)) && # We only check input's stride here.
-        if tA in ('N', 'T', 'C')
+        if tA_uc in ('N', 'T', 'C')
             return BLAS.gemv!(tA, alpha, A, x, beta, y)
-        elseif tA in ('S', 's')
+        elseif tA_uc == 'S'
             return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y)
-        elseif tA in ('H', 'h')
+        elseif tA_uc == 'H'
             return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y)
         end
     end
-    if tA in ('S', 's', 'H', 'h')
+    if tA_uc in ('S', 'H')
         # re-wrap again and use plain ('N') matvec mul algorithm,
         # because _generic_matvecmul! can't handle the HermOrSym cases specifically
-        return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
+        return @stable_muladdmul _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
     else
-        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
+        return @stable_muladdmul _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
@@ -464,15 +547,16 @@ Base.@constprop :aggressive function gemv!(y::StridedVector{Complex{T}}, tA::Abs
     mA == 0 && return y
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
     if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
-        stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y`
-        !iszero(stride(x, 1))
+            stride(A, 1) == 1 && _fullstride2(A, abs) &&
+            stride(y, 1) == 1 && tA_uc == 'N' && # reinterpret-based optimization is valid only for contiguous `y`
+            !iszero(stride(x, 1))
         BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y))
         return y
     else
-        Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA)
-        return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β))
+        Anew, ta = tA_uc in ('S', 'H') ? (wrap(A, tA), oftype(tA, 'N')) : (A, tA)
+        return @stable_muladdmul _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β))
     end
 end
 
@@ -487,27 +571,31 @@ Base.@constprop :aggressive function gemv!(y::StridedVector{Complex{T}}, tA::Abs
     mA == 0 && return y
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
     @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
-        !iszero(stride(x, 1)) && tA in ('N', 'T', 'C')
+            stride(A, 1) == 1 && _fullstride2(A, abs) &&
+            !iszero(stride(x, 1)) && tA_uc in ('N', 'T', 'C')
         xfl = reinterpret(reshape, T, x) # Use reshape here.
         yfl = reinterpret(reshape, T, y)
         BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :])
         BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :])
         return y
-    elseif tA in ('S', 's', 'H', 'h')
+    elseif tA_uc in ('S', 'H')
         # re-wrap again and use plain ('N') matvec mul algorithm,
         # because _generic_matvecmul! can't handle the HermOrSym cases specifically
-        return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
+        return @stable_muladdmul _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
     else
-        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
+        return @stable_muladdmul _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
-function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T},
-        _add = MulAddMul()) where {T<:BlasFloat}
+# the aggressive constprop pushes tA and tB into gemm_wrapper!, which is needed for wrap calls within it
+# to be concretely inferred
+Base.@constprop :aggressive function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T},
+        alpha::Number, beta::Number) where {T<:BlasFloat}
     nC = checksquare(C)
-    if tA == 'T'
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
+    if tA_uc == 'T'
         (nA, mA) = size(A,1), size(A,2)
         tAt = 'N'
     else
@@ -517,35 +605,31 @@ function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat
     if nC != mA
         throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
     end
-    if mA == 0 || nA == 0 || iszero(_add.alpha)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-    if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, _add)
-    end
-    if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, _add)
-    end
 
     # BLAS.syrk! only updates symmetric C
     # alternatively, make non-zero β a show-stopper for BLAS.syrk!
-    if iszero(_add.beta) || issymmetric(C)
-        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+    if iszero(beta) || issymmetric(C)
+        α, β = promote(alpha, beta, zero(T))
         if (alpha isa Union{Bool,T} &&
-            beta isa Union{Bool,T} &&
-            stride(A, 1) == stride(C, 1) == 1 &&
-            stride(A, 2) >= size(A, 1) &&
-            stride(C, 2) >= size(C, 1))
+                beta isa Union{Bool,T} &&
+                stride(A, 1) == stride(C, 1) == 1 &&
+                _fullstride2(A) && _fullstride2(C))
             return copytri!(BLAS.syrk!('U', tA, alpha, A, beta, C), 'U')
         end
     end
-    return gemm_wrapper!(C, tA, tAt, A, A, _add)
+    return gemm_wrapper!(C, tA, tAt, A, A, alpha, beta)
 end
-
-function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
-        _add = MulAddMul()) where {T<:BlasReal}
+# legacy method
+syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T}, _add::MulAddMul = MulAddMul()) where {T<:BlasFloat} =
+    syrk_wrapper!(C, tA, A, _add.alpha, _add.beta)
+
+# the aggressive constprop pushes tA and tB into gemm_wrapper!, which is needed for wrap calls within it
+# to be concretely inferred
+Base.@constprop :aggressive function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
+        α::Number, β::Number) where {T<:BlasReal}
     nC = checksquare(C)
-    if tA == 'C'
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
+    if tA_uc == 'C'
         (nA, mA) = size(A,1), size(A,2)
         tAt = 'N'
     else
@@ -555,48 +639,50 @@ function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA
     if nC != mA
         throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
     end
-    if mA == 0 || nA == 0 || iszero(_add.alpha)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-    if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, _add)
-    end
-    if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, _add)
-    end
 
     # Result array does not need to be initialized as long as beta==0
     #    C = Matrix{T}(undef, mA, mA)
 
-    if iszero(_add.beta) || issymmetric(C)
-        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+    if iszero(β) || issymmetric(C)
+        alpha, beta = promote(α, β, zero(T))
         if (alpha isa Union{Bool,T} &&
-            beta isa Union{Bool,T} &&
-            stride(A, 1) == stride(C, 1) == 1 &&
-            stride(A, 2) >= size(A, 1) &&
-            stride(C, 2) >= size(C, 1))
+                beta isa Union{Bool,T} &&
+                stride(A, 1) == stride(C, 1) == 1 &&
+                _fullstride2(A) && _fullstride2(C))
             return copytri!(BLAS.herk!('U', tA, alpha, A, beta, C), 'U', true)
         end
     end
-    return gemm_wrapper!(C, tA, tAt, A, A, _add)
+    return gemm_wrapper!(C, tA, tAt, A, A, α, β)
 end
-
-function gemm_wrapper(tA::AbstractChar, tB::AbstractChar,
+# legacy method
+herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
+        _add::MulAddMul = MulAddMul()) where {T<:BlasReal} =
+    herk_wrapper!(C, tA, A, _add.alpha, _add.beta)
+
+# Aggressive constprop helps propagate the values of tA and tB into wrap, which
+# makes the calls concretely inferred
+Base.@constprop :aggressive function gemm_wrapper(tA::AbstractChar, tB::AbstractChar,
                       A::StridedVecOrMat{T},
                       B::StridedVecOrMat{T}) where {T<:BlasFloat}
     mA, nA = lapack_size(tA, A)
     mB, nB = lapack_size(tB, B)
     C = similar(B, T, mA, nB)
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        gemm_wrapper!(C, tA, tB, A, B)
+    # We convert the chars to uppercase to potentially unwrap a WrapperChar,
+    # and extract the char corresponding to the wrapper type
+    tA_uc, tB_uc = uppercase(tA), uppercase(tB)
+    # the map in all ensures constprop by acting on tA and tB individually, instead of looping over them.
+    if all(map(in(('N', 'T', 'C')), (tA_uc, tB_uc)))
+        gemm_wrapper!(C, tA, tB, A, B, true, false)
     else
-        _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
+        _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul())
     end
 end
 
-function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar,
+# Aggressive constprop helps propagate the values of tA and tB into wrap, which
+# makes the calls concretely inferred
+Base.@constprop :aggressive function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar,
                        A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
-                       _add = MulAddMul()) where {T<:BlasFloat}
+                       α::Number, β::Number) where {T<:BlasFloat}
     mA, nA = lapack_size(tA, A)
     mB, nB = lapack_size(tB, B)
 
@@ -608,35 +694,25 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
         throw(ArgumentError("output matrix must not be aliased with input matrix"))
     end
 
-    if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
-        if size(C) != (mA, nB)
-            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
-        end
-        return _rmul_or_fill!(C, _add.beta)
-    end
-
-    if mA == 2 && nA == 2 && nB == 2
-        return matmul2x2!(C, tA, tB, A, B, _add)
-    end
-    if mA == 3 && nA == 3 && nB == 3
-        return matmul3x3!(C, tA, tB, A, B, _add)
-    end
-
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+    alpha, beta = promote(α, β, zero(T))
     if (alpha isa Union{Bool,T} &&
-        beta isa Union{Bool,T} &&
-        stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
-        stride(A, 2) >= size(A, 1) &&
-        stride(B, 2) >= size(B, 1) &&
-        stride(C, 2) >= size(C, 1))
+            beta isa Union{Bool,T} &&
+            stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
+            _fullstride2(A) && _fullstride2(B) && _fullstride2(C))
         return BLAS.gemm!(tA, tB, alpha, A, B, beta, C)
     end
-    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
+    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(α, β))
 end
-
-function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
+# legacy method
+gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar,
+        A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, _add::MulAddMul = MulAddMul()) where {T<:BlasFloat} =
+    gemm_wrapper!(C, tA, tB, A, B, _add.alpha, _add.beta)
+
+# Aggressive constprop helps propagate the values of tA and tB into wrap, which
+# makes the calls concretely inferred
+Base.@constprop :aggressive function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
                        A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
-                       _add = MulAddMul()) where {T<:BlasReal}
+                       α::Number, β::Number) where {T<:BlasReal}
     mA, nA = lapack_size(tA, A)
     mB, nB = lapack_size(tB, B)
 
@@ -648,34 +724,24 @@ function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::Abs
         throw(ArgumentError("output matrix must not be aliased with input matrix"))
     end
 
-    if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
-        if size(C) != (mA, nB)
-            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
-        end
-        return _rmul_or_fill!(C, _add.beta)
-    end
-
-    if mA == 2 && nA == 2 && nB == 2
-        return matmul2x2!(C, tA, tB, A, B, _add)
-    end
-    if mA == 3 && nA == 3 && nB == 3
-        return matmul3x3!(C, tA, tB, A, B, _add)
-    end
+    alpha, beta = promote(α, β, zero(T))
 
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
 
     # Make-sure reinterpret-based optimization is BLAS-compatible.
     if (alpha isa Union{Bool,T} &&
-        beta isa Union{Bool,T} &&
-        stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
-        stride(A, 2) >= size(A, 1) &&
-        stride(B, 2) >= size(B, 1) &&
-        stride(C, 2) >= size(C, 1) && tA == 'N')
+            beta isa Union{Bool,T} &&
+            stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
+            _fullstride2(A) && _fullstride2(B) && _fullstride2(C) && tA_uc == 'N')
         BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C))
         return C
     end
-    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
+    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(α, β))
 end
+# legacy method
+gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
+        A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, _add::MulAddMul = MulAddMul()) where {T<:BlasReal} =
+    gemm_wrapper!(C, tA, tB, A, B, _add.alpha, _add.beta)
 
 # blas.jl defines matmul for floats; other integer and mixed precision
 # cases are handled here
@@ -703,9 +769,10 @@ parameters must satisfy `length(ir_dest) == length(ir_src)` and
 See also [`copy_transpose!`](@ref) and [`copy_adjoint!`](@ref).
 """
 function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
-    if tM == 'N'
+    tM_uc = uppercase(tM) # potentially convert a WrapperChar to a Char
+    if tM_uc == 'N'
         copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src)
-    elseif tM == 'T'
+    elseif tM_uc == 'T'
         copy_transpose!(B, ir_dest, jr_dest, M, jr_src, ir_src)
     else
         copy_adjoint!(B, ir_dest, jr_dest, M, jr_src, ir_src)
@@ -734,11 +801,12 @@ range parameters must satisfy `length(ir_dest) == length(jr_src)` and
 See also [`copyto!`](@ref) and [`copy_adjoint!`](@ref).
 """
 function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
-    if tM == 'N'
+    tM_uc = uppercase(tM) # potentially convert a WrapperChar to a Char
+    if tM_uc == 'N'
         copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
         copyto!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
+        tM_uc == 'C' && conj!(@view B[ir_dest, jr_dest])
     end
     B
 end
@@ -749,9 +817,12 @@ end
 # NOTE: the generic version is also called as fallback for
 #       strides != 1 cases
 
+generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, alpha::Number, beta::Number) =
+    @stable_muladdmul generic_matvecmul!(C, tA, A, B, MulAddMul(alpha, beta))
 @inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector,
                                     _add::MulAddMul = MulAddMul())
-    Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA)
+    tA_uc = uppercase(tA) # potentially convert a WrapperChar to a Char
+    Anew, ta = tA_uc in ('S', 'H') ? (wrap(A, tA), oftype(tA, 'N')) : (A, tA)
     return _generic_matvecmul!(C, ta, Anew, B, _add)
 end
 
@@ -779,7 +850,8 @@ function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::Abst
         else
             for k = 1:mA
                 aoffs = (k-1)*Astride
-                s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
+                firstterm = transpose(A[aoffs + 1])*B[1]
+                s = zero(firstterm + firstterm)
                 for i = 1:nA
                     s += transpose(A[aoffs+i]) * B[i]
                 end
@@ -794,7 +866,8 @@ function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::Abst
         else
             for k = 1:mA
                 aoffs = (k-1)*Astride
-                s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
+                firstterm = A[aoffs + 1]'B[1]
+                s = zero(firstterm + firstterm)
                 for i = 1:nA
                     s += A[aoffs + i]'B[i]
                 end
@@ -827,12 +900,15 @@ function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S})
     mA, nA = lapack_size(tA, A)
     mB, nB = lapack_size(tB, B)
     C = similar(B, promote_op(matprod, T, S), mA, nB)
-    generic_matmatmul!(C, tA, tB, A, B)
+    generic_matmatmul!(C, tA, tB, A, B, true, false)
 end
 
 # aggressive const prop makes mixed eltype mul!(C, A, B) invoke _generic_matmatmul! directly
-Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) =
+# legacy method
+Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul = MulAddMul()) =
     _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
+Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) =
+    _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(α, β))
 
 @noinline function _generic_matmatmul!(C::AbstractVecOrMat{R}, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S},
                              _add::MulAddMul) where {T,S,R}
@@ -894,162 +970,156 @@ end
 
 
 # multiply 2x2 matrices
-function matmul2x2(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
+Base.@constprop :aggressive function matmul2x2(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
     matmul2x2!(similar(B, promote_op(matprod, T, S), 2, 2), tA, tB, A, B)
 end
 
-function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
-                    _add::MulAddMul = MulAddMul())
+function __matmul_checks(C, A, B, sz)
     require_one_based_indexing(C, A, B)
-    if !(size(A) == size(B) == size(C) == (2,2))
+    if C === A || B === C
+        throw(ArgumentError("output matrix must not be aliased with input matrix"))
+    end
+    if !(size(A) == size(B) == size(C) == sz)
         throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
     end
+    return nothing
+end
+
+# separate function with the core of matmul2x2! that doesn't depend on a MulAddMul
+Base.@constprop :aggressive function _matmul2x2_elements(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix)
+    __matmul_checks(C, A, B, (2,2))
+    __matmul2x2_elements(tA, tB, A, B)
+end
+Base.@constprop :aggressive function __matmul2x2_elements(tA, A::AbstractMatrix)
     @inbounds begin
-    if tA == 'N'
+    tA_uc = uppercase(tA) # possibly unwrap a WrapperChar
+    if tA_uc == 'N'
         A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2]
-    elseif tA == 'T'
+    elseif tA_uc == 'T'
         # TODO making these lazy could improve perf
         A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1]))
         A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2]))
-    elseif tA == 'C'
+    elseif tA_uc == 'C'
         # TODO making these lazy could improve perf
         A11 = copy(A[1,1]'); A12 = copy(A[2,1]')
         A21 = copy(A[1,2]'); A22 = copy(A[2,2]')
-    elseif tA == 'S'
-        A11 = symmetric(A[1,1], :U); A12 = A[1,2]
-        A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U)
-    elseif tA == 's'
-        A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1]))
-        A21 = A[2,1]; A22 = symmetric(A[2,2], :L)
-    elseif tA == 'H'
-        A11 = hermitian(A[1,1], :U); A12 = A[1,2]
-        A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U)
-    else # if tA == 'h'
-        A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1]))
-        A21 = A[2,1]; A22 = hermitian(A[2,2], :L)
-    end
-    if tB == 'N'
-        B11 = B[1,1]; B12 = B[1,2];
-        B21 = B[2,1]; B22 = B[2,2]
-    elseif tB == 'T'
-        # TODO making these lazy could improve perf
-        B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1]))
-        B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2]))
-    elseif tB == 'C'
-        # TODO making these lazy could improve perf
-        B11 = copy(B[1,1]'); B12 = copy(B[2,1]')
-        B21 = copy(B[1,2]'); B22 = copy(B[2,2]')
-    elseif tB == 'S'
-        B11 = symmetric(B[1,1], :U); B12 = B[1,2]
-        B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U)
-    elseif tB == 's'
-        B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1]))
-        B21 = B[2,1]; B22 = symmetric(B[2,2], :L)
-    elseif tB == 'H'
-        B11 = hermitian(B[1,1], :U); B12 = B[1,2]
-        B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U)
-    else # if tB == 'h'
-        B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1]))
-        B21 = B[2,1]; B22 = hermitian(B[2,2], :L)
+    elseif tA_uc == 'S'
+        if isuppercase(tA) # tA == 'S'
+            A11 = symmetric(A[1,1], :U); A12 = A[1,2]
+            A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U)
+        else
+            A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1]))
+            A21 = A[2,1]; A22 = symmetric(A[2,2], :L)
+        end
+    elseif tA_uc == 'H'
+        if isuppercase(tA) # tA == 'H'
+            A11 = hermitian(A[1,1], :U); A12 = A[1,2]
+            A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U)
+        else # if tA == 'h'
+            A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1]))
+            A21 = A[2,1]; A22 = hermitian(A[2,2], :L)
+        end
     end
+    end # inbounds
+    A11, A12, A21, A22
+end
+Base.@constprop :aggressive __matmul2x2_elements(tA, tB, A, B) = __matmul2x2_elements(tA, A), __matmul2x2_elements(tB, B)
+
+function _modify2x2!(Aelements, Belements, C, _add)
+    (A11, A12, A21, A22), (B11, B12, B21, B22) = Aelements, Belements
+    @inbounds begin
     _modify!(_add, A11*B11 + A12*B21, C, (1,1))
-    _modify!(_add, A11*B12 + A12*B22, C, (1,2))
     _modify!(_add, A21*B11 + A22*B21, C, (2,1))
+    _modify!(_add, A11*B12 + A12*B22, C, (1,2))
     _modify!(_add, A21*B12 + A22*B22, C, (2,2))
     end # inbounds
     C
 end
+Base.@constprop :aggressive function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
+                    α = true, β = false)
+    Aelements, Belements = _matmul2x2_elements(C, tA, tB, A, B)
+    @stable_muladdmul _modify2x2!(Aelements, Belements, C, MulAddMul(α, β))
+    C
+end
 
 # Multiply 3x3 matrices
-function matmul3x3(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
+Base.@constprop :aggressive function matmul3x3(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
     matmul3x3!(similar(B, promote_op(matprod, T, S), 3, 3), tA, tB, A, B)
 end
 
-function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
-                    _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, A, B)
-    if !(size(A) == size(B) == size(C) == (3,3))
-        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
-    end
+# separate function with the core of matmul3x3! that doesn't depend on a MulAddMul
+Base.@constprop :aggressive function _matmul3x3_elements(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix)
+    __matmul_checks(C, A, B, (3,3))
+    __matmul3x3_elements(tA, tB, A, B)
+end
+Base.@constprop :aggressive function __matmul3x3_elements(tA, A::AbstractMatrix)
     @inbounds begin
-    if tA == 'N'
+    tA_uc = uppercase(tA) # possibly unwrap a WrapperChar
+    if tA_uc == 'N'
         A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3]
         A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3]
         A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3]
-    elseif tA == 'T'
+    elseif tA_uc == 'T'
         # TODO making these lazy could improve perf
         A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1]))
         A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2]))
         A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = copy(transpose(A[3,3]))
-    elseif tA == 'C'
+    elseif tA_uc == 'C'
         # TODO making these lazy could improve perf
         A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]')
         A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]')
         A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]')
-    elseif tA == 'S'
-        A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
-        A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3]
-        A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U)
-    elseif tA == 's'
-        A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1]))
-        A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2]))
-        A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L)
-    elseif tA == 'H'
-        A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
-        A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3]
-        A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U)
-    else # if tA == 'h'
-        A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1]))
-        A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2]))
-        A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L)
-    end
-
-    if tB == 'N'
-        B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3]
-        B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3]
-        B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3]
-    elseif tB == 'T'
-        # TODO making these lazy could improve perf
-        B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1]))
-        B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2]))
-        B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = copy(transpose(B[3,3]))
-    elseif tB == 'C'
-        # TODO making these lazy could improve perf
-        B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]')
-        B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]')
-        B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]')
-    elseif tB == 'S'
-        B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3]
-        B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); B23 = B[2,3]
-        B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U)
-    elseif tB == 's'
-        B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1]))
-        B21 = B[2,1]; B22 = symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2]))
-        B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L)
-    elseif tB == 'H'
-        B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3]
-        B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3]
-        B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U)
-    else # if tB == 'h'
-        B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1]))
-        B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2]))
-        B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L)
+    elseif tA_uc == 'S'
+        if isuppercase(tA) # tA == 'S'
+            A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
+            A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3]
+            A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U)
+        else
+            A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1]))
+            A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2]))
+            A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L)
+        end
+    elseif tA_uc == 'H'
+        if isuppercase(tA) # tA == 'H'
+            A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
+            A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3]
+            A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U)
+        else # if tA == 'h'
+            A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1]))
+            A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2]))
+            A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L)
+        end
     end
+    end # inbounds
+    A11, A12, A13, A21, A22, A23, A31, A32, A33
+end
+Base.@constprop :aggressive __matmul3x3_elements(tA, tB, A, B) = __matmul3x3_elements(tA, A), __matmul3x3_elements(tB, B)
 
+function _modify3x3!(Aelements, Belements, C, _add)
+    (A11, A12, A13, A21, A22, A23, A31, A32, A33),
+        (B11, B12, B13, B21, B22, B23, B31, B32, B33) = Aelements, Belements
+    @inbounds begin
     _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1))
-    _modify!(_add, A11*B12 + A12*B22 + A13*B32, C, (1,2))
-    _modify!(_add, A11*B13 + A12*B23 + A13*B33, C, (1,3))
-
     _modify!(_add, A21*B11 + A22*B21 + A23*B31, C, (2,1))
-    _modify!(_add, A21*B12 + A22*B22 + A23*B32, C, (2,2))
-    _modify!(_add, A21*B13 + A22*B23 + A23*B33, C, (2,3))
-
     _modify!(_add, A31*B11 + A32*B21 + A33*B31, C, (3,1))
+
+    _modify!(_add, A11*B12 + A12*B22 + A13*B32, C, (1,2))
+    _modify!(_add, A21*B12 + A22*B22 + A23*B32, C, (2,2))
     _modify!(_add, A31*B12 + A32*B22 + A33*B32, C, (3,2))
+
+    _modify!(_add, A11*B13 + A12*B23 + A13*B33, C, (1,3))
+    _modify!(_add, A21*B13 + A22*B23 + A23*B33, C, (2,3))
     _modify!(_add, A31*B13 + A32*B23 + A33*B33, C, (3,3))
     end # inbounds
     C
 end
+Base.@constprop :aggressive function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
+                    α = true, β = false)
+
+    Aelements, Belements = _matmul3x3_elements(C, tA, tB, A, B)
+    @stable_muladdmul _modify3x3!(Aelements, Belements, C, MulAddMul(α, β))
+    C
+end
 
 const RealOrComplex = Union{Real,Complex}
 
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index 782e4778c56c9..9a89e58372d08 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -417,7 +417,7 @@ true
     `qr` returns multiple types because LAPACK uses several representations
     that minimize the memory storage requirements of products of Householder
     elementary reflectors, so that the `Q` and `R` matrices can be stored
-    compactly rather as two separate dense matrices.
+    compactly rather than two separate dense matrices.
 """
 function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T
     require_one_based_indexing(A)
@@ -535,13 +535,20 @@ function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T}
     return B
 end
 
+function rank(A::QRPivoted; atol::Real=0, rtol::Real=min(size(A)...) * eps(real(float(one(eltype(A.Q))))) * iszero(atol))
+    m = min(size(A)...)
+    m == 0 && return 0
+    tol = max(atol, rtol*abs(A.R[1,1]))
+    return something(findfirst(i -> abs(A.R[i,i]) <= tol, 1:m), m+1) - 1
+end
+
 # Julia implementation similar to xgelsy
 function ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}, rcond::Real) where {T<:BlasFloat}
     require_one_based_indexing(B)
     m, n = size(A)
 
     if m > size(B, 1) || n > size(B, 1)
-        throw(DimensionMismatch("B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))"))
+        throw(DimensionMismatch(lazy"B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))"))
     end
 
     if length(A.factors) == 0 || length(B) == 0
@@ -734,7 +741,7 @@ _ret_size(A::Factorization, B::AbstractMatrix) = (max(size(A, 2), size(B, 1)), s
 function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{T}}) where T<:BlasReal
     require_one_based_indexing(BIn)
     m, n = size(A)
-    m == size(BIn, 1) || throw(DimensionMismatch("left hand side has $m rows, but right hand side has $(size(BIn,1)) rows"))
+    m == size(BIn, 1) || throw(DimensionMismatch(lazy"left hand side has $m rows, but right hand side has $(size(BIn,1)) rows"))
 
 # |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
 # |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 136f566f68447..5a7c98cfdf32c 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -15,20 +15,25 @@ Diagonal(A::Bidiagonal) = Diagonal(A.dv)
 SymTridiagonal(A::Bidiagonal) =
     iszero(A.ev) ? SymTridiagonal(A.dv, A.ev) :
         throw(ArgumentError("matrix cannot be represented as SymTridiagonal"))
-Tridiagonal(A::Bidiagonal) =
-    Tridiagonal(A.uplo == 'U' ? fill!(similar(A.ev), 0) : A.ev, A.dv,
-                A.uplo == 'U' ? A.ev : fill!(similar(A.ev), 0))
+function Tridiagonal(A::Bidiagonal)
+    # ensure that the types are identical, even if zero returns a different type
+    z = oftype(A.ev, zero(A.ev))
+    Tridiagonal(A.uplo == 'U' ? z : A.ev, A.dv, A.uplo == 'U' ? A.ev : z)
+end
+
+_diagview(S::SymTridiagonal{<:Number}) = S.dv
+_diagview(S::SymTridiagonal) = view(S, diagind(S, IndexStyle(S)))
 
 # conversions from SymTridiagonal to other special matrix types
-Diagonal(A::SymTridiagonal) = Diagonal(A.dv)
+Diagonal(A::SymTridiagonal) = Diagonal(_diagview(A))
 
 # These can fail when ev has the same length as dv
 # TODO: Revisit when a good solution for #42477 is found
-Bidiagonal(A::SymTridiagonal) =
+Bidiagonal(A::SymTridiagonal{<:Number}) =
     iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) :
         throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
-Tridiagonal(A::SymTridiagonal) =
-    Tridiagonal(copy(A.ev), A.dv, A.ev)
+Tridiagonal(A::SymTridiagonal{<:Number}) =
+    Tridiagonal(A.ev, A.dv, A.ev)
 
 # conversions from Tridiagonal to other special matrix types
 Diagonal(A::Tridiagonal) = Diagonal(A.d)
@@ -109,9 +114,9 @@ end
 
 # disambiguation between triangular and banded matrices, banded ones "dominate"
 _mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) =
-    _mul!(C, A, B, MulAddMul(alpha, beta))
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
 _mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular, alpha::Number, beta::Number) =
-    _mul!(C, A, B, MulAddMul(alpha, beta))
+    @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
 
 function *(H::UpperHessenberg, B::Bidiagonal)
     T = promote_op(matprod, eltype(H), eltype(B))
@@ -161,26 +166,45 @@ function (-)(A::Diagonal, B::Bidiagonal)
     Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo)
 end
 
+# Return a SymTridiagonal if the elements of `newdv` are
+# statically known to be symmetric. Return a Tridiagonal otherwise
+function _symtri_or_tri(dl, d, du)
+    new_du = oftype(d, du)
+    new_dl = oftype(d, dl)
+    if symmetric_type(eltype(d)) == eltype(d)
+        SymTridiagonal(d, new_du)
+    else
+        Tridiagonal(new_dl, d, new_du)
+    end
+end
+
 @commutative function (+)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag + B.dv
-    SymTridiagonal(A.diag + B.dv, typeof(newdv)(B.ev))
+    newdv = A.diag + _diagview(B)
+    _symtri_or_tri(_evview_transposed(B), newdv, _evview(B))
 end
 
 function (-)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag - B.dv
-    SymTridiagonal(newdv, typeof(newdv)(-B.ev))
+    newdv = A.diag - _diagview(B)
+    _symtri_or_tri(-_evview_transposed(B), newdv, -_evview(B))
 end
 
 function (-)(A::SymTridiagonal, B::Diagonal)
-    newdv = A.dv - B.diag
-    SymTridiagonal(newdv, typeof(newdv)(A.ev))
+    newdv = _diagview(A) - B.diag
+    _symtri_or_tri(_evview_transposed(A), newdv, _evview(A))
 end
 
 # this set doesn't have the aforementioned problem
-
-@commutative (+)(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B))
--(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B))
--(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du)
+_evview_transposed(S::SymTridiagonal{<:Number}) = _evview(S)
+_evview_transposed(S::SymTridiagonal) = transpose.(_evview(S))
+@commutative function (+)(A::Tridiagonal, B::SymTridiagonal)
+    Tridiagonal(A.dl+_evview_transposed(B), A.d+_diagview(B), A.du+_evview(B))
+end
+function -(A::Tridiagonal, B::SymTridiagonal)
+    Tridiagonal(A.dl-_evview_transposed(B), A.d-_diagview(B), A.du-_evview(B))
+end
+function -(A::SymTridiagonal, B::Tridiagonal)
+    Tridiagonal(_evview_transposed(A)-B.dl, _diagview(A)-B.d, _evview(A)-B.du)
+end
 
 @commutative function (+)(A::Diagonal, B::Tridiagonal)
     newdv = A.diag + B.d
@@ -213,18 +237,18 @@ function (-)(A::Tridiagonal, B::Bidiagonal)
 end
 
 @commutative function (+)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv + B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...)
+    newdv = A.dv + _diagview(B)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview_transposed(B)), newdv, A.ev+_evview(B)) : (A.ev+_evview_transposed(B), newdv, typeof(newdv)(_evview(B))))...)
 end
 
 function (-)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...)
+    newdv = A.dv - _diagview(B)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview_transposed(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview_transposed(B), newdv, typeof(newdv)(-_evview(B))))...)
 end
 
 function (-)(A::SymTridiagonal, B::Bidiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
+    newdv = _diagview(A) - B.dv
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview_transposed(A)), newdv, _evview(A)-B.ev) : (_evview_transposed(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 @commutative function (+)(A::Tridiagonal, B::UniformScaling)
@@ -254,7 +278,7 @@ function (-)(A::UniformScaling, B::Tridiagonal)
 end
 function (-)(A::UniformScaling, B::SymTridiagonal)
     dv = Ref(A) .- B.dv
-    SymTridiagonal(dv, convert(typeof(dv), -B.ev))
+    SymTridiagonal(dv, convert(typeof(dv), -_evview(B)))
 end
 function (-)(A::UniformScaling, B::Bidiagonal)
     dv = Ref(A) .- B.dv
@@ -264,6 +288,25 @@ function (-)(A::UniformScaling, B::Diagonal)
     Diagonal(Ref(A) .- B.diag)
 end
 
+for f in (:+, :-)
+    @eval function $f(D::Diagonal{<:Number}, S::Symmetric)
+        uplo = sym_uplo(S.uplo)
+        return Symmetric(parentof_applytri($f, Symmetric(D, uplo), S), uplo)
+    end
+    @eval function $f(S::Symmetric, D::Diagonal{<:Number})
+        uplo = sym_uplo(S.uplo)
+        return Symmetric(parentof_applytri($f, S, Symmetric(D, uplo)), uplo)
+    end
+    @eval function $f(D::Diagonal{<:Real}, H::Hermitian)
+        uplo = sym_uplo(H.uplo)
+        return Hermitian(parentof_applytri($f, Hermitian(D, uplo), H), uplo)
+    end
+    @eval function $f(H::Hermitian, D::Diagonal{<:Real})
+        uplo = sym_uplo(H.uplo)
+        return Hermitian(parentof_applytri($f, H, Hermitian(D, uplo)), uplo)
+    end
+end
+
 ## Diagonal construction from UniformScaling
 Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
 Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
@@ -284,10 +327,17 @@ _small_enough(A::Union{Diagonal, Bidiagonal}) = size(A, 1) <= 1
 _small_enough(A::Tridiagonal) = size(A, 1) <= 2
 _small_enough(A::SymTridiagonal) = size(A, 1) <= 2
 
-function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x)
+function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal}, x)
+    xT = convert(eltype(A), x)
+    (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT)
+    throw(ArgumentError(lazy"array of type $(typeof(A)) and size $(size(A)) can
+    not be filled with $x, since some of its entries are constrained."))
+end
+function fill!(A::SymTridiagonal, x)
+    issymmetric(x) || throw(ArgumentError("cannot fill a SymTridiagonal with an asymmetric value"))
     xT = convert(eltype(A), x)
     (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT)
-    throw(ArgumentError("array of type $(typeof(A)) and size $(size(A)) can
+    throw(ArgumentError(lazy"array of type $(typeof(A)) and size $(size(A)) can
     not be filled with $x, since some of its entries are constrained."))
 end
 
@@ -307,11 +357,97 @@ isdiag(A::HermOrSym{<:Any,<:Diagonal}) = isdiag(parent(A))
 dot(x::AbstractVector, A::RealHermSymComplexSym{<:Real,<:Diagonal}, y::AbstractVector) =
     dot(x, A.data, y)
 
+# O(N) implementations using the banded structure
+function copyto!(dest::BandedMatrix, src::BandedMatrix)
+    if axes(dest) == axes(src)
+        _copyto_banded!(dest, src)
+    else
+        @invoke copyto!(dest::AbstractMatrix, src::AbstractMatrix)
+    end
+    return dest
+end
+function _copyto_banded!(T::Tridiagonal, D::Diagonal)
+    T.d .= D.diag
+    T.dl .= view(D, diagind(D, -1, IndexStyle(D)))
+    T.du .= view(D, diagind(D,  1, IndexStyle(D)))
+    return T
+end
+function _copyto_banded!(SymT::SymTridiagonal, D::Diagonal)
+    issymmetric(D) || throw(ArgumentError("cannot copy a non-symmetric Diagonal matrix to a SymTridiagonal"))
+    SymT.dv .= D.diag
+    _ev = _evview(SymT)
+    _ev .= view(D, diagind(D,  1, IndexStyle(D)))
+    return SymT
+end
+function _copyto_banded!(B::Bidiagonal, D::Diagonal)
+    B.dv .= D.diag
+    B.ev .= view(D, diagind(D,  B.uplo == 'U' ? 1 : -1, IndexStyle(D)))
+    return B
+end
+function _copyto_banded!(D::Diagonal, B::Bidiagonal)
+    isdiag(B) ||
+        throw(ArgumentError("cannot copy a Bidiagonal with a non-zero off-diagonal band to a Diagonal"))
+    D.diag .= B.dv
+    return D
+end
+function _copyto_banded!(D::Diagonal, T::Tridiagonal)
+    isdiag(T) ||
+        throw(ArgumentError("cannot copy a Tridiagonal with a non-zero off-diagonal band to a Diagonal"))
+    D.diag .= T.d
+    return D
+end
+function _copyto_banded!(D::Diagonal, SymT::SymTridiagonal)
+    isdiag(SymT) ||
+        throw(ArgumentError("cannot copy a SymTridiagonal with a non-zero off-diagonal band to a Diagonal"))
+    # we broadcast identity for numbers using the fact that symmetric(x::Number) = x
+    # this potentially allows us to access faster copyto! paths
+    _symmetric = eltype(SymT) <: Number ? identity : symmetric
+    D.diag .= _symmetric.(SymT.dv)
+    return D
+end
+function _copyto_banded!(T::Tridiagonal, B::Bidiagonal)
+    T.d .= B.dv
+    if B.uplo == 'U'
+        T.du .= B.ev
+        T.dl .= view(B, diagind(B, -1, IndexStyle(B)))
+    else
+        T.dl .= B.ev
+        T.du .= view(B, diagind(B,  1, IndexStyle(B)))
+    end
+    return T
+end
+function _copyto_banded!(SymT::SymTridiagonal, B::Bidiagonal)
+    issymmetric(B) || throw(ArgumentError("cannot copy a non-symmetric Bidiagonal matrix to a SymTridiagonal"))
+    SymT.dv .= B.dv
+    _ev = _evview(SymT)
+    _ev .= B.ev
+    return SymT
+end
+function _copyto_banded!(B::Bidiagonal, T::Tridiagonal)
+    if B.uplo == 'U' && !iszero(T.dl)
+        throw(ArgumentError("cannot copy a Tridiagonal with a non-zero subdiagonal to a Bidiagonal with uplo=:U"))
+    elseif B.uplo == 'L' && !iszero(T.du)
+        throw(ArgumentError("cannot copy a Tridiagonal with a non-zero superdiagonal to a Bidiagonal with uplo=:L"))
+    end
+    B.dv .= T.d
+    B.ev .= B.uplo == 'U' ? T.du : T.dl
+    return B
+end
+function _copyto_banded!(B::Bidiagonal, SymT::SymTridiagonal)
+    isdiag(SymT) ||
+        throw(ArgumentError("cannot copy a SymTridiagonal with a non-zero off-diagonal band to a Bidiagonal"))
+    # we broadcast identity for numbers using the fact that symmetric(x::Number) = x
+    # this potentially allows us to access faster copyto! paths
+    _symmetric = eltype(SymT) <: Number ? identity : symmetric
+    B.dv .= _symmetric.(SymT.dv)
+    return B
+end
+
 # equals and approx equals methods for structured matrices
 # SymTridiagonal == Tridiagonal is already defined in tridiag.jl
 
 ==(A::Diagonal, B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv
-==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv
+==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == _diagview(B)
 ==(B::Bidiagonal, A::Diagonal) = A == B
 ==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d
 ==(B::Tridiagonal, A::Diagonal) = A == B
@@ -325,7 +461,7 @@ function ==(A::Bidiagonal, B::Tridiagonal)
 end
 ==(B::Tridiagonal, A::Bidiagonal) = A == B
 
-==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv
+==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == _diagview(B)
 ==(B::SymTridiagonal, A::Bidiagonal) = A == B
 
 # TODO: remove these deprecations (used by SparseArrays in the past)
@@ -446,7 +582,11 @@ end
 
 # factorizations
 function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot = NoPivot(); check::Bool = true)
-    T = choltype(eltype(S))
+    T = choltype(S)
     B = Bidiagonal{T}(diag(S, 0), diag(S, S.uplo == 'U' ? 1 : -1), sym_uplo(S.uplo))
     cholesky!(Hermitian(B, sym_uplo(S.uplo)), NoPivot(); check = check)
 end
+
+# istriu/istril for triangular wrappers of structured matrices
+_istril(A::LowerTriangular{<:Any, <:BandedMatrix}, k) = istril(parent(A), k)
+_istriu(A::UpperTriangular{<:Any, <:BandedMatrix}, k) = istriu(parent(A), k)
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
index e5393bfc719f1..0c06f84116fc7 100644
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
@@ -8,8 +8,8 @@ struct StructuredMatrixStyle{T} <: Broadcast.AbstractArrayStyle{2} end
 StructuredMatrixStyle{T}(::Val{2}) where {T} = StructuredMatrixStyle{T}()
 StructuredMatrixStyle{T}(::Val{N}) where {T,N} = Broadcast.DefaultArrayStyle{N}()
 
-const StructuredMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular}
-for ST in Base.uniontypes(StructuredMatrix)
+const StructuredMatrix{T} = Union{Diagonal{T},Bidiagonal{T},SymTridiagonal{T},Tridiagonal{T},LowerTriangular{T},UnitLowerTriangular{T},UpperTriangular{T},UnitUpperTriangular{T}}
+for ST in (Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular)
     @eval Broadcast.BroadcastStyle(::Type{<:$ST}) = $(StructuredMatrixStyle{ST}())
 end
 
@@ -78,7 +78,7 @@ find_uplo(bc::Broadcasted) = mapfoldl(find_uplo, merge_uplos, Broadcast.cat_nest
 function structured_broadcast_alloc(bc, ::Type{Bidiagonal}, ::Type{ElType}, n) where {ElType}
     uplo = n > 0 ? find_uplo(bc) : 'U'
     n1 = max(n - 1, 0)
-    if uplo == 'T'
+    if count_structedmatrix(Bidiagonal, bc) > 1 && uplo == 'T'
         return Tridiagonal(Array{ElType}(undef, n1), Array{ElType}(undef, n), Array{ElType}(undef, n1))
     end
     return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo)
@@ -96,7 +96,7 @@ structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) w
 structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} =
     UnitUpperTriangular(Array{ElType}(undef, n, n))
 structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} =
-    Matrix(Array{ElType}(undef, n, n))
+    Array{ElType}(undef, n, n)
 
 # A _very_ limited list of structure-preserving functions known at compile-time. This list is
 # derived from the formerly-implemented `broadcast` methods in 0.6. Note that this must
@@ -133,27 +133,50 @@ fails as `zero(::Tuple{Int})` is not defined. However,
 iszerodefined(::Type) = false
 iszerodefined(::Type{<:Number}) = true
 iszerodefined(::Type{<:AbstractArray{T}}) where T = iszerodefined(T)
+iszerodefined(::Type{<:UniformScaling{T}}) where T = iszerodefined(T)
+
+count_structedmatrix(T, bc::Broadcasted) = sum(Base.Fix2(isa, T), Broadcast.cat_nested(bc); init = 0)
+
+"""
+    fzeropreserving(bc) -> Bool
+
+Return true if the broadcasted function call evaluates to zero for structural zeros of the
+structured arguments.
+
+For trivial broadcasted values such as `bc::Number`, this reduces to `iszero(bc)`.
+"""
+function fzeropreserving(bc)
+    v = fzero(bc)
+    isnothing(v) && return false
+    v2 = something(v)
+    iszerodefined(typeof(v2)) ? iszero(v2) : isequal(v2, 0)
+end
 
-fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? iszero(v) : v == 0))
 # Like sparse matrices, we assume that the zero-preservation property of a broadcasted
 # expression is stable.  We can test the zero-preservability by applying the function
 # in cases where all other arguments are known scalars against a zero from the structured
 # matrix. If any non-structured matrix argument is not a known scalar, we give up.
-fzero(x::Number) = x
-fzero(::Type{T}) where T = T
-fzero(r::Ref) = r[]
-fzero(t::Tuple{Any}) = t[1]
-fzero(S::StructuredMatrix) = zero(eltype(S))
-fzero(x) = missing
+fzero(x::Number) = Some(x)
+fzero(::Type{T}) where T = Some(T)
+fzero(r::Ref) = Some(r[])
+fzero(t::Tuple{Any}) = Some(only(t))
+fzero(S::StructuredMatrix) = Some(zero(eltype(S)))
+fzero(::StructuredMatrix{<:AbstractMatrix{T}}) where {T<:Number} = Some(haszero(T) ? zero(T)*I : nothing)
+fzero(x) = nothing
 function fzero(bc::Broadcast.Broadcasted)
     args = map(fzero, bc.args)
-    return any(ismissing, args) ? missing : bc.f(args...)
+    return any(isnothing, args) ? nothing : Some(bc.f(map(something, args)...))
 end
 
 function Base.similar(bc::Broadcasted{StructuredMatrixStyle{T}}, ::Type{ElType}) where {T,ElType}
     inds = axes(bc)
-    if isstructurepreserving(bc) || (fzeropreserving(bc) && !(T <: Union{SymTridiagonal,UnitLowerTriangular,UnitUpperTriangular}))
+    fzerobc = fzeropreserving(bc)
+    if isstructurepreserving(bc) || (fzerobc && !(T <: Union{SymTridiagonal,UnitLowerTriangular,UnitUpperTriangular}))
         return structured_broadcast_alloc(bc, T, ElType, length(inds[1]))
+    elseif fzerobc && T <: UnitLowerTriangular
+        return similar(convert(Broadcasted{StructuredMatrixStyle{LowerTriangular}}, bc), ElType)
+    elseif fzerobc && T <: UnitUpperTriangular
+        return similar(convert(Broadcasted{StructuredMatrixStyle{UpperTriangular}}, bc), ElType)
     end
     return similar(convert(Broadcasted{DefaultArrayStyle{ndims(bc)}}, bc), ElType)
 end
@@ -166,12 +189,25 @@ isvalidstructbc(dest::Bidiagonal, bc::Broadcasted{StructuredMatrixStyle{Bidiagon
     (size(dest, 1) < 2 || find_uplo(bc) == dest.uplo) &&
     (isstructurepreserving(bc) || fzeropreserving(bc))
 
+@inline function getindex(bc::Broadcasted, b::BandIndex)
+    @boundscheck checkbounds(bc, b)
+    @inbounds Broadcast._broadcast_getindex(bc, b)
+end
+
+function Broadcast.newindex(A::StructuredMatrix, b::BandIndex)
+    # we use the fact that a StructuredMatrix is square,
+    # and we apply newindex to both the axes at once to obtain the result
+    size(A,1) > 1 ? b : BandIndex(0, 1)
+end
+# All structured matrices are square, and therefore they only broadcast out if they are size (1, 1)
+Broadcast.newindex(D::StructuredMatrix, I::CartesianIndex{2}) = size(D) == (1,1) ? CartesianIndex(1,1) : I
+
 function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle})
     isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.diag[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.diag[i] = @inbounds bc[BandIndex(0, i)]
     end
     return dest
 end
@@ -181,15 +217,15 @@ function copyto!(dest::Bidiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.dv[i] = @inbounds bc[BandIndex(0, i)]
     end
     if dest.uplo == 'U'
         for i = 1:size(dest, 1)-1
-            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
+            dest.ev[i] = @inbounds bc[BandIndex(1, i)]
         end
     else
         for i = 1:size(dest, 1)-1
-            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
+            dest.ev[i] = @inbounds bc[BandIndex(-1, i)]
         end
     end
     return dest
@@ -200,11 +236,11 @@ function copyto!(dest::SymTridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.dv[i] = @inbounds bc[BandIndex(0, i)]
     end
     for i = 1:size(dest, 1)-1
-        v = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        v == (@inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)"))
+        v = @inbounds bc[BandIndex(1, i)]
+        v == (@inbounds bc[BandIndex(-1, i)]) || throw(ArgumentError(lazy"broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)"))
         dest.ev[i] = v
     end
     return dest
@@ -215,11 +251,13 @@ function copyto!(dest::Tridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.d[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.d[i] = @inbounds bc[BandIndex(0, i)]
+    end
+    for i = 1:size(dest, 1)-1
+        dest.du[i] = @inbounds bc[BandIndex(1, i)]
     end
     for i = 1:size(dest, 1)-1
-        dest.du[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        dest.dl[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
+        dest.dl[i] = @inbounds bc[BandIndex(-1, i)]
     end
     return dest
 end
@@ -230,7 +268,7 @@ function copyto!(dest::LowerTriangular, bc::Broadcasted{<:StructuredMatrixStyle}
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for j in axs[2]
         for i in j:axs[1][end]
-            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
+            @inbounds dest.data[i,j] = bc[CartesianIndex(i, j)]
         end
     end
     return dest
@@ -242,7 +280,7 @@ function copyto!(dest::UpperTriangular, bc::Broadcasted{<:StructuredMatrixStyle}
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for j in axs[2]
         for i in 1:j
-            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
+            @inbounds dest.data[i,j] = bc[CartesianIndex(i, j)]
         end
     end
     return dest
diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl
index dc8d717932be3..7a88c4a6e14c4 100644
--- a/stdlib/LinearAlgebra/src/svd.jl
+++ b/stdlib/LinearAlgebra/src/svd.jl
@@ -149,8 +149,9 @@ and `V` is ``N \\times N``, while in the thin factorization `U` is ``M
 \\times K`` and `V` is ``N \\times K``, where ``K = \\min(M,N)`` is the
 number of singular values.
 
-If `alg = DivideAndConquer()` a divide-and-conquer algorithm is used to calculate the SVD.
-Another (typically slower but more accurate) option is `alg = QRIteration()`.
+`alg` specifies which algorithm and LAPACK method to use for SVD:
+- `alg = DivideAndConquer()` (default): Calls `LAPACK.gesdd!`.
+- `alg = QRIteration()`: Calls `LAPACK.gesvd!` (typically slower but more accurate) .
 
 !!! compat "Julia 1.3"
     The `alg` keyword argument requires Julia 1.3 or later.
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index 21047dad8fcd9..ab7b5ee031260 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -12,7 +12,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     end
 end
 """
-    Symmetric(A, uplo=:U)
+    Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
 
 Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
@@ -63,7 +63,7 @@ function Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
 end
 
 """
-    symmetric(A, uplo=:U)
+    symmetric(A, uplo::Symbol=:U)
 
 Construct a symmetric view of `A`. If `A` is a matrix, `uplo` controls whether the upper
 (if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
@@ -105,7 +105,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     end
 end
 """
-    Hermitian(A, uplo=:U)
+    Hermitian(A::AbstractMatrix, uplo::Symbol=:U)
 
 Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
@@ -153,7 +153,7 @@ function Hermitian(A::AbstractMatrix, uplo::Symbol=:U)
 end
 
 """
-    hermitian(A, uplo=:U)
+    hermitian(A, uplo::Symbol=:U)
 
 Construct a hermitian view of `A`. If `A` is a matrix, `uplo` controls whether the upper
 (if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
@@ -221,6 +221,7 @@ const HermOrSym{T,        S} = Union{Hermitian{T,S}, Symmetric{T,S}}
 const RealHermSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}}
 const RealHermSymComplexHerm{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Hermitian{Complex{T},S}}
 const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Symmetric{Complex{T},S}}
+const SelfAdjoint = Union{Symmetric{<:Real}, Hermitian{<:Number}}
 
 size(A::HermOrSym) = size(A.data)
 axes(A::HermOrSym) = axes(A.data)
@@ -233,7 +234,7 @@ axes(A::HermOrSym) = axes(A.data)
     end
 end
 
-@inline function getindex(A::Symmetric, i::Integer, j::Integer)
+@inline function getindex(A::Symmetric, i::Int, j::Int)
     @boundscheck checkbounds(A, i, j)
     @inbounds if i == j
         return symmetric(A.data[i, j], sym_uplo(A.uplo))::symmetric_type(eltype(A.data))
@@ -243,7 +244,7 @@ end
         return transpose(A.data[j, i])
     end
 end
-@inline function getindex(A::Hermitian, i::Integer, j::Integer)
+@inline function getindex(A::Hermitian, i::Int, j::Int)
     @boundscheck checkbounds(A, i, j)
     @inbounds if i == j
         return hermitian(A.data[i, j], sym_uplo(A.uplo))::hermitian_type(eltype(A.data))
@@ -254,11 +255,18 @@ end
     end
 end
 
+Base._reverse(A::Symmetric, dims::Integer) = reverse!(Matrix(A); dims)
+Base._reverse(A::Symmetric, ::Colon) = Symmetric(reverse(A.data), A.uplo == 'U' ? :L : :U)
+
 @propagate_inbounds function setindex!(A::Symmetric, v, i::Integer, j::Integer)
     i == j || throw(ArgumentError("Cannot set a non-diagonal index in a symmetric matrix"))
     setindex!(A.data, v, i, j)
+    return A
 end
 
+Base._reverse(A::Hermitian, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::Hermitian, ::Colon) = Hermitian(reverse(A.data), A.uplo == 'U' ? :L : :U)
+
 @propagate_inbounds function setindex!(A::Hermitian, v, i::Integer, j::Integer)
     if i != j
         throw(ArgumentError("Cannot set a non-diagonal index in a Hermitian matrix"))
@@ -267,8 +275,13 @@ end
     else
         setindex!(A.data, v, i, j)
     end
+    return A
 end
 
+Base.dataids(A::HermOrSym) = Base.dataids(parent(A))
+Base.unaliascopy(A::Hermitian) = Hermitian(Base.unaliascopy(parent(A)), sym_uplo(A.uplo))
+Base.unaliascopy(A::Symmetric) = Symmetric(Base.unaliascopy(parent(A)), sym_uplo(A.uplo))
+
 _conjugation(::Symmetric) = transpose
 _conjugation(::Hermitian) = adjoint
 
@@ -277,21 +290,21 @@ diag(A::Hermitian) = hermitian.(diag(parent(A)), sym_uplo(A.uplo))
 
 function applytri(f, A::HermOrSym)
     if A.uplo == 'U'
-        f(UpperTriangular(A.data))
+        f(uppertriangular(A.data))
     else
-        f(LowerTriangular(A.data))
+        f(lowertriangular(A.data))
     end
 end
 
 function applytri(f, A::HermOrSym, B::HermOrSym)
     if A.uplo == B.uplo == 'U'
-        f(UpperTriangular(A.data), UpperTriangular(B.data))
+        f(uppertriangular(A.data), uppertriangular(B.data))
     elseif A.uplo == B.uplo == 'L'
-        f(LowerTriangular(A.data), LowerTriangular(B.data))
+        f(lowertriangular(A.data), lowertriangular(B.data))
     elseif A.uplo == 'U'
-        f(UpperTriangular(A.data), UpperTriangular(_conjugation(B)(B.data)))
+        f(uppertriangular(A.data), uppertriangular(_conjugation(B)(B.data)))
     else # A.uplo == 'L'
-        f(UpperTriangular(_conjugation(A)(A.data)), UpperTriangular(B.data))
+        f(uppertriangular(_conjugation(A)(A.data)), uppertriangular(B.data))
     end
 end
 parentof_applytri(f, args...) = applytri(parent ∘ f, args...)
@@ -312,22 +325,6 @@ end
 # storage type of A (not wrapped in a symmetry type). The following method covers these cases.
 similar(A::Union{Symmetric,Hermitian}, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
 
-# Conversion
-function Matrix{T}(A::Symmetric) where {T}
-    B = copytri!(convert(Matrix{T}, copy(A.data)), A.uplo)
-    for i = 1:size(A, 1)
-        B[i,i] = symmetric(A[i,i], sym_uplo(A.uplo))::symmetric_type(eltype(A.data))
-    end
-    return B
-end
-function Matrix{T}(A::Hermitian) where {T}
-    B = copytri!(convert(Matrix{T}, copy(A.data)), A.uplo, true)
-    for i = 1:size(A, 1)
-        B[i,i] = hermitian(A[i,i], sym_uplo(A.uplo))::hermitian_type(eltype(A.data))
-    end
-    return B
-end
-
 parent(A::HermOrSym) = A.data
 Symmetric{T,S}(A::Symmetric{T,S}) where {T,S<:AbstractMatrix{T}} = A
 Symmetric{T,S}(A::Symmetric) where {T,S<:AbstractMatrix{T}} = Symmetric{T,S}(convert(S,A.data),A.uplo)
@@ -342,23 +339,55 @@ copy(A::Symmetric) = (Symmetric(parentof_applytri(copy, A), sym_uplo(A.uplo)))
 copy(A::Hermitian) = (Hermitian(parentof_applytri(copy, A), sym_uplo(A.uplo)))
 
 function copyto!(dest::Symmetric, src::Symmetric)
-    if src.uplo == dest.uplo
-        copyto!(dest.data, src.data)
+    if axes(dest) != axes(src)
+        @invoke copyto!(dest::AbstractMatrix, src::AbstractMatrix)
+    elseif src.uplo == dest.uplo
+        copytrito!(dest.data, src.data, src.uplo)
     else
-        transpose!(dest.data, src.data)
+        transpose!(dest.data, Base.unalias(dest.data, src.data))
     end
     return dest
 end
 
 function copyto!(dest::Hermitian, src::Hermitian)
-    if src.uplo == dest.uplo
-        copyto!(dest.data, src.data)
+    if axes(dest) != axes(src)
+        @invoke copyto!(dest::AbstractMatrix, src::AbstractMatrix)
+    elseif src.uplo == dest.uplo
+        copytrito!(dest.data, src.data, src.uplo)
     else
-        adjoint!(dest.data, src.data)
+        adjoint!(dest.data, Base.unalias(dest.data, src.data))
     end
     return dest
 end
 
+@propagate_inbounds function copyto!(dest::StridedMatrix, A::HermOrSym)
+    if axes(dest) != axes(A)
+        @invoke copyto!(dest::StridedMatrix, A::AbstractMatrix)
+    else
+        _copyto!(dest, Base.unalias(dest, A))
+    end
+    return dest
+end
+@propagate_inbounds function _copyto!(dest::StridedMatrix, A::HermOrSym)
+    copytrito!(dest, parent(A), A.uplo)
+    conjugate = A isa Hermitian
+    copytri!(dest, A.uplo, conjugate)
+    _symmetrize_diagonal!(dest, A)
+    return dest
+end
+@inline function _symmetrize_diagonal!(B, A::Symmetric)
+    for i = 1:size(A, 1)
+        B[i,i] = symmetric(A[i,i], sym_uplo(A.uplo))::symmetric_type(eltype(A.data))
+    end
+    return B
+end
+@inline function _symmetrize_diagonal!(B, A::Hermitian)
+    for i = 1:size(A, 1)
+        B[i,i] = hermitian(A[i,i], sym_uplo(A.uplo))::hermitian_type(eltype(A.data))
+    end
+    return B
+end
+
 # fill[stored]!
 fill!(A::HermOrSym, x) = fillstored!(A, x)
 function fillstored!(A::HermOrSym{T}, x) where T
@@ -422,8 +451,8 @@ Base.copy(A::Adjoint{<:Any,<:Symmetric}) =
 Base.copy(A::Transpose{<:Any,<:Hermitian}) =
     Hermitian(copy(transpose(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
 
-tr(A::Symmetric) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations)
-tr(A::Hermitian) = real(tr(A.data))
+tr(A::Symmetric{<:Number}) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations)
+tr(A::Hermitian{<:Number}) = real(tr(A.data))
 
 Base.conj(A::Symmetric) = Symmetric(parentof_applytri(conj, A), sym_uplo(A.uplo))
 Base.conj(A::Hermitian) = Hermitian(parentof_applytri(conj, A), sym_uplo(A.uplo))
@@ -483,7 +512,7 @@ for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:(Hermitian{<:Uni
         function dot(A::$T, B::$T)
             n = size(A, 2)
             if n != size(B, 2)
-                throw(DimensionMismatch("A has dimensions $(size(A)) but B has dimensions $(size(B))"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)) but B has dimensions $(size(B))"))
             end
 
             dotprod = $real(zero(dot(first(A), first(B))))
@@ -521,6 +550,128 @@ for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:(Hermitian{<:Uni
     end
 end
 
+function kron(A::Hermitian{<:Union{Real,Complex},<:StridedMatrix}, B::Hermitian{<:Union{Real,Complex},<:StridedMatrix})
+    resultuplo = A.uplo == 'U' || B.uplo == 'U' ? :U : :L
+    C = Hermitian(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)), resultuplo)
+    return kron!(C, A, B)
+end
+function kron(A::Symmetric{<:Number,<:StridedMatrix}, B::Symmetric{<:Number,<:StridedMatrix})
+    resultuplo = A.uplo == 'U' || B.uplo == 'U' ? :U : :L
+    C = Symmetric(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)), resultuplo)
+    return kron!(C, A, B)
+end
+
+function kron!(C::Hermitian{<:Union{Real,Complex},<:StridedMatrix}, A::Hermitian{<:Union{Real,Complex},<:StridedMatrix}, B::Hermitian{<:Union{Real,Complex},<:StridedMatrix})
+    size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
+    if ((A.uplo == 'U' || B.uplo == 'U') && C.uplo != 'U') || ((A.uplo == 'L' && B.uplo == 'L') && C.uplo != 'L')
+        throw(ArgumentError("C.uplo must match A.uplo and B.uplo, got $(C.uplo) $(A.uplo) $(B.uplo)"))
+    end
+    _hermkron!(C.data, A.data, B.data, conj, real, A.uplo, B.uplo)
+    return C
+end
+function kron!(C::Symmetric{<:Number,<:StridedMatrix}, A::Symmetric{<:Number,<:StridedMatrix}, B::Symmetric{<:Number,<:StridedMatrix})
+    size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
+    if ((A.uplo == 'U' || B.uplo == 'U') && C.uplo != 'U') || ((A.uplo == 'L' && B.uplo == 'L') && C.uplo != 'L')
+        throw(ArgumentError("C.uplo must match A.uplo and B.uplo, got $(C.uplo) $(A.uplo) $(B.uplo)"))
+    end
+    _hermkron!(C.data, A.data, B.data, identity, identity, A.uplo, B.uplo)
+    return C
+end
+
+function _hermkron!(C, A, B, conj, real, Auplo, Buplo)
+    n_A = size(A, 1)
+    n_B = size(B, 1)
+    @inbounds if Auplo == 'U' && Buplo == 'U'
+        for j = 1:n_A
+            jnB = (j - 1) * n_B
+            for i = 1:(j-1)
+                Aij = A[i, j]
+                inB = (i - 1) * n_B
+                for l = 1:n_B
+                    for k = 1:(l-1)
+                        C[inB+k, jnB+l] = Aij * B[k, l]
+                        C[inB+l, jnB+k] = Aij * conj(B[k, l])
+                    end
+                    C[inB+l, jnB+l] = Aij * real(B[l, l])
+                end
+            end
+            Ajj = real(A[j, j])
+            for l = 1:n_B
+                for k = 1:(l-1)
+                    C[jnB+k, jnB+l] = Ajj * B[k, l]
+                end
+                C[jnB+l, jnB+l] = Ajj * real(B[l, l])
+            end
+        end
+    elseif Auplo == 'U' && Buplo == 'L'
+        for j = 1:n_A
+            jnB = (j - 1) * n_B
+            for i = 1:(j-1)
+                Aij = A[i, j]
+                inB = (i - 1) * n_B
+                for l = 1:n_B
+                    C[inB+l, jnB+l] = Aij * real(B[l, l])
+                    for k = (l+1):n_B
+                        C[inB+l, jnB+k] = Aij * conj(B[k, l])
+                        C[inB+k, jnB+l] = Aij * B[k, l]
+                    end
+                end
+            end
+            Ajj = real(A[j, j])
+            for l = 1:n_B
+                C[jnB+l, jnB+l] = Ajj * real(B[l, l])
+                for k = (l+1):n_B
+                    C[jnB+l, jnB+k] = Ajj * conj(B[k, l])
+                end
+            end
+        end
+    elseif Auplo == 'L' && Buplo == 'U'
+        for j = 1:n_A
+            jnB = (j - 1) * n_B
+            Ajj = real(A[j, j])
+            for l = 1:n_B
+                for k = 1:(l-1)
+                    C[jnB+k, jnB+l] = Ajj * B[k, l]
+                end
+                C[jnB+l, jnB+l] = Ajj * real(B[l, l])
+            end
+            for i = (j+1):n_A
+                conjAij = conj(A[i, j])
+                inB = (i - 1) * n_B
+                for l = 1:n_B
+                    for k = 1:(l-1)
+                        C[jnB+k, inB+l] = conjAij * B[k, l]
+                        C[jnB+l, inB+k] = conjAij * conj(B[k, l])
+                    end
+                    C[jnB+l, inB+l] = conjAij * real(B[l, l])
+                end
+            end
+        end
+    else #if Auplo == 'L' && Buplo == 'L'
+        for j = 1:n_A
+            jnB = (j - 1) * n_B
+            Ajj = real(A[j, j])
+            for l = 1:n_B
+                C[jnB+l, jnB+l] = Ajj * real(B[l, l])
+                for k = (l+1):n_B
+                    C[jnB+k, jnB+l] = Ajj * B[k, l]
+                end
+            end
+            for i = (j+1):n_A
+                Aij = A[i, j]
+                inB = (i - 1) * n_B
+                for l = 1:n_B
+                    C[inB+l, jnB+l] = Aij * real(B[l, l])
+                    for k = (l+1):n_B
+                        C[inB+k, jnB+l] = Aij * B[k, l]
+                        C[inB+l, jnB+k] = Aij * conj(B[k, l])
+                    end
+                end
+            end
+        end
+    end
+end
+
 (-)(A::Symmetric) = Symmetric(parentof_applytri(-, A), sym_uplo(A.uplo))
 (-)(A::Hermitian) = Hermitian(parentof_applytri(-, A), sym_uplo(A.uplo))
 
@@ -642,6 +793,11 @@ function svd(A::RealHermSymComplexHerm; full::Bool=false)
     end
     return SVD(vecs, vals, V')
 end
+function svd(A::RealHermSymComplexHerm{Float16}; full::Bool = false)
+    T = eltype(A)
+    F = svd(eigencopy_oftype(A, eigtype(T)); full)
+    return SVD{T}(F)
+end
 
 function svdvals!(A::RealHermSymComplexHerm)
     vals = eigvals!(A)
@@ -667,7 +823,7 @@ function ^(A::Symmetric{<:Real}, p::Real)
     if all(λ -> λ ≥ 0, F.values)
         return Symmetric((F.vectors * Diagonal((F.values).^p)) * F.vectors')
     else
-        return Symmetric((F.vectors * Diagonal((complex(F.values)).^p)) * F.vectors')
+        return Symmetric((F.vectors * Diagonal(complex.(F.values).^p)) * F.vectors')
     end
 end
 function ^(A::Symmetric{<:Complex}, p::Real)
@@ -699,7 +855,7 @@ function ^(A::Hermitian{T}, p::Real) where T
             return Hermitian(retmat)
         end
     else
-        return (F.vectors * Diagonal((complex(F.values).^p))) * F.vectors'
+        return (F.vectors * Diagonal((complex.(F.values).^p))) * F.vectors'
     end
 end
 
@@ -829,7 +985,7 @@ for func in (:log, :sqrt)
                 end
                 return Hermitian(retmat)
             else
-                retmat = (F.vectors * Diagonal(($func).(complex(F.values)))) * F.vectors'
+                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
                 return retmat
             end
         end
@@ -844,7 +1000,7 @@ function cbrt(A::HermOrSym{<:Real})
 end
 
 """
-    hermitianpart(A, uplo=:U) -> Hermitian
+    hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) -> Hermitian
 
 Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a
 [`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part
@@ -860,7 +1016,7 @@ See also [`hermitianpart!`](@ref) for the corresponding in-place operation.
 hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo)
 
 """
-    hermitianpart!(A, uplo=:U) -> Hermitian
+    hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) -> Hermitian
 
 Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return
 [`Hermitian(A, uplo)`](@ref). For real matrices `A`, this is also known as the symmetric
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
index 0c86383685807..fee524a702187 100644
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -1,18 +1,63 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # preserve HermOrSym wrapper
-eigencopy_oftype(A::Hermitian, S) = Hermitian(copy_similar(A, S), sym_uplo(A.uplo))
-eigencopy_oftype(A::Symmetric, S) = Symmetric(copy_similar(A, S), sym_uplo(A.uplo))
+# Call `copytrito!` instead of `copy_similar` to only copy the matching triangular half
+eigencopy_oftype(A::Hermitian, S) = Hermitian(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+eigencopy_oftype(A::Symmetric, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+eigencopy_oftype(A::Symmetric{<:Complex}, S) = copyto!(similar(parent(A), S), A)
+
+default_eigen_alg(A) = DivideAndConquer()
 
 # Eigensolvers for symmetric and Hermitian matrices
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) =
-    Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
+function eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
+    if alg === DivideAndConquer()
+        Eigen(sorteig!(LAPACK.syevd!('V', A.uplo, A.data)..., sortby)...)
+    elseif alg === QRIteration()
+        Eigen(sorteig!(LAPACK.syev!('V', A.uplo, A.data)..., sortby)...)
+    elseif alg === RobustRepresentations()
+        Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
+    else
+        throw(ArgumentError("Unsupported value for `alg` keyword."))
+    end
+end
+function eigen(A::RealHermSymComplexHerm{Float16}; sortby::Union{Function,Nothing}=nothing)
+    S = eigtype(eltype(A))
+    E = eigen!(eigencopy_oftype(A, S), sortby=sortby)
+    values = convert(AbstractVector{Float16}, E.values)
+    vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
+    return Eigen(values, vectors)
+end
+
+"""
+    eigen(A::Union{Hermitian, Symmetric}, alg::Algorithm = default_eigen_alg(A)) -> Eigen
+
+Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
+which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
+matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
+
+Iterating the decomposition produces the components `F.values` and `F.vectors`.
+
+`alg` specifies which algorithm and LAPACK method to use for eigenvalue decomposition:
+- `alg = DivideAndConquer()` (default): Calls `LAPACK.syevd!`.
+- `alg = QRIteration()`: Calls `LAPACK.syev!`.
+- `alg = RobustRepresentations()`: Multiple relatively robust representations method, Calls `LAPACK.syevr!`.
+
+See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
+a comparison of the accuracy and performance of different algorithms.
+
+The default `alg` used may change in the future.
 
-function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
+!!! compat "Julia 1.12"
+    The `alg` keyword argument requires Julia 1.12 or later.
+
+The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
+"""
+function eigen(A::RealHermSymComplexHerm, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
     S = eigtype(eltype(A))
-    eigen!(eigencopy_oftype(A, S), sortby=sortby)
+    eigen!(eigencopy_oftype(A, S), alg; sortby)
 end
 
+
 eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
     Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
 
@@ -63,17 +108,42 @@ function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real)
     eigen!(eigencopy_oftype(A, S), vl, vh)
 end
 
-function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing)
-    vals = LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
+
+function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
+    vals::Vector{real(eltype(A))} = if alg === DivideAndConquer()
+        LAPACK.syevd!('N', A.uplo, A.data)
+    elseif alg === QRIteration()
+        LAPACK.syev!('N', A.uplo, A.data)
+    elseif alg === RobustRepresentations()
+        LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
+    else
+        throw(ArgumentError("Unsupported value for `alg` keyword."))
+    end
     !isnothing(sortby) && sort!(vals, by=sortby)
     return vals
 end
 
-function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
+"""
+    eigvals(A::Union{Hermitian, Symmetric}, alg::Algorithm = default_eigen_alg(A))) -> values
+
+Return the eigenvalues of `A`.
+
+`alg` specifies which algorithm and LAPACK method to use for eigenvalue decomposition:
+- `alg = DivideAndConquer()` (default): Calls `LAPACK.syevd!`.
+- `alg = QRIteration()`: Calls `LAPACK.syev!`.
+- `alg = RobustRepresentations()`: Multiple relatively robust representations method, Calls `LAPACK.syevr!`.
+
+See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
+a comparison of the accuracy and performance of different methods.
+
+The default `alg` used may change in the future.
+"""
+function eigvals(A::RealHermSymComplexHerm, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
     S = eigtype(eltype(A))
-    eigvals!(eigencopy_oftype(A, S), sortby=sortby)
+    eigvals!(eigencopy_oftype(A, S), alg; sortby)
 end
 
+
 """
     eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
 
@@ -295,8 +365,16 @@ function eigvals!(A::StridedMatrix{T}, F::LU{T,<:StridedMatrix}; sortby::Union{F
     return eigvals!(A; sortby)
 end
 
-
-function eigen(A::Hermitian{Complex{T}, <:Tridiagonal}; kwargs...) where {T}
+eigen(A::Hermitian{<:Complex, <:Tridiagonal}; kwargs...) =
+    _eigenhermtridiag(A; kwargs...)
+# disambiguation
+function eigen(A::Hermitian{Complex{Float16}, <:Tridiagonal}; kwargs...)
+    E = _eigenhermtridiag(A; kwargs...)
+    values = convert(AbstractVector{Float16}, E.values)
+    vectors = convert(AbstractMatrix{ComplexF16}, E.vectors)
+    return Eigen(values, vectors)
+end
+function _eigenhermtridiag(A::Hermitian{<:Complex,<:Tridiagonal}; kwargs...)
     (; dl, d, du) = parent(A)
     N = length(d)
     if N <= 1
diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl
index 8aa04f7d34b48..a36919b2e557a 100644
--- a/stdlib/LinearAlgebra/src/transpose.jl
+++ b/stdlib/LinearAlgebra/src/transpose.jl
@@ -74,27 +74,32 @@ julia> A
 ```
 """
 adjoint!(B::AbstractMatrix, A::AbstractMatrix) = transpose_f!(adjoint, B, A)
+
+@noinline function check_transpose_axes(axesA, axesB)
+    axesB == reverse(axesA) || throw(DimensionMismatch("axes of the destination are incompatible with that of the source"))
+end
+
 function transpose!(B::AbstractVector, A::AbstractMatrix)
-    axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose"))
+    check_transpose_axes((axes(B,1), axes(B,2)), axes(A))
     copyto!(B, A)
 end
 function transpose!(B::AbstractMatrix, A::AbstractVector)
-    axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose"))
+    check_transpose_axes(axes(B), (axes(A,1), axes(A,2)))
     copyto!(B, A)
 end
 function adjoint!(B::AbstractVector, A::AbstractMatrix)
-    axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose"))
+    check_transpose_axes((axes(B,1), axes(B,2)), axes(A))
     ccopy!(B, A)
 end
 function adjoint!(B::AbstractMatrix, A::AbstractVector)
-    axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose"))
+    check_transpose_axes(axes(B), (axes(A,1), axes(A,2)))
     ccopy!(B, A)
 end
 
 const transposebaselength=64
 function transpose_f!(f, B::AbstractMatrix, A::AbstractMatrix)
     inds = axes(A)
-    axes(B,1) == inds[2] && axes(B,2) == inds[1] || throw(DimensionMismatch(string(f)))
+    check_transpose_axes(axes(B), inds)
 
     m, n = length(inds[1]), length(inds[2])
     if m*n<=4*transposebaselength
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
index aff87e3ae50ba..df0d0d4fd0d8b 100644
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ b/stdlib/LinearAlgebra/src/triangular.jl
@@ -43,6 +43,7 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTr
         similar(A::$t, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
 
         copy(A::$t) = $t(copy(A.data))
+        Base.unaliascopy(A::$t) = $t(Base.unaliascopy(A.data))
 
         real(A::$t{<:Real}) = A
         real(A::$t{<:Complex}) = (B = real(A.data); $t(B))
@@ -50,16 +51,6 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTr
     end
 end
 
-similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UpperTriangular(similar(parent(parent(A)), T))
-similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UnitUpperTriangular(similar(parent(parent(A)), T))
-similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    LowerTriangular(similar(parent(parent(A)), T))
-similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UnitLowerTriangular(similar(parent(parent(A)), T))
-
-
 """
     LowerTriangular(A::AbstractMatrix)
 
@@ -153,6 +144,14 @@ const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTri
 const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}}
 const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}}
 
+uppertriangular(M) = UpperTriangular(M)
+lowertriangular(M) = LowerTriangular(M)
+
+uppertriangular(U::UpperOrUnitUpperTriangular) = U
+lowertriangular(U::LowerOrUnitLowerTriangular) = U
+
+Base.dataids(A::UpperOrLowerTriangular) = Base.dataids(A.data)
+
 imag(A::UpperTriangular) = UpperTriangular(imag(A.data))
 imag(A::LowerTriangular) = LowerTriangular(imag(A.data))
 imag(A::UpperTriangular{<:Any,<:StridedMaybeAdjOrTransMat}) = imag.(A)
@@ -176,7 +175,6 @@ function imag(A::UnitUpperTriangular)
     return Uim
 end
 
-Array(A::AbstractTriangular) = Matrix(A)
 parent(A::UpperOrLowerTriangular) = A.data
 
 # For strided matrices, we may only loop over the filled triangle
@@ -184,37 +182,6 @@ copy(A::UpperOrLowerTriangular{<:Any, <:StridedMaybeAdjOrTransMat}) = copyto!(si
 
 # then handle all methods that requires specific handling of upper/lower and unit diagonal
 
-function Matrix{T}(A::LowerTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    tril!(B)
-    B
-end
-function Matrix{T}(A::UnitLowerTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    tril!(B)
-    for i = 1:size(B,1)
-        B[i,i] = oneunit(T)
-    end
-    B
-end
-function Matrix{T}(A::UpperTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    triu!(B)
-    B
-end
-function Matrix{T}(A::UnitUpperTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    triu!(B)
-    for i = 1:size(B,1)
-        B[i,i] = oneunit(T)
-    end
-    B
-end
-
 function full!(A::LowerTriangular)
     B = A.data
     tril!(B)
@@ -260,19 +227,33 @@ Base.isstored(A::UnitUpperTriangular, i::Int, j::Int) =
 Base.isstored(A::UpperTriangular, i::Int, j::Int) =
     i <= j ? Base.isstored(A.data, i, j) : false
 
-@propagate_inbounds getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} =
+@propagate_inbounds getindex(A::UnitLowerTriangular{T}, i::Int, j::Int) where {T} =
     i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
-@propagate_inbounds getindex(A::LowerTriangular, i::Integer, j::Integer) =
+@propagate_inbounds getindex(A::LowerTriangular, i::Int, j::Int) =
     i >= j ? A.data[i,j] : _zero(A.data,j,i)
-@propagate_inbounds getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} =
+@propagate_inbounds getindex(A::UnitUpperTriangular{T}, i::Int, j::Int) where {T} =
     i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
-@propagate_inbounds getindex(A::UpperTriangular, i::Integer, j::Integer) =
+@propagate_inbounds getindex(A::UpperTriangular, i::Int, j::Int) =
     i <= j ? A.data[i,j] : _zero(A.data,j,i)
 
+_zero_triangular_half_str(::Type{<:UpperOrUnitUpperTriangular}) = "lower"
+_zero_triangular_half_str(::Type{<:LowerOrUnitLowerTriangular}) = "upper"
+
+@noinline function throw_nonzeroerror(T, @nospecialize(x), i, j)
+    Ts = _zero_triangular_half_str(T)
+    Tn = nameof(T)
+    throw(ArgumentError(
+        lazy"cannot set index in the $Ts triangular part ($i, $j) of an $Tn matrix to a nonzero value ($x)"))
+end
+@noinline function throw_nononeerror(T, @nospecialize(x), i, j)
+    Tn = nameof(T)
+    throw(ArgumentError(
+        lazy"cannot set index on the diagonal ($i, $j) of an $Tn matrix to a non-unit value ($x)"))
+end
+
 @propagate_inbounds function setindex!(A::UpperTriangular, x, i::Integer, j::Integer)
     if i > j
-        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)"))
+        iszero(x) || throw_nonzeroerror(typeof(A), x, i, j)
     else
         A.data[i,j] = x
     end
@@ -281,11 +262,9 @@ end
 
 @propagate_inbounds function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer)
     if i > j
-        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)"))
+        iszero(x) || throw_nonzeroerror(typeof(A), x, i, j)
     elseif i == j
-        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
-            "of a UnitUpperTriangular matrix to a non-unit value ($x)"))
+        x == oneunit(x) || throw_nononeerror(typeof(A), x, i, j)
     else
         A.data[i,j] = x
     end
@@ -294,8 +273,7 @@ end
 
 @propagate_inbounds function setindex!(A::LowerTriangular, x, i::Integer, j::Integer)
     if i < j
-        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
-            "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)"))
+        iszero(x) || throw_nonzeroerror(typeof(A), x, i, j)
     else
         A.data[i,j] = x
     end
@@ -304,34 +282,44 @@ end
 
 @propagate_inbounds function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer)
     if i < j
-        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
-            "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)"))
+        iszero(x) || throw_nonzeroerror(typeof(A), x, i, j)
     elseif i == j
-        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
-            "of a UnitLowerTriangular matrix to a non-unit value ($x)"))
+        x == oneunit(x) || throw_nononeerror(typeof(A), x, i, j)
     else
         A.data[i,j] = x
     end
     return A
 end
 
+@noinline function throw_setindex_structuralzero_error(T, @nospecialize(x))
+    Ts = _zero_triangular_half_str(T)
+    Tn = nameof(T)
+    throw(ArgumentError(
+        lazy"cannot set indices in the $Ts triangular part of an $Tn matrix to a nonzero value ($x)"))
+end
+
 @inline function fill!(A::UpperTriangular, x)
-    iszero(x) || throw(ArgumentError("cannot set indices in the lower triangular part " *
-            "of an UpperTriangular matrix to a nonzero value ($x)"))
+    iszero(x) || throw_setindex_structuralzero_error(typeof(A), x)
     for col in axes(A,2), row in firstindex(A,1):col
         @inbounds A.data[row, col] = x
     end
     A
 end
 @inline function fill!(A::LowerTriangular, x)
-    iszero(x) || throw(ArgumentError("cannot set indices in the upper triangular part " *
-            "of a LowerTriangular matrix to a nonzero value ($x)"))
+    iszero(x) || throw_setindex_structuralzero_error(typeof(A), x)
     for col in axes(A,2), row in col:lastindex(A,1)
         @inbounds A.data[row, col] = x
     end
     A
 end
 
+Base._reverse(A::UpperOrUnitUpperTriangular, dims::Integer) = reverse!(Matrix(A); dims)
+Base._reverse(A::UpperTriangular, ::Colon) = LowerTriangular(reverse(A.data))
+Base._reverse(A::UnitUpperTriangular, ::Colon) = UnitLowerTriangular(reverse(A.data))
+Base._reverse(A::LowerOrUnitLowerTriangular, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::LowerTriangular, ::Colon) = UpperTriangular(reverse(A.data))
+Base._reverse(A::UnitLowerTriangular, ::Colon) = UnitUpperTriangular(reverse(A.data))
+
 ## structured matrix methods ##
 function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular},
                                       i::Integer, j::Integer, s::AbstractString)
@@ -342,14 +330,32 @@ function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangul
     return i >= j ? s : Base.replace_with_centered_mark(s)
 end
 
-function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
+istril(A::UnitLowerTriangular, k::Integer=0) = k >= 0
+istriu(A::UnitUpperTriangular, k::Integer=0) = k <= 0
+Base.@constprop :aggressive function istril(A::LowerTriangular, k::Integer=0)
     k >= 0 && return true
     return _istril(A, k)
 end
-function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
+@inline function _istril(A::LowerTriangular, k)
+    P = parent(A)
+    m = size(A, 1)
+    for j in max(1, k + 2):m
+        all(iszero, view(P, j:min(j - k - 1, m), j)) || return false
+    end
+    return true
+end
+Base.@constprop :aggressive function istriu(A::UpperTriangular, k::Integer=0)
     k <= 0 && return true
     return _istriu(A, k)
 end
+@inline function _istriu(A::UpperTriangular, k)
+    P = parent(A)
+    m = size(A, 1)
+    for j in 1:min(m, m + k - 1)
+        all(iszero, view(P, max(1, j - k + 1):j, j)) || return false
+    end
+    return true
+end
 istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k)
 istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
 istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
@@ -511,36 +517,121 @@ tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A))
 tr(A::UpperTriangular) = tr(A.data)
 tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))
 
+for T in (:UpperOrUnitUpperTriangular, :LowerOrUnitLowerTriangular)
+    @eval @propagate_inbounds function copyto!(dest::$T, U::$T)
+        if axes(dest) != axes(U)
+            @invoke copyto!(dest::AbstractArray, U::AbstractArray)
+        else
+            _copyto!(dest, U)
+        end
+        return dest
+    end
+end
+
 # copy and scale
-function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
-    checkbounds(A, axes(B)...)
+for (T, UT) in ((:UpperTriangular, :UnitUpperTriangular), (:LowerTriangular, :UnitLowerTriangular))
+    @eval @inline function _copyto!(A::$T, B::$T)
+        @boundscheck checkbounds(A, axes(B)...)
+        copytrito!(parent(A), parent(B), uplo_char(A))
+        return A
+    end
+    @eval @inline function _copyto!(A::$UT, B::$T)
+        for dind in diagind(A, IndexStyle(A))
+            if A[dind] != B[dind]
+                throw_nononeerror(typeof(A), B[dind], Tuple(dind)...)
+            end
+        end
+        _copyto!($T(parent(A)), B)
+        return A
+    end
+end
+@inline function _copyto!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular)
+    @boundscheck checkbounds(A, axes(B)...)
     n = size(B,1)
+    B2 = Base.unalias(A, B)
     for j = 1:n
-        for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
-            @inbounds A[i,j] = B[i,j]
+        for i = 1:j-1
+            @inbounds parent(A)[i,j] = parent(B2)[i,j]
+        end
+        if A isa UpperTriangular # copy diagonal
+            @inbounds parent(A)[j,j] = B2[j,j]
         end
     end
     return A
 end
-function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
-    checkbounds(A, axes(B)...)
+@inline function _copyto!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular)
+    @boundscheck checkbounds(A, axes(B)...)
     n = size(B,1)
+    B2 = Base.unalias(A, B)
     for j = 1:n
-        for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
-            @inbounds A[i,j] = B[i,j]
+        if A isa LowerTriangular # copy diagonal
+            @inbounds parent(A)[j,j] = B2[j,j]
+        end
+        for i = j+1:n
+            @inbounds parent(A)[i,j] = parent(B2)[i,j]
         end
     end
     return A
 end
 
+_triangularize!(::UpperOrUnitUpperTriangular) = triu!
+_triangularize!(::LowerOrUnitLowerTriangular) = tril!
+
+@propagate_inbounds function copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
+    if axes(dest) != axes(U)
+        @invoke copyto!(dest::StridedMatrix, U::AbstractArray)
+    else
+        _copyto!(dest, U)
+    end
+    return dest
+end
+@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular)
+    copytrito!(dest, parent(U), U isa UpperOrUnitUpperTriangular ? 'U' : 'L')
+    copytrito!(dest, U, U isa UpperOrUnitUpperTriangular ? 'L' : 'U')
+    return dest
+end
+@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular{<:Any, <:StridedMatrix})
+    U2 = Base.unalias(dest, U)
+    copyto_unaliased!(dest, U2)
+    return dest
+end
+# for strided matrices, we explicitly loop over the arrays to improve cache locality
+# This fuses the copytrito! for the two halves
+@inline function copyto_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix})
+    @boundscheck checkbounds(dest, axes(U)...)
+    isunit = U isa UnitUpperTriangular
+    for col in axes(dest,2)
+        for row in 1:col-isunit
+            @inbounds dest[row,col] = U.data[row,col]
+        end
+        for row in col+!isunit:size(U,1)
+            @inbounds dest[row,col] = U[row,col]
+        end
+    end
+    return dest
+end
+@inline function copyto_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix})
+    @boundscheck checkbounds(dest, axes(L)...)
+    isunit = L isa UnitLowerTriangular
+    for col in axes(dest,2)
+        for row in 1:col-!isunit
+            @inbounds dest[row,col] = L[row,col]
+        end
+        for row in col+isunit:size(L,1)
+            @inbounds dest[row,col] = L.data[row,col]
+        end
+    end
+    return dest
+end
+
 @inline _rscale_add!(A::AbstractTriangular, B::AbstractTriangular, C::Number, alpha::Number, beta::Number) =
-    _triscale!(A, B, C, MulAddMul(alpha, beta))
+    @stable_muladdmul _triscale!(A, B, C, MulAddMul(alpha, beta))
 @inline _lscale_add!(A::AbstractTriangular, B::Number, C::AbstractTriangular, alpha::Number, beta::Number) =
-    _triscale!(A, B, C, MulAddMul(alpha, beta))
+    @stable_muladdmul _triscale!(A, B, C, MulAddMul(alpha, beta))
 
 function checksize1(A, B)
     szA, szB = size(A), size(B)
-    szA == szB || throw(DimensionMismatch("size of A, $szA, does not match size of B, $szB"))
+    szA == szB || throw(DimensionMismatch(lazy"size of A, $szA, does not match size of B, $szB"))
     checksquare(B)
 end
 
@@ -629,6 +720,43 @@ function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriang
     return A
 end
 
+function _trirdiv!(A::UpperTriangular, B::UpperOrUnitUpperTriangular, c::Number)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in 1:j
+            @inbounds A[i, j] = B[i, j] / c
+        end
+    end
+    return A
+end
+function _trirdiv!(A::LowerTriangular, B::LowerOrUnitLowerTriangular, c::Number)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in j:n
+            @inbounds A[i, j] = B[i, j] / c
+        end
+    end
+    return A
+end
+function _trildiv!(A::UpperTriangular, c::Number, B::UpperOrUnitUpperTriangular)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in 1:j
+            @inbounds A[i, j] = c \ B[i, j]
+        end
+    end
+    return A
+end
+function _trildiv!(A::LowerTriangular, c::Number, B::LowerOrUnitLowerTriangular)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in j:n
+            @inbounds A[i, j] = c \ B[i, j]
+        end
+    end
+    return A
+end
+
 rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
 lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
 
@@ -752,6 +880,78 @@ for op in (:+, :-)
     end
 end
 
+function kron(A::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+    C = UpperTriangular(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)))
+    return kron!(C, A, B)
+end
+function kron(A::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+    C = LowerTriangular(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)))
+    return kron!(C, A, B)
+end
+
+function kron!(C::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, A::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+    size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
+    _triukron!(C.data, A.data, B.data)
+    return C
+end
+function kron!(C::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, A::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+    size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
+    _trilkron!(C.data, A.data, B.data)
+    return C
+end
+
+function _triukron!(C, A, B)
+    n_A = size(A, 1)
+    n_B = size(B, 1)
+    @inbounds for j = 1:n_A
+        jnB = (j - 1) * n_B
+        for i = 1:(j-1)
+            Aij = A[i, j]
+            inB = (i - 1) * n_B
+            for l = 1:n_B
+                for k = 1:l
+                    C[inB+k, jnB+l] = Aij * B[k, l]
+                end
+                for k = 1:(l-1)
+                    C[inB+l, jnB+k] = zero(eltype(C))
+                end
+            end
+        end
+        Ajj = A[j, j]
+        for l = 1:n_B
+            for k = 1:l
+                C[jnB+k, jnB+l] = Ajj * B[k, l]
+            end
+        end
+    end
+end
+
+function _trilkron!(C, A, B)
+    n_A = size(A, 1)
+    n_B = size(B, 1)
+    @inbounds for j = 1:n_A
+        jnB = (j - 1) * n_B
+        Ajj = A[j, j]
+        for l = 1:n_B
+            for k = l:n_B
+                C[jnB+k, jnB+l] = Ajj * B[k, l]
+            end
+        end
+        for i = (j+1):n_A
+            Aij = A[i, j]
+            inB = (i - 1) * n_B
+            for l = 1:n_B
+                for k = l:n_B
+                    C[inB+k, jnB+l] = Aij * B[k, l]
+                end
+                for k = (l+1):n_B
+                    C[inB+l, jnB+k] = zero(eltype(C))
+                end
+            end
+        end
+    end
+end
+
 ######################
 # BlasFloat routines #
 ######################
@@ -771,8 +971,6 @@ isunit_char(::UnitUpperTriangular) = 'U'
 isunit_char(::LowerTriangular) = 'N'
 isunit_char(::UnitLowerTriangular) = 'U'
 
-lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
-
 # generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
 _trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) =
     lmul!(A, copyto!(C, B))
@@ -797,16 +995,27 @@ _trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractTriangular) =
 _trimul!(C::AbstractMatrix, A::AbstractTriangular, B::UpperOrLowerTriangular) =
     generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
 
-lmul!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _trimul!(B, A, B)
-rmul!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _trimul!(A, A, B)
-
+function lmul!(A::AbstractTriangular, B::AbstractVecOrMat)
+    if istriu(A)
+        _trimul!(B, UpperTriangular(A), B)
+    else
+        _trimul!(B, LowerTriangular(A), B)
+    end
+end
+function rmul!(A::AbstractMatrix, B::AbstractTriangular)
+    if istriu(B)
+        _trimul!(A, A, UpperTriangular(B))
+    else
+        _trimul!(A, A, LowerTriangular(B))
+    end
+end
 
 for TC in (:AbstractVector, :AbstractMatrix)
     @eval @inline function _mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
         if isone(alpha) && iszero(beta)
             return _trimul!(C, A, B)
         else
-            return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
+            return @stable_muladdmul generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
         end
     end
 end
@@ -818,7 +1027,7 @@ for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
         if isone(alpha) && iszero(beta)
             return _trimul!(C, A, B)
         else
-            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
+            return generic_matmatmul!(C, 'N', 'N', A, B, alpha, beta)
         end
     end
 end
@@ -835,8 +1044,20 @@ _ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) =
 _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
     generic_mattridiv!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
 
-ldiv!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _ldiv!(B, A, B)
-rdiv!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _rdiv!(A, A, B)
+function ldiv!(A::AbstractTriangular, B::AbstractVecOrMat)
+    if istriu(A)
+        _ldiv!(B, UpperTriangular(A), B)
+    else
+        _ldiv!(B, LowerTriangular(A), B)
+    end
+end
+function rdiv!(A::AbstractMatrix, B::AbstractTriangular)
+    if istriu(B)
+        _rdiv!(A, A, UpperTriangular(B))
+    else
+        _rdiv!(A, A, LowerTriangular(B))
+    end
+end
 
 # preserve triangular structure in in-place multiplication/division
 for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
@@ -950,7 +1171,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
     tstrided = t{<:Any, <:StridedMaybeAdjOrTransMat}
     @eval begin
         (*)(A::$t, x::Number) = $t(A.data*x)
-        (*)(A::$tstrided, x::Number) = A .* x
+        function (*)(A::$tstrided, x::Number)
+            eltype_dest = promote_op(*, eltype(A), typeof(x))
+            dest = $t(similar(parent(A), eltype_dest))
+            _triscale!(dest, x, A, MulAddMul())
+        end
 
         function (*)(A::$unitt, x::Number)
             B = $t(A.data)*x
@@ -961,7 +1186,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (*)(x::Number, A::$t) = $t(x*A.data)
-        (*)(x::Number, A::$tstrided) = x .* A
+        function (*)(x::Number, A::$tstrided)
+            eltype_dest = promote_op(*, typeof(x), eltype(A))
+            dest = $t(similar(parent(A), eltype_dest))
+            _triscale!(dest, x, A, MulAddMul())
+        end
 
         function (*)(x::Number, A::$unitt)
             B = x*$t(A.data)
@@ -972,7 +1201,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (/)(A::$t, x::Number) = $t(A.data/x)
-        (/)(A::$tstrided, x::Number) = A ./ x
+        function (/)(A::$tstrided, x::Number)
+            eltype_dest = promote_op(/, eltype(A), typeof(x))
+            dest = $t(similar(parent(A), eltype_dest))
+            _trirdiv!(dest, A,  x)
+        end
 
         function (/)(A::$unitt, x::Number)
             B = $t(A.data)/x
@@ -984,7 +1217,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (\)(x::Number, A::$t) = $t(x\A.data)
-        (\)(x::Number, A::$tstrided) = x .\ A
+        function (\)(x::Number, A::$tstrided)
+            eltype_dest = promote_op(\, typeof(x), eltype(A))
+            dest = $t(similar(parent(A), eltype_dest))
+            _trildiv!(dest, x, A)
+        end
 
         function (\)(x::Number, A::$unitt)
             B = x\$t(A.data)
@@ -1003,11 +1240,11 @@ function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function,
     m, n = size(B, 1), size(B, 2)
     N = size(A, 1)
     if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
     mc, nc = size(C, 1), size(C, 2)
     if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+        throw(DimensionMismatch(lazy"output has dimensions ($mc,$nc), should have ($N,$n)"))
     end
     oA = oneunit(eltype(A))
     unit = isunitc == 'U'
@@ -1065,11 +1302,11 @@ function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA:
     m, n = size(B, 1), size(B, 2)
     N = size(A, 1)
     if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
     mc, nc = size(C, 1), size(C, 2)
     if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+        throw(DimensionMismatch(lazy"output has dimensions ($mc,$nc), should have ($N,$n)"))
     end
     oA = oneunit(eltype(A))
     unit = isunitc == 'U'
@@ -1102,11 +1339,11 @@ function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A
     m, n = size(A, 1), size(A, 2)
     N = size(B, 1)
     if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $n, has size $N"))
     end
     mc, nc = size(C, 1), size(C, 2)
     if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
+        throw(DimensionMismatch(lazy"output has dimensions ($mc,$nc), should have ($m,$N)"))
     end
     oB = oneunit(eltype(B))
     unit = isunitc == 'U'
@@ -1164,11 +1401,11 @@ function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, ::Function, A::Ab
     m, n = size(A, 1), size(A, 2)
     N = size(B, 1)
     if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $n, has size $N"))
     end
     mc, nc = size(C, 1), size(C, 2)
     if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
+        throw(DimensionMismatch(lazy"output has dimensions ($mc,$nc), should have ($m,$N)"))
     end
     oB = oneunit(eltype(B))
     unit = isunitc == 'U'
@@ -1212,11 +1449,12 @@ function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function,
     mA, nA = size(A)
     m, n = size(B, 1), size(B,2)
     if nA != m
-        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
+        throw(DimensionMismatch(lazy"second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
     end
     if size(C) != size(B)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
+        throw(DimensionMismatch(lazy"size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
     end
+    iszero(mA) && return C
     oA = oneunit(eltype(A))
     @inbounds if uploc == 'U'
         if isunitc == 'N'
@@ -1348,11 +1586,12 @@ function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA:
     mA, nA = size(A)
     m, n = size(B, 1), size(B,2)
     if nA != m
-        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
+        throw(DimensionMismatch(lazy"second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
     end
     if size(C) != size(B)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
+        throw(DimensionMismatch(lazy"size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
     end
+    iszero(mA) && return C
     oA = oneunit(eltype(A))
     @inbounds if uploc == 'U'
         if isunitc == 'N'
@@ -1430,10 +1669,10 @@ function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A
     require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
     if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+        throw(DimensionMismatch(lazy"size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
     end
     oB = oneunit(eltype(B))
     unit = isunitc == 'U'
@@ -1493,10 +1732,10 @@ function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, ::Function, A::Ab
     require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
+        throw(DimensionMismatch(lazy"right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
     if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+        throw(DimensionMismatch(lazy"size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
     end
     oB = oneunit(eltype(B))
     unit = isunitc == 'U'
@@ -1618,7 +1857,7 @@ end
 #     34(3), (2013) 1341–1360.
 function powm!(A0::UpperTriangular, p::Real)
     if abs(p) >= 1
-        throw(ArgumentError("p must be a real number in (-1,1), got $p"))
+        throw(ArgumentError(lazy"p must be a real number in (-1,1), got $p"))
     end
 
     normA0 = opnorm(A0, 1)
@@ -2543,12 +2782,20 @@ end
 
 # Generic eigensystems
 eigvals(A::AbstractTriangular) = diag(A)
+# fallback for unknown types
+function eigvecs(A::AbstractTriangular{<:BlasFloat})
+    if istriu(A)
+        eigvecs(UpperTriangular(Matrix(A)))
+    else # istril(A)
+        eigvecs(LowerTriangular(Matrix(A)))
+    end
+end
 function eigvecs(A::AbstractTriangular{T}) where T
     TT = promote_type(T, Float32)
     if TT <: BlasFloat
         return eigvecs(convert(AbstractMatrix{TT}, A))
     else
-        throw(ArgumentError("eigvecs type $(typeof(A)) not supported. Please submit a pull request."))
+        throw(ArgumentError(lazy"eigvecs type $(typeof(A)) not supported. Please submit a pull request."))
     end
 end
 det(A::UnitUpperTriangular{T}) where {T} = one(T)
@@ -2663,7 +2910,7 @@ end
             Bᵢⱼ⁽¹⁾ = M_Bᵢⱼ⁽¹⁾[1:s₁, 1:s₂]
             # Compute Bᵢⱼ⁽⁰⁾ and Bᵢⱼ⁽¹⁾
             mul!(Bᵢⱼ⁽⁰⁾, A[i₁:i₂,k₁:k₂], A[k₁:k₂,j₁:j₂])
-            # Retreive Rᵢ,ᵢ₊ₖ as A[i+k,i]'
+            # Retrieve Rᵢ,ᵢ₊ₖ as A[i+k,i]'
             mul!(Bᵢⱼ⁽¹⁾, A[i₁:i₂,k₁:k₂], A[j₁:j₂,k₁:k₂]')
             # Solve Uᵢ,ᵢ₊ₖ using Reference [1, (4.10)]
             kron!(L₀, Δ[1:s₂,1:s₂], S[1:s₁,i₁:i₂])
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index 07c2fe410769d..84c79f57debc7 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -9,7 +9,7 @@ struct SymTridiagonal{T, V<:AbstractVector{T}} <: AbstractMatrix{T}
     function SymTridiagonal{T, V}(dv, ev) where {T, V<:AbstractVector{T}}
         require_one_based_indexing(dv, ev)
         if !(length(dv) - 1 <= length(ev) <= length(dv))
-            throw(DimensionMismatch("subdiagonal has wrong length. Has length $(length(ev)), but should be either $(length(dv) - 1) or $(length(dv))."))
+            throw(DimensionMismatch(lazy"subdiagonal has wrong length. Has length $(length(ev)), but should be either $(length(dv) - 1) or $(length(dv))."))
         end
         new{T, V}(dv, ev)
     end
@@ -135,13 +135,17 @@ function Matrix{T}(M::SymTridiagonal) where T
     n = size(M, 1)
     Mf = Matrix{T}(undef, n, n)
     n == 0 && return Mf
-    n > 2 && fill!(Mf, zero(T))
-    @inbounds for i = 1:n-1
-        Mf[i,i] = symmetric(M.dv[i], :U)
-        Mf[i+1,i] = transpose(M.ev[i])
-        Mf[i,i+1] = M.ev[i]
+    if haszero(T) # optimized path for types with zero(T) defined
+        n > 2 && fill!(Mf, zero(T))
+        @inbounds for i = 1:n-1
+            Mf[i,i] = symmetric(M.dv[i], :U)
+            Mf[i+1,i] = transpose(M.ev[i])
+            Mf[i,i+1] = M.ev[i]
+        end
+        Mf[n,n] = symmetric(M.dv[n], :U)
+    else
+        copyto!(Mf, M)
     end
-    Mf[n,n] = symmetric(M.dv[n], :U)
     return Mf
 end
 Matrix(M::SymTridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
@@ -153,7 +157,8 @@ axes(M::SymTridiagonal) = (ax = axes(M.dv, 1); (ax, ax))
 similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T), similar(S.ev, T))
 similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(S.dv, T, dims)
 
-copyto!(dest::SymTridiagonal, src::SymTridiagonal) =
+# copyto! for matching axes
+_copyto_banded!(dest::SymTridiagonal, src::SymTridiagonal) =
     (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest)
 
 #Elementary operations
@@ -162,11 +167,13 @@ for func in (:conj, :copy, :real, :imag)
 end
 
 transpose(S::SymTridiagonal) = S
-adjoint(S::SymTridiagonal{<:Real}) = S
-adjoint(S::SymTridiagonal) = Adjoint(S)
+adjoint(S::SymTridiagonal{<:Number}) = SymTridiagonal(vec(adjoint(S.dv)), vec(adjoint(S.ev)))
+adjoint(S::SymTridiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
+    SymTridiagonal(adjoint(parent(S.dv)), adjoint(parent(S.ev)))
+
 permutedims(S::SymTridiagonal) = S
 function permutedims(S::SymTridiagonal, perm)
-    Base.checkdims_perm(S, S, perm)
+    Base.checkdims_perm(axes(S), axes(S), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(S) : S
 end
 Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
@@ -174,7 +181,13 @@ Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(ad
 ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S))
 issymmetric(S::SymTridiagonal) = true
 
-tr(S::SymTridiagonal) = sum(S.dv)
+tr(S::SymTridiagonal) = sum(symmetric, S.dv)
+
+@noinline function throw_diag_outofboundserror(n, sz)
+    sz1, sz2 = sz
+    throw(ArgumentError(LazyString(lazy"requested diagonal, $n, must be at least $(-sz1) ",
+            lazy"and at most $sz2 for an $(sz1)-by-$(sz2) matrix")))
+end
 
 function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
     # every branch call similar(..., ::Int) to make sure the
@@ -185,10 +198,13 @@ function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
     elseif absn == 1
         return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif absn <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-absn), zero(T))
+        v = similar(M.dv, size(M,1)-absn)
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
+        throw_diag_outofboundserror(n, size(M))
     end
 end
 function diag(M::SymTridiagonal, n::Integer=0)
@@ -203,8 +219,7 @@ function diag(M::SymTridiagonal, n::Integer=0)
     elseif n <= size(M,1)
         throw(ArgumentError("requested diagonal contains undefined zeros of an array type"))
     else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
+        throw_diag_outofboundserror(n, size(M))
     end
 end
 
@@ -328,7 +343,7 @@ end
 
 # tril and triu
 
-function istriu(M::SymTridiagonal, k::Integer=0)
+Base.@constprop :aggressive function istriu(M::SymTridiagonal, k::Integer=0)
     if k <= -1
         return true
     elseif k == 0
@@ -337,7 +352,7 @@ function istriu(M::SymTridiagonal, k::Integer=0)
         return iszero(_evview(M)) && iszero(M.dv)
     end
 end
-istril(M::SymTridiagonal, k::Integer) = istriu(M, -k)
+Base.@constprop :aggressive istril(M::SymTridiagonal, k::Integer) = istriu(M, -k)
 iszero(M::SymTridiagonal) =  iszero(_evview(M)) && iszero(M.dv)
 isone(M::SymTridiagonal) =  iszero(_evview(M)) && all(isone, M.dv)
 isdiag(M::SymTridiagonal) =  iszero(_evview(M))
@@ -346,8 +361,8 @@ isdiag(M::SymTridiagonal) =  iszero(_evview(M))
 function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
         fill!(M.ev, zero(T))
         fill!(M.dv, zero(T))
@@ -357,7 +372,7 @@ function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
     elseif k == 0
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
-    elseif k >= 1
+    else # if k >= 1
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     end
 end
@@ -365,8 +380,8 @@ end
 function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
         fill!(M.ev, zero(T))
         fill!(M.dv, zero(T))
@@ -376,7 +391,7 @@ function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
     elseif k == 0
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
-    elseif k <= -1
+    else # if k <= -1
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     end
 end
@@ -441,7 +456,7 @@ end
     end
 end
 
-@inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
+@inline function getindex(A::SymTridiagonal{T}, i::Int, j::Int) where T
     @boundscheck checkbounds(A, i, j)
     if i == j
         return symmetric((@inbounds A.dv[i]), :U)::symmetric_type(eltype(A.dv))
@@ -454,14 +469,18 @@ end
     end
 end
 
+Base._reverse(A::SymTridiagonal, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::SymTridiagonal, dims::Colon) = SymTridiagonal(reverse(A.dv), reverse(A.ev))
+Base._reverse!(A::SymTridiagonal, dims::Colon) = (reverse!(A.dv); reverse!(A.ev); A)
+
 @inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
     else
-        throw(ArgumentError("cannot set off-diagonal entry ($i, $j)"))
+        throw(ArgumentError(lazy"cannot set off-diagonal entry ($i, $j)"))
     end
-    return x
+    return A
 end
 
 ## Tridiagonal matrices ##
@@ -474,9 +493,9 @@ struct Tridiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
         require_one_based_indexing(dl, d, du)
         n = length(d)
         if (length(dl) != n-1 || length(du) != n-1) && !(length(d) == 0 && length(dl) == 0 && length(du) == 0)
-            throw(ArgumentError(string("cannot construct Tridiagonal from incompatible ",
+            throw(ArgumentError(LazyString("cannot construct Tridiagonal from incompatible ",
                 "lengths of subdiagonal, diagonal and superdiagonal: ",
-                "($(length(dl)), $(length(d)), $(length(du)))")))
+                lazy"($(length(dl)), $(length(d)), $(length(du)))")))
         end
         new{T,V}(dl, d, Base.unalias(dl, du))
     end
@@ -586,15 +605,14 @@ axes(M::Tridiagonal) = (ax = axes(M.d,1); (ax, ax))
 
 function Matrix{T}(M::Tridiagonal) where {T}
     A = Matrix{T}(undef, size(M))
-    n = length(M.d)
-    n == 0 && return A
-    n > 2 && fill!(A, zero(T))
-    for i in 1:n-1
-        A[i,i] = M.d[i]
-        A[i+1,i] = M.dl[i]
-        A[i,i+1] = M.du[i]
-    end
-    A[n,n] = M.d[n]
+    if haszero(T) # optimized path for types with zero(T) defined
+        size(A,1) > 2 && fill!(A, zero(T))
+        copyto!(view(A, diagind(A)), M.d)
+        copyto!(view(A, diagind(A,1)), M.du)
+        copyto!(view(A, diagind(A,-1)), M.dl)
+    else
+        copyto!(A, M)
+    end
     A
 end
 Matrix(M::Tridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
@@ -604,7 +622,13 @@ similar(M::Tridiagonal, ::Type{T}) where {T} = Tridiagonal(similar(M.dl, T), sim
 similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(M.d, T, dims)
 
 # Operations on Tridiagonal matrices
-copyto!(dest::Tridiagonal, src::Tridiagonal) = (copyto!(dest.dl, src.dl); copyto!(dest.d, src.d); copyto!(dest.du, src.du); dest)
+# copyto! for matching axes
+function _copyto_banded!(dest::Tridiagonal, src::Tridiagonal)
+    copyto!(dest.dl, src.dl)
+    copyto!(dest.d, src.d)
+    copyto!(dest.du, src.du)
+    dest
+end
 
 #Elementary operations
 for func in (:conj, :copy, :real, :imag)
@@ -613,13 +637,13 @@ for func in (:conj, :copy, :real, :imag)
     end
 end
 
-adjoint(S::Tridiagonal) = Adjoint(S)
-transpose(S::Tridiagonal) = Transpose(S)
-adjoint(S::Tridiagonal{<:Real}) = Tridiagonal(S.du, S.d, S.dl)
+adjoint(S::Tridiagonal{<:Number}) = Tridiagonal(vec(adjoint(S.du)), vec(adjoint(S.d)), vec(adjoint(S.dl)))
+adjoint(S::Tridiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
+    Tridiagonal(adjoint(parent(S.du)), adjoint(parent(S.d)), adjoint(parent(S.dl)))
 transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
 permutedims(T::Tridiagonal) = Tridiagonal(T.du, T.d, T.dl)
 function permutedims(T::Tridiagonal, perm)
-    Base.checkdims_perm(T, T, perm)
+    Base.checkdims_perm(axes(T), axes(T), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(T) : T
 end
 Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
@@ -640,10 +664,14 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T
     elseif n == 1
         return copyto!(similar(M.du, length(M.du)), M.du)
     elseif abs(n) <= size(M,1)
-        return fill!(similar(M.d, size(M,1)-abs(n)), zero(T))
+        v = similar(M.d, size(M,1)-abs(n))
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
+        throw(ArgumentError(LazyString(lazy"requested diagonal, $n, must be at least $(-size(M, 1)) ",
+            lazy"and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
     end
 end
 
@@ -673,7 +701,7 @@ end
     end
 end
 
-@inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
+@inline function getindex(A::Tridiagonal{T}, i::Int, j::Int) where T
     @boundscheck checkbounds(A, i, j)
     if i == j
         return @inbounds A.d[i]
@@ -686,6 +714,19 @@ end
     end
 end
 
+@inline function getindex(A::Tridiagonal{T}, b::BandIndex) where T
+    @boundscheck checkbounds(A, b)
+    if b.band == 0
+        return @inbounds A.d[b.index]
+    elseif b.band == -1
+        return @inbounds A.dl[b.index]
+    elseif b.band == 1
+        return @inbounds A.du[b.index]
+    else
+        return zero(T)
+    end
+end
+
 @inline function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
@@ -695,10 +736,10 @@ end
     elseif j - i == 1
         @inbounds A.du[i] = x
     elseif !iszero(x)
-        throw(ArgumentError(string("cannot set entry ($i, $j) off ",
-            "the tridiagonal band to a nonzero value ($x)")))
+        throw(ArgumentError(LazyString(lazy"cannot set entry ($i, $j) off ",
+            lazy"the tridiagonal band to a nonzero value ($x)")))
     end
-    return x
+    return A
 end
 
 ## structured matrix methods ##
@@ -706,12 +747,25 @@ function Base.replace_in_print_matrix(A::Tridiagonal,i::Integer,j::Integer,s::Ab
     i==j-1||i==j||i==j+1 ? s : Base.replace_with_centered_mark(s)
 end
 
+# reverse
+
+Base._reverse(A::Tridiagonal, dims) = reverse!(Matrix(A); dims)
+Base._reverse(A::Tridiagonal, dims::Colon) = Tridiagonal(reverse(A.du), reverse(A.d), reverse(A.dl))
+function Base._reverse!(A::Tridiagonal, dims::Colon)
+    n = length(A.du) # == length(A.dl), & always 1-based
+    # reverse and swap A.dl and A.du:
+    @inbounds for i in 1:n
+        A.dl[i], A.du[n+1-i] = A.du[n+1-i], A.dl[i]
+    end
+    reverse!(A.d)
+    return A
+end
 
 #tril and triu
 
 iszero(M::Tridiagonal) = iszero(M.dl) && iszero(M.d) && iszero(M.du)
 isone(M::Tridiagonal) = iszero(M.dl) && all(isone, M.d) && iszero(M.du)
-function istriu(M::Tridiagonal, k::Integer=0)
+Base.@constprop :aggressive function istriu(M::Tridiagonal, k::Integer=0)
     if k <= -1
         return true
     elseif k == 0
@@ -722,7 +776,7 @@ function istriu(M::Tridiagonal, k::Integer=0)
         return iszero(M.dl) && iszero(M.d) && iszero(M.du)
     end
 end
-function istril(M::Tridiagonal, k::Integer=0)
+Base.@constprop :aggressive function istril(M::Tridiagonal, k::Integer=0)
     if k >= 1
         return true
     elseif k == 0
@@ -738,8 +792,8 @@ isdiag(M::Tridiagonal) = iszero(M.dl) && iszero(M.du)
 function tril!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
         fill!(M.dl, zero(T))
         fill!(M.d, zero(T))
@@ -756,8 +810,8 @@ end
 function triu!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
+        throw(ArgumentError(LazyString(lazy"the requested diagonal, $k, must be at least ",
+            lazy"$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
         fill!(M.dl, zero(T))
         fill!(M.d, zero(T))
@@ -907,7 +961,7 @@ function cholesky(S::SymTridiagonal, ::NoPivot = NoPivot(); check::Bool = true)
         check && checkpositivedefinite(-1)
         return Cholesky(S, 'U', convert(BlasInt, -1))
     end
-    T = choltype(eltype(S))
+    T = choltype(S)
     cholesky!(Hermitian(Bidiagonal{T}(diag(S, 0), diag(S, 1), :U)), NoPivot(); check = check)
 end
 
@@ -925,7 +979,7 @@ function ldiv!(A::Tridiagonal, B::AbstractVecOrMat)
     LinearAlgebra.require_one_based_indexing(B)
     n = size(A, 1)
     if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
+        throw(DimensionMismatch(lazy"matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
     end
     nrhs = size(B, 2)
 
@@ -981,3 +1035,39 @@ function ldiv!(A::Tridiagonal, B::AbstractVecOrMat)
     end
     return B
 end
+
+# combinations of Tridiagonal and Symtridiagonal
+# copyto! for matching axes
+function _copyto_banded!(A::Tridiagonal, B::SymTridiagonal)
+    Bev = _evview(B)
+    A.du .= Bev
+    # Broadcast identity for numbers to access the faster copyto! path
+    # This uses the fact that transpose(x::Number) = x and symmetric(x::Number) = x
+    A.dl .= (eltype(B) <: Number ? identity : transpose).(Bev)
+    A.d .= (eltype(B) <: Number ? identity : symmetric).(B.dv)
+    return A
+end
+function _copyto_banded!(A::SymTridiagonal, B::Tridiagonal)
+    issymmetric(B) || throw(ArgumentError("cannot copy an asymmetric Tridiagonal matrix to a SymTridiagonal"))
+    A.dv .= B.d
+    _evview(A) .= B.du
+    return A
+end
+
+# display
+function show(io::IO, T::Tridiagonal)
+    print(io, "Tridiagonal(")
+    show(io, T.dl)
+    print(io, ", ")
+    show(io, T.d)
+    print(io, ", ")
+    show(io, T.du)
+    print(io, ")")
+end
+function show(io::IO, S::SymTridiagonal)
+    print(io, "SymTridiagonal(")
+    show(io, eltype(S) <: Number ? S.dv : view(S, diagind(S, IndexStyle(S))))
+    print(io, ", ")
+    show(io, S.ev)
+    print(io, ")")
+end
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index 5c052df1ff48e..b75886b8d99fb 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-import Base: copy, adjoint, getindex, show, transpose, one, zero, inv,
+import Base: copy, adjoint, getindex, show, transpose, one, zero, inv, float,
              hcat, vcat, hvcat, ^
 
 """
@@ -124,6 +124,8 @@ conj(J::UniformScaling) = UniformScaling(conj(J.λ))
 real(J::UniformScaling) = UniformScaling(real(J.λ))
 imag(J::UniformScaling) = UniformScaling(imag(J.λ))
 
+float(J::UniformScaling) = UniformScaling(float(J.λ))
+
 transpose(J::UniformScaling) = J
 adjoint(J::UniformScaling) = UniformScaling(conj(J.λ))
 
@@ -159,7 +161,7 @@ isposdef(J::UniformScaling) = isposdef(J.λ)
 (-)(A::AbstractMatrix, J::UniformScaling)   = A + (-J)
 
 # matrix functions
-for f in ( :exp,   :log,
+for f in ( :exp,   :log, :cis,
            :expm1, :log1p,
            :sqrt,  :cbrt,
            :sin,   :cos,   :tan,
@@ -172,6 +174,9 @@ for f in ( :exp,   :log,
            :acsch, :asech, :acoth )
     @eval Base.$f(J::UniformScaling) = UniformScaling($f(J.λ))
 end
+for f in (:sincos, :sincosd)
+    @eval Base.$f(J::UniformScaling) = map(UniformScaling, $f(J.λ))
+end
 
 # Unit{Lower/Upper}Triangular matrices become {Lower/Upper}Triangular under
 # addition with a UniformScaling
diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl
index 19b872d685668..5bfd62b467718 100644
--- a/stdlib/LinearAlgebra/test/abstractq.jl
+++ b/stdlib/LinearAlgebra/test/abstractq.jl
@@ -39,6 +39,12 @@ n = 5
         @test Q'*I ≈ Q.Q'*I rtol=2eps(real(T))
         @test I*Q ≈ Q.Q*I rtol=2eps(real(T))
         @test I*Q' ≈ I*Q.Q' rtol=2eps(real(T))
+        @test Q^3 ≈ Q*Q*Q
+        @test Q^2 ≈ Q*Q
+        @test Q^1 == Q
+        @test Q^(-1) == Q'
+        @test (Q')^(-1) == Q
+        @test (Q')^2 ≈ Q'*Q'
         @test abs(det(Q)) ≈ 1
         @test logabsdet(Q)[1] ≈ 0 atol=2n*eps(real(T))
         y = rand(T, n)
@@ -85,7 +91,7 @@ n = 5
         @test Q * x ≈ Q.Q * x
         @test Q' * x ≈ Q.Q' * x
     end
-    A = rand(Float64, 5, 3)
+    A = randn(Float64, 5, 3)
     F = qr(A)
     Q = MyQ(F.Q)
     Prect = Matrix(F.Q)
@@ -96,6 +102,55 @@ n = 5
     @test Q ≈ Prect
     @test Q ≈ Psquare
     @test Q ≈ F.Q*I
+
+    @testset "similar" begin
+        QS = similar(Q)
+        @test QS isa Matrix{eltype(Q)}
+        @test size(QS) == size(Q)
+
+        QS = similar(Q, Int8)
+        @test QS isa Matrix{Int8}
+        @test size(QS) == size(Q)
+
+        QS = similar(Q, 1)
+        @test QS isa Vector{eltype(Q)}
+        @test size(QS) == (1,)
+
+        QS = similar(Q, Int8, 2)
+        @test QS isa Vector{Int8}
+        @test size(QS) == (2,)
+
+        QS = similar(Q, Int8, ())
+        @test QS isa Array{Int8,0}
+
+        QS = similar(Q, ())
+        @test QS isa Array{eltype(Q),0}
+    end
+
+    # matrix division
+    q, r = F
+    R = randn(Float64, 5, 5)
+    @test q / r ≈ Matrix(q) / r
+    @test_throws DimensionMismatch MyQ(q) / r # doesn't have size flexibility
+    @test q / R ≈ collect(q) / R
+    @test copy(r') \ q' ≈ (q / r)'
+    @test_throws DimensionMismatch copy(r') \ MyQ(q')
+    @test r \ q' ≈ r \ Matrix(q)'
+    @test R \ q' ≈ R \ MyQ(q') ≈ R \ collect(q')
+    @test R \ q ≈ R \ MyQ(q) ≈ R \ collect(q)
+    B = copy(A')
+    G = lq(B)
+    l, q = G
+    L = R
+    @test l \ q ≈ l \ Matrix(q)
+    @test_throws DimensionMismatch l \ MyQ(q)
+    @test L \ q ≈ L \ collect(q)
+    @test q' / copy(l') ≈ (l \ q)'
+    @test_throws DimensionMismatch MyQ(q') / copy(l')
+    @test q' / l ≈ Matrix(q)' / l
+    @test q' / L ≈ MyQ(q') / L ≈ collect(q)' / L
+    @test q / L ≈ Matrix(q) / L
+    @test MyQ(q) / L ≈ collect(q) / L
 end
 
 end # module
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
index 2c533af37f912..6cf2ff9ada09c 100644
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ b/stdlib/LinearAlgebra/test/adjtrans.jl
@@ -532,6 +532,11 @@ end
     @test String(take!(io)) == "transpose(::Matrix{Float64})"
 end
 
+@testset "show" begin
+    @test repr(adjoint([1,2,3])) == "adjoint([1, 2, 3])"
+    @test repr(transpose([1f0,2f0])) == "transpose(Float32[1.0, 2.0])"
+end
+
 @testset "strided transposes" begin
     for t in (Adjoint, Transpose)
         @test strides(t(rand(3))) == (3, 1)
@@ -703,4 +708,14 @@ end
     @test B == At
 end
 
+@testset "error message in transpose" begin
+    v = zeros(2)
+    A = zeros(1,1)
+    B = zeros(2,3)
+    for (t1, t2) in Any[(A, v), (v, A), (A, B)]
+        @test_throws "axes of the destination are incompatible with that of the source" transpose!(t1, t2)
+        @test_throws "axes of the destination are incompatible with that of the source" adjoint!(t1, t2)
+    end
+end
+
 end # module TestAdjointTranspose
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index 8d48c42a7f7ea..ef50658a642fb 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -124,6 +124,9 @@ Random.seed!(1)
         Bl = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'L')
         @test_throws ArgumentError Bu[5, 4] = 1
         @test_throws ArgumentError Bl[4, 5] = 1
+
+        # setindex should return the destination
+        @test setindex!(ubd, 1, 1, 1) === ubd
     end
 
     @testset "isstored" begin
@@ -143,11 +146,9 @@ Random.seed!(1)
 
     @testset "show" begin
         BD = Bidiagonal(dv, ev, :U)
-        dstring = sprint(Base.print_matrix,BD.dv')
-        estring = sprint(Base.print_matrix,BD.ev')
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n super:$estring"
+        @test sprint(show,BD) == "Bidiagonal($(repr(dv)), $(repr(ev)), :U)"
         BD = Bidiagonal(dv,ev,:L)
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n sub:$estring"
+        @test sprint(show,BD) == "Bidiagonal($(repr(dv)), $(repr(ev)), :L)"
     end
 
     @testset for uplo in (:U, :L)
@@ -166,6 +167,9 @@ Random.seed!(1)
 
         @testset for func in (conj, transpose, adjoint)
             @test func(func(T)) == T
+            if func ∈ (transpose, adjoint)
+                @test func(func(T)) === T
+            end
         end
 
         @testset "permutedims(::Bidiagonal)" begin
@@ -825,6 +829,9 @@ end
         end
     end
 
+    @test diag(BU, -1) == [zeros(size(dv[i+1], 1), size(dv[i],2)) for i in 1:length(dv)-1]
+    @test diag(BL, 1) == [zeros(size(dv[i], 1), size(dv[i+1],2)) for i in 1:length(dv)-1]
+
     M = ones(2,2)
     for n in 0:1
         dv = fill(M, n)
@@ -834,6 +841,26 @@ end
     end
 end
 
+@testset "copyto!" begin
+    ev, dv = [1:4;], [1:5;]
+    B = Bidiagonal(dv, ev, :U)
+    B2 = copyto!(zero(B), B)
+    @test B2 == B
+    for (ul1, ul2) in ((:U, :L), (:L, :U))
+        B3 = Bidiagonal(dv, zero(ev), ul1)
+        B2 = Bidiagonal(zero(dv), zero(ev), ul2)
+        @test copyto!(B2, B3) == B3
+    end
+
+    @testset "mismatched sizes" begin
+        dv2 = [4; @view dv[2:end]]
+        @test copyto!(B, Bidiagonal([4], Int[], :U)) == Bidiagonal(dv2, ev, :U)
+        @test copyto!(B, Bidiagonal([4], Int[], :L)) == Bidiagonal(dv2, ev, :U)
+        @test copyto!(B, Bidiagonal(Int[], Int[], :U)) == Bidiagonal(dv, ev, :U)
+        @test copyto!(B, Bidiagonal(Int[], Int[], :L)) == Bidiagonal(dv, ev, :U)
+    end
+end
+
 @testset "copyto! with UniformScaling" begin
     @testset "Fill" begin
         for len in (4, InfiniteArrays.Infinity())
@@ -884,4 +911,141 @@ end
     @test mul!(C1, B, sv, 1, 2) == mul!(C2, B, v, 1 ,2)
 end
 
+@testset "Reverse operation on Bidiagonal" begin
+    n = 5
+    d = randn(n)
+    e = randn(n - 1)
+    for uplo in (:U, :L)
+        B = Bidiagonal(d, e, uplo)
+        @test reverse(B, dims=1) == reverse(Matrix(B), dims=1)
+        @test reverse(B, dims=2) == reverse(Matrix(B), dims=2)
+        @test reverse(B)::Bidiagonal == reverse(Matrix(B))
+    end
+end
+
+@testset "Matrix conversion for non-numeric" begin
+    B = Bidiagonal(fill(Diagonal([1,3]), 3), fill(Diagonal([1,3]), 2), :U)
+    M = Matrix{eltype(B)}(B)
+    @test M isa Matrix{eltype(B)}
+    @test M == B
+end
+
+@testset "getindex with Integers" begin
+    dv, ev = 1:4, 1:3
+    B = Bidiagonal(dv, ev, :U)
+    @test_throws "invalid index" B[3, true]
+    @test B[1,2] == B[Int8(1),UInt16(2)] == B[big(1), Int16(2)]
+end
+
+@testset "rmul!/lmul! with banded matrices" begin
+    dv, ev = rand(4), rand(3)
+    for A in (Bidiagonal(dv, ev, :U), Bidiagonal(dv, ev, :L))
+        @testset "$(nameof(typeof(B)))" for B in (
+                                Bidiagonal(dv, ev, :U),
+                                Bidiagonal(dv, ev, :L),
+                                Diagonal(dv)
+                        )
+            @test_throws ArgumentError rmul!(B, A)
+            @test_throws ArgumentError lmul!(A, B)
+        end
+    end
+    @testset "non-commutative" begin
+        S32 = SizedArrays.SizedArray{(3,2)}(rand(3,2))
+        S33 = SizedArrays.SizedArray{(3,3)}(rand(3,3))
+        S22 = SizedArrays.SizedArray{(2,2)}(rand(2,2))
+        for uplo in (:L, :U)
+            B = Bidiagonal(fill(S32, 4), fill(S32, 3), uplo)
+            D = Diagonal(fill(S22, size(B,2)))
+            @test rmul!(copy(B), D) ≈ B * D
+            D = Diagonal(fill(S33, size(B,1)))
+            @test lmul!(D, copy(B)) ≈ D * B
+        end
+
+        B = Bidiagonal(fill(S33, 4), fill(S33, 3), :U)
+        D = Diagonal(fill(S32, 4))
+        @test lmul!(B, Array(D)) ≈ B * D
+        B = Bidiagonal(fill(S22, 4), fill(S22, 3), :U)
+        @test rmul!(Array(D), B) ≈ D * B
+    end
+end
+
+@testset "mul with Diagonal" begin
+    for n in 0:4
+        dv, ev = rand(n), rand(max(n-1,0))
+        d = rand(n)
+        for uplo in (:U, :L)
+            A = Bidiagonal(dv, ev, uplo)
+            D = Diagonal(d)
+            M = Matrix(A)
+            S = similar(A, size(A))
+            @test A * D ≈ mul!(S, A, D) ≈ M * D
+            @test D * A ≈ mul!(S, D, A) ≈ D * M
+            @test mul!(copy(S), D, A, 2, 2) ≈ D * M * 2 + S * 2
+            @test mul!(copy(S), A, D, 2, 2) ≈ M * D * 2 + S * 2
+
+            A2 = Bidiagonal(dv, zero(ev), uplo)
+            M2 = Array(A2)
+            S2 = Bidiagonal(copy(dv), copy(ev), uplo == (:U) ? (:L) : (:U))
+            MS2 = Array(S2)
+            @test mul!(copy(S2), D, A2) ≈ D * M2
+            @test mul!(copy(S2), A2, D) ≈ M2 * D
+            @test mul!(copy(S2), A2, D, 2, 2) ≈ M2 * D * 2 + MS2 * 2
+            @test mul!(copy(S2), D, A2, 2, 2) ≈ D * M2 * 2 + MS2 * 2
+        end
+    end
+
+    t1 = SizedArrays.SizedArray{(2,3)}([1 2 3; 3 4 5])
+    t2 = SizedArrays.SizedArray{(3,2)}([1 2; 3 4; 5 6])
+    dv, ev, d = fill(t1, 4), fill(2t1, 3), fill(t2, 4)
+    for uplo in (:U, :L)
+        A = Bidiagonal(dv, ev, uplo)
+        D = Diagonal(d)
+        @test A * D ≈ Array(A) * Array(D)
+        @test D * A ≈ Array(D) * Array(A)
+    end
+end
+
+@testset "conversion to Tridiagonal for immutable bands" begin
+    n = 4
+    dv = FillArrays.Fill(3, n)
+    ev = FillArrays.Fill(2, n-1)
+    z = FillArrays.Fill(0, n-1)
+    dvf = FillArrays.Fill(Float64(3), n)
+    evf = FillArrays.Fill(Float64(2), n-1)
+    zf = FillArrays.Fill(Float64(0), n-1)
+    B = Bidiagonal(dv, ev, :U)
+    @test Tridiagonal{Int}(B) === Tridiagonal(B) === Tridiagonal(z, dv, ev)
+    @test Tridiagonal{Float64}(B) === Tridiagonal(zf, dvf, evf)
+    B = Bidiagonal(dv, ev, :L)
+    @test Tridiagonal{Int}(B) === Tridiagonal(B) === Tridiagonal(ev, dv, z)
+    @test Tridiagonal{Float64}(B) === Tridiagonal(evf, dvf, zf)
+end
+
+@testset "off-band indexing error" begin
+    B = Bidiagonal(Vector{BigInt}(undef, 4), Vector{BigInt}(undef,3), :L)
+    @test_throws "cannot set entry" B[1,2] = 4
+end
+
+@testset "mul with empty arrays" begin
+    A = zeros(5,0)
+    B = Bidiagonal(zeros(0), zeros(0), :U)
+    BL = Bidiagonal(zeros(5), zeros(4), :U)
+    @test size(A * B) == size(A)
+    @test size(BL * A) == size(A)
+    @test size(B * B) == size(B)
+    C = similar(A)
+    @test mul!(C, A, B) == A * B
+    @test mul!(C, BL, A) == BL * A
+    @test mul!(similar(B), B, B) == B * B
+    @test mul!(similar(B, size(B)), B, B) == B * B
+
+    v = zeros(size(B,2))
+    @test size(B * v) == size(v)
+    @test mul!(similar(v), B, v) == B * v
+
+    D = Diagonal(zeros(size(B,2)))
+    @test size(B * D) == size(D * B) == size(D)
+    @test mul!(similar(D), B, D) == mul!(similar(D), D, B) == B * D
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl
index d2305844db63e..68c519d1197ed 100644
--- a/stdlib/LinearAlgebra/test/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl
@@ -114,7 +114,7 @@ bimint = rand(1:5, n, 2)
                 bc1 = bunchkaufman(Symmetric(asym, uplo))
                 @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ asym[bc1.p, bc1.p]
                 @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ bc1.P*asym*transpose(bc1.P)
-                @test_throws ErrorException bc1.Z
+                @test_throws FieldError bc1.Z
                 @test_throws ArgumentError uplo === :L ? bc1.U : bc1.L
             end
             # test Base.iterate
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index a795eb8d44a03..00bfc18a21638 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -6,20 +6,26 @@ using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted,
     PosDefException, RankDeficientException, chkfullrank
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+
+isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
+using .Main.Quaternions
+
 function unary_ops_tests(a, ca, tol; n=size(a, 1))
     @test inv(ca)*a ≈ Matrix(I, n, n)
     @test a*inv(ca) ≈ Matrix(I, n, n)
     @test abs((det(ca) - det(a))/det(ca)) <= tol # Ad hoc, but statistically verified, revisit
-    @test logdet(ca) ≈ logdet(a)
-    @test logdet(ca) ≈ log(det(ca))  # logdet is less likely to overflow
+    @test logdet(ca) ≈ logdet(a) broken = eltype(a) <: Quaternion
+    @test logdet(ca) ≈ log(det(ca)) # logdet is less likely to overflow
     logabsdet_ca = logabsdet(ca)
     logabsdet_a = logabsdet(a)
     @test logabsdet_ca[1] ≈ logabsdet_a[1]
     @test logabsdet_ca[2] ≈ logabsdet_a[2]
     @test isposdef(ca)
-    @test_throws ErrorException ca.Z
+    @test_throws FieldError ca.Z
     @test size(ca) == size(a)
     @test Array(copy(ca)) ≈ a
+    @test tr(ca) ≈ tr(a) skip=ca isa CholeskyPivoted
 end
 
 function factor_recreation_tests(a_U, a_L)
@@ -52,16 +58,22 @@ end
     breal = randn(n,2)/2
     bimg  = randn(n,2)/2
 
-    for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-        a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-        a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
+    for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Complex{BigFloat}, Quaternion{Float64}, Int)
+        a = if eltya == Int
+            rand(1:7, n, n)
+        elseif eltya <: Real
+            convert(Matrix{eltya}, areal)
+        elseif eltya <: Complex
+            convert(Matrix{eltya}, complex.(areal, aimg))
+        else
+            convert(Matrix{eltya}, Quaternion.(areal, aimg, a2real, a2img))
+        end
 
         ε = εa = eps(abs(float(one(eltya))))
 
         # Test of symmetric pos. def. strided matrix
-        apd  = a'*a
-        @inferred cholesky(apd)
-        capd  = factorize(apd)
+        apd  = Matrix(Hermitian(a'*a))
+        capd  = @inferred cholesky(apd)
         r     = capd.U
         κ     = cond(apd, 1) #condition number
 
@@ -69,7 +81,7 @@ end
         if eltya != Int
             @test Factorization{eltya}(capd) === capd
             if eltya <: Real
-                @test Array(Factorization{complex(eltya)}(capd)) ≈ Array(factorize(complex(apd)))
+                @test Array(Factorization{complex(eltya)}(capd)) ≈ Array(cholesky(complex(apd)))
                 @test eltype(Factorization{complex(eltya)}(capd)) == complex(eltya)
             end
         end
@@ -86,16 +98,16 @@ end
         #but only with Random.seed!(1234321) set before the loops.
         E = abs.(apd - r'*r)
         for i=1:n, j=1:n
-            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j]))
+            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*sqrt(real(apd[i,i]*apd[j,j]))
         end
         E = abs.(apd - Matrix(capd))
         for i=1:n, j=1:n
-            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j]))
+            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*sqrt(real(apd[i,i]*apd[j,j]))
         end
         @test LinearAlgebra.issuccess(capd)
         @inferred(logdet(capd))
 
-        apos = apd[1,1]
+        apos = real(apd[1,1])
         @test all(x -> x ≈ √apos, cholesky(apos).factors)
 
         # Test cholesky with Symmetric/Hermitian upper/lower
@@ -131,7 +143,7 @@ end
                 @test Matrix(@inferred cholesky(Symmetric(S, uplo))) ≈ S
             end
         end
-        @test Matrix(cholesky(S).U) ≈ [2 -1; 0 sqrt(eltya(3))] / sqrt(eltya(2))
+        @test Matrix(cholesky(S).U) ≈ [2 -1; 0 float(eltya)(sqrt(real(eltya)(3)))] / float(eltya)(sqrt(real(eltya)(2)))
         @test Matrix(cholesky(S)) ≈ S
 
         # test extraction of factor and re-creating original matrix
@@ -142,17 +154,25 @@ end
         end
 
         #pivoted upper Cholesky
-        if eltya != BigFloat
-            cpapd = cholesky(apdh, RowMaximum())
-            unary_ops_tests(apdh, cpapd, ε*κ*n)
+        for tol in (0.0, -1.0), APD in (apdh, apdhL)
+            cpapd = cholesky(APD, RowMaximum(), tol=tol)
+            unary_ops_tests(APD, cpapd, ε*κ*n)
             @test rank(cpapd) == n
-            @test all(diff(diag(real(cpapd.factors))).<=0.) # diagonal should be non-increasing
+            @test all(diff(real(diag(cpapd.factors))).<=0.) # diagonal should be non-increasing
 
             @test cpapd.P*cpapd.L*cpapd.U*cpapd.P' ≈ apd
         end
 
         for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-            b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
+            b = if eltya <: Quaternion
+                convert(Matrix{eltya}, Quaternion.(breal, bimg, bimg, bimg))
+            elseif eltyb == Int
+                rand(1:5, n, 2)
+            elseif eltyb <: Complex
+                convert(Matrix{eltyb}, complex.(breal, bimg))
+            elseif eltyb <: Real
+                convert(Matrix{eltyb}, breal)
+            end
             εb = eps(abs(float(one(eltyb))))
             ε = max(εa,εb)
 
@@ -166,29 +186,30 @@ end
 
                 @test norm(a*(capd\(a'*b)) - b,1)/norm(b,1) <= ε*κ*n # Ad hoc, revisit
 
-                if eltya != BigFloat && eltyb != BigFloat
-                    lapd = cholesky(apdhL)
-                    @test norm(apd * (lapd\b) - b)/norm(b) <= ε*κ*n
-                    @test norm(apd * (lapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-                end
+                lapd = cholesky(apdhL)
+                @test norm(apd * (lapd\b) - b)/norm(b) <= ε*κ*n
+                @test norm(apd * (lapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
 
-                if eltya != BigFloat && eltyb != BigFloat # Note! Need to implement pivoted Cholesky decomposition in julia
+                cpapd = cholesky(apdh, RowMaximum())
+                @test norm(apd * (cpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
+                @test norm(apd * (cpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
 
-                    cpapd = cholesky(apdh, RowMaximum())
-                    @test norm(apd * (cpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
-                    @test norm(apd * (cpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-
-                    lpapd = cholesky(apdhL, RowMaximum())
-                    @test norm(apd * (lpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
-                    @test norm(apd * (lpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-                end
+                lpapd = cholesky(apdhL, RowMaximum())
+                @test norm(apd * (lpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
+                @test norm(apd * (lpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
             end
         end
 
         for eltyb in (Float64, ComplexF64)
             Breal = convert(Matrix{BigFloat}, randn(n,n)/2)
             Bimg  = convert(Matrix{BigFloat}, randn(n,n)/2)
-            B = (eltya <: Complex || eltyb <: Complex) ? complex.(Breal, Bimg) : Breal
+            B = if eltya <: Quaternion
+                Quaternion.(Float64.(Breal), Float64.(Bimg), Float64.(Bimg), Float64.(Bimg))
+            elseif eltya <: Complex || eltyb <: Complex
+                complex.(Breal, Bimg)
+            else
+                Breal
+            end
             εb = eps(abs(float(one(eltyb))))
             ε = max(εa,εb)
 
@@ -200,20 +221,24 @@ end
                 ldiv!(capd, BB)
                 @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                if eltya != BigFloat
-                    cpapd = cholesky(apdh, RowMaximum())
-                    BB = copy(B)
-                    ldiv!(cpapd, BB)
-                    @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                end
+                cpapd = cholesky(apdh, RowMaximum())
+                BB = copy(B)
+                ldiv!(cpapd, BB)
+                @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
+                @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
             end
         end
 
         @testset "solve with generic Cholesky" begin
             Breal = convert(Matrix{BigFloat}, randn(n,n)/2)
             Bimg  = convert(Matrix{BigFloat}, randn(n,n)/2)
-            B = eltya <: Complex ? complex.(Breal, Bimg) : Breal
+            B = if eltya <: Quaternion
+                eltya.(Breal, Bimg, Bimg, Bimg)
+            elseif eltya <: Complex
+                complex.(Breal, Bimg)
+            else
+                Breal
+            end
             εb = eps(abs(float(one(eltype(B)))))
             ε = max(εa,εb)
 
@@ -221,28 +246,26 @@ end
 
                 # Test error bound on linear solver: LAWNS 14, Theorem 2.1
                 # This is a surprisingly loose bound
-                cpapd = cholesky(eltya <: Complex ? apdh : apds)
+                cpapd = cholesky(eltya <: Real ? apds : apdh)
                 BB = copy(B)
                 rdiv!(BB, cpapd)
                 @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                cpapd = cholesky(eltya <: Complex ? apdhL : apdsL)
+                cpapd = cholesky(eltya <: Real ? apdsL : apdhL)
+                BB = copy(B)
+                rdiv!(BB, cpapd)
+                @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
+                @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
+                cpapd = cholesky(eltya <: Real ? apds : apdh, RowMaximum())
+                BB = copy(B)
+                rdiv!(BB, cpapd)
+                @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
+                @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
+                cpapd = cholesky(eltya <: Real ? apdsL : apdhL, RowMaximum())
                 BB = copy(B)
                 rdiv!(BB, cpapd)
                 @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                if eltya != BigFloat
-                    cpapd = cholesky(eltya <: Complex ? apdh : apds, RowMaximum())
-                    BB = copy(B)
-                    rdiv!(BB, cpapd)
-                    @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    cpapd = cholesky(eltya <: Complex ? apdhL : apdsL, RowMaximum())
-                    BB = copy(B)
-                    rdiv!(BB, cpapd)
-                    @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                end
             end
         end
         if eltya <: BlasFloat
@@ -270,22 +293,32 @@ end
         @test_throws PosDefException cholesky!(copy(M))
         @test_throws PosDefException cholesky(M; check = true)
         @test_throws PosDefException cholesky!(copy(M); check = true)
-        @test !LinearAlgebra.issuccess(cholesky(M; check = false))
-        @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false))
+        @test !issuccess(cholesky(M; check = false))
+        @test !issuccess(cholesky!(copy(M); check = false))
     end
-    if T !== BigFloat # generic pivoted cholesky is not implemented
-        for M in (A, Hermitian(A), B)
-            @test_throws RankDeficientException cholesky(M, RowMaximum())
-            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
-            @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
-            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
-            @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false))
-            @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false))
-            C = cholesky(M, RowMaximum(); check = false)
-            @test_throws RankDeficientException chkfullrank(C)
-            C = cholesky!(copy(M), RowMaximum(); check = false)
-            @test_throws RankDeficientException chkfullrank(C)
-        end
+    for M in (A, Hermitian(A)) # hermitian, but not semi-positive definite
+        @test_throws RankDeficientException cholesky(M, RowMaximum())
+        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
+        @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
+        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
+        @test !issuccess(cholesky(M, RowMaximum(); check = false))
+        @test !issuccess(cholesky!(copy(M), RowMaximum(); check = false))
+        C = cholesky(M, RowMaximum(); check = false)
+        @test_throws RankDeficientException chkfullrank(C)
+        C = cholesky!(copy(M), RowMaximum(); check = false)
+        @test_throws RankDeficientException chkfullrank(C)
+    end
+    for M in (B,) # not hermitian
+        @test_throws PosDefException(-1) cholesky(M, RowMaximum())
+        @test_throws PosDefException(-1) cholesky!(copy(M), RowMaximum())
+        @test_throws PosDefException(-1) cholesky(M, RowMaximum(); check = true)
+        @test_throws PosDefException(-1) cholesky!(copy(M), RowMaximum(); check = true)
+        @test !issuccess(cholesky(M, RowMaximum(); check = false))
+        @test !issuccess(cholesky!(copy(M), RowMaximum(); check = false))
+        C = cholesky(M, RowMaximum(); check = false)
+        @test_throws RankDeficientException chkfullrank(C)
+        C = cholesky!(copy(M), RowMaximum(); check = false)
+        @test_throws RankDeficientException chkfullrank(C)
     end
     @test !isposdef(A)
     str = sprint((io, x) -> show(io, "text/plain", x), cholesky(A; check = false))
@@ -358,7 +391,7 @@ end
         0.11192755545114 - 0.1603741874112385im 0.8439562576196216 + 1.0850814110398734im
         -1.0568488936791578 - 0.06025820467086475im 0.12696236014017806 - 0.09853584666755086im]
     cholesky(Hermitian(apd, :L), RowMaximum()) \ b
-    r = factorize(apd).U
+    r = cholesky(apd).U
     E = abs.(apd - r'*r)
     ε = eps(abs(float(one(ComplexF32))))
     n = 10
@@ -367,10 +400,6 @@ end
     end
 end
 
-@testset "fail for non-BLAS element types" begin
-    @test_throws ArgumentError cholesky!(Hermitian(rand(Float16, 5,5)), RowMaximum())
-end
-
 @testset "cholesky Diagonal" begin
     # real
     d = abs.(randn(3)) .+ 0.1
@@ -381,15 +410,28 @@ end
     @test CD.U ≈ Diagonal(.√d) ≈ CM.U
     @test D ≈ CD.L * CD.U
     @test CD.info == 0
+    CD = cholesky(D, RowMaximum())
+    CM = cholesky(Matrix(D), RowMaximum())
+    @test CD isa CholeskyPivoted{Float64}
+    @test CD.U ≈ Diagonal(.√sort(d, rev=true)) ≈ CM.U
+    @test D ≈ Matrix(CD)
+    @test CD.info == 0
 
     F = cholesky(Hermitian(I(3)))
     @test F isa Cholesky{Float64,<:Diagonal}
     @test Matrix(F) ≈ I(3)
+    F = cholesky(I(3), RowMaximum())
+    @test F isa CholeskyPivoted{Float64,<:Diagonal}
+    @test Matrix(F) ≈ I(3)
 
     # real, failing
     @test_throws PosDefException cholesky(Diagonal([1.0, -2.0]))
+    @test_throws RankDeficientException cholesky(Diagonal([1.0, -2.0]), RowMaximum())
     Dnpd = cholesky(Diagonal([1.0, -2.0]); check = false)
     @test Dnpd.info == 2
+    Dnpd = cholesky(Diagonal([1.0, -2.0]), RowMaximum(); check = false)
+    @test Dnpd.info == 1
+    @test Dnpd.rank == 1
 
     # complex
     D = complex(D)
@@ -399,15 +441,33 @@ end
     @test CD.U ≈ Diagonal(.√d) ≈ CM.U
     @test D ≈ CD.L * CD.U
     @test CD.info == 0
+    CD = cholesky(D, RowMaximum())
+    CM = cholesky(Matrix(D), RowMaximum())
+    @test CD isa CholeskyPivoted{ComplexF64,<:Diagonal}
+    @test CD.U ≈ Diagonal(.√sort(d, by=real, rev=true)) ≈ CM.U
+    @test D ≈ Matrix(CD)
+    @test CD.info == 0
 
     # complex, failing
     D[2, 2] = 0.0 + 0im
     @test_throws PosDefException cholesky(D)
+    @test_throws RankDeficientException cholesky(D, RowMaximum())
     Dnpd = cholesky(D; check = false)
     @test Dnpd.info == 2
+    Dnpd = cholesky(D, RowMaximum(); check = false)
+    @test Dnpd.info == 1
+    @test Dnpd.rank == 2
 
     # InexactError for Int
     @test_throws InexactError cholesky!(Diagonal([2, 1]))
+
+    # tolerance
+    D = Diagonal([0.5, 1])
+    @test_throws RankDeficientException cholesky(D, RowMaximum(), tol=nextfloat(0.5))
+    CD = cholesky(D, RowMaximum(), tol=nextfloat(0.5), check=false)
+    @test rank(CD) == 1
+    @test !issuccess(CD)
+    @test Matrix(cholesky(D, RowMaximum(), tol=prevfloat(0.5))) ≈ D
 end
 
 @testset "Cholesky for AbstractMatrix" begin
@@ -546,6 +606,38 @@ end
     @test det(B)  ≈  det(A) atol=eps()
     @test logdet(B)  ==  -Inf
     @test logabsdet(B)[1] == -Inf
- end
+end
+
+@testset "partly initialized factors" begin
+    @testset for uplo in ('U', 'L')
+        M = Matrix{BigFloat}(undef, 2, 2)
+        M[1,1] = M[2,2] = M[1+(uplo=='L'), 1+(uplo=='U')] = 3
+        C = Cholesky(M, uplo, 0)
+        @test C == C
+        @test C.L == C.U'
+        # parameters are arbitrary
+        C = CholeskyPivoted(M, uplo, [1,2], 2, 0.0, 0)
+        @test C.L == C.U'
+    end
+end
+
+@testset "diag" begin
+    for T in (Float64, ComplexF64), k in (0, 1, -3), uplo in (:U, :L)
+        A = randn(T, 100, 100)
+        P = Hermitian(A' * A, uplo)
+        C = cholesky(P)
+        @test diag(P, k) ≈ diag(C, k)
+    end
+end
+
+@testset "cholesky_of_cholesky" begin
+    for T in (Float64, ComplexF64), uplo in (:U, :L)
+        A = randn(T, 100, 100)
+        P = Hermitian(A' * A, uplo)
+        C = cholesky(P)
+        CC = cholesky(C)
+        @test C == CC
+    end
+end
 
 end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
index afc1df817a544..1d43d76899392 100644
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ b/stdlib/LinearAlgebra/test/dense.jl
@@ -1285,4 +1285,20 @@ end
     @test eltype(A) == eltype(T)
 end
 
+@testset "tr" begin
+    @testset "block matrices" begin
+        S = [1 2; 3 4]
+        M = fill(S, 3, 3)
+        @test tr(M) == 3S
+        @test tr(view(M, :, :)) == 3S
+        @test tr(view(M, axes(M)...)) == 3S
+    end
+    @testset "avoid promotion" begin
+        A = Int8[1 3; 2 4]
+        @test tr(A) === Int8(5)
+        @test tr(view(A, :, :)) === Int8(5)
+        @test tr(view(A, axes(A)...)) === Int8(5)
+    end
+end
+
 end # module TestDense
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index 18abc6940eded..83d5e4fcdf170 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -61,6 +61,16 @@ Random.seed!(1)
         @test_throws MethodError convert(Diagonal, [1,2,3,4])
         @test_throws DimensionMismatch convert(Diagonal, [1 2 3 4])
         @test_throws InexactError convert(Diagonal, ones(2,2))
+
+        # Test reversing
+        # Test reversing along rows
+        @test reverse(D, dims=1) == reverse(Matrix(D), dims=1)
+
+        # Test reversing along columns
+        @test reverse(D, dims=2) == reverse(Matrix(D), dims=2)
+
+        # Test reversing the entire matrix
+        @test reverse(D)::Diagonal == reverse(Matrix(D)) == reverse!(copy(D))
     end
 
     @testset "Basic properties" begin
@@ -607,6 +617,15 @@ end
             @test_throws ArgumentError D[i, j] = 1
         end
     end
+    # setindex should return the destination
+    @test setindex!(D, 1, 1, 1) === D
+end
+
+@testset "Test reverse" begin
+    D = Diagonal(randn(5))
+    @test reverse(D, dims=1) == reverse(Matrix(D), dims=1)
+    @test reverse(D, dims=2) == reverse(Matrix(D), dims=2)
+    @test reverse(D)::Diagonal == reverse(Matrix(D))
 end
 
 @testset "inverse" begin
@@ -762,6 +781,9 @@ end
     @test transpose(Dherm) == Diagonal([[1 1-im; 1+im 1], [1 1-im; 1+im 1]])
     @test adjoint(Dsym) == Diagonal([[1 1-im; 1-im 1], [1 1-im; 1-im 1]])
     @test transpose(Dsym) == Dsym
+    @test diag(D, 0) == diag(D) == [[1 2; 3 4], [1 2; 3 4]]
+    @test diag(D, 1) == diag(D, -1) == [zeros(Int,2,2)]
+    @test diag(D, 2) == diag(D, -2) == []
 
     v = [[1, 2], [3, 4]]
     @test Dherm' * v == Dherm * v
@@ -826,7 +848,7 @@ end
     @test rdiv!(copy(B), D) ≈ B * Diagonal(inv.(D.diag))
 end
 
-@testset "multiplication with Symmetric/Hermitian" begin
+@testset "multiplication/division with Symmetric/Hermitian" begin
     for T in (Float64, ComplexF64)
         D = Diagonal(randn(T, n))
         A = randn(T, n, n); A = A'A
@@ -839,6 +861,10 @@ end
             @test *(transform1(D), transform2(H)) ≈ *(transform1(Matrix(D)), transform2(Matrix(H)))
             @test *(transform1(S), transform2(D)) ≈ *(transform1(Matrix(S)), transform2(Matrix(D)))
             @test *(transform1(S), transform2(H)) ≈ *(transform1(Matrix(S)), transform2(Matrix(H)))
+            @test (transform1(H)/D) * D ≈ transform1(H)
+            @test (transform1(S)/D) * D ≈ transform1(S)
+            @test D * (D\transform2(H)) ≈ transform2(H)
+            @test D * (D\transform2(S)) ≈ transform2(S)
         end
     end
 end
@@ -1210,6 +1236,11 @@ Base.size(::SMatrix1) = (1, 1)
     @test C isa Matrix{SMatrix1{String}}
 end
 
+@testset "show" begin
+    @test repr(Diagonal([1,2])) == "Diagonal([1, 2])"  # 2-arg show
+    @test contains(repr(MIME"text/plain"(), Diagonal([1,2])), "⋅  2")  # 3-arg show
+end
+
 @testset "copyto! with UniformScaling" begin
     @testset "Fill" begin
         for len in (4, InfiniteArrays.Infinity())
@@ -1277,4 +1308,58 @@ end
     @test c == Diagonal([2,2,2,2])
 end
 
+@testset "uppertriangular/lowertriangular" begin
+    D = Diagonal([1,2])
+    @test LinearAlgebra.uppertriangular(D) === D
+    @test LinearAlgebra.lowertriangular(D) === D
+end
+
+@testset "mul/div with an adjoint vector" begin
+    A = [1.0;;]
+    x = [1.0]
+    yadj = Diagonal(A) \ x'
+    @test typeof(yadj) == typeof(x')
+    @test yadj == x'
+    yadj = Diagonal(A) * x'
+    @test typeof(yadj) == typeof(x')
+    @test yadj == x'
+end
+
+@testset "Matrix conversion for non-numeric" begin
+    D = Diagonal(fill(Diagonal([1,3]), 2))
+    M = Matrix{eltype(D)}(D)
+    @test M isa Matrix{eltype(D)}
+    @test M == D
+end
+
+@testset "rmul!/lmul! with banded matrices" begin
+    @testset "$(nameof(typeof(B)))" for B in (
+                            Bidiagonal(rand(4), rand(3), :L),
+                            Tridiagonal(rand(3), rand(4), rand(3))
+                    )
+        BA = Array(B)
+        D = Diagonal(rand(size(B,1)))
+        DA = Array(D)
+        @test rmul!(copy(B), D) ≈ B * D ≈ BA * DA
+        @test lmul!(D, copy(B)) ≈ D * B ≈ DA * BA
+    end
+end
+
+@testset "+/- with block Symmetric/Hermitian" begin
+    for p in ([1 2; 3 4], [1 2+im; 2-im 4+2im])
+        m = SizedArrays.SizedArray{(2,2)}(p)
+        D = Diagonal(fill(m, 2))
+        for T in (Symmetric, Hermitian)
+            S = T(fill(m, 2, 2))
+            @test D + S == Array(D) + Array(S)
+            @test S + D == Array(S) + Array(D)
+        end
+    end
+end
+
+@testset "bounds-check with CartesianIndex ranges" begin
+    D = Diagonal(1:typemax(Int))
+    @test checkbounds(Bool, D, diagind(D, IndexCartesian()))
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl
index 73d9147a82a09..a82c745436009 100644
--- a/stdlib/LinearAlgebra/test/eigen.jl
+++ b/stdlib/LinearAlgebra/test/eigen.jl
@@ -80,7 +80,7 @@ aimg  = randn(n,n)/2
             @test eigvecs(asym_sg, ASG2) == f.vectors
             @test eigvals(f) === f.values
             @test eigvecs(f) === f.vectors
-            @test_throws ErrorException f.Z
+            @test_throws FieldError f.Z
 
             d,v = eigen(asym_sg, ASG2)
             @test d == f.values
@@ -141,7 +141,7 @@ aimg  = randn(n,n)/2
             @test f.values ≈ eigvals(a1_nsg, a2_nsg; sortby = sortfunc)
             @test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg, sortby = sortfunc)) atol=50000ε
             @test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors
-            @test_throws ErrorException f.Z
+            @test_throws FieldError f.Z
 
             g = eigen(a1_nsg, Diagonal(1:n1))
             @test a1_nsg*g.vectors ≈ (Diagonal(1:n1)*g.vectors) * Diagonal(g.values)
@@ -212,10 +212,22 @@ end
 end
 
 @testset "equality of eigen factorizations" begin
-    A = randn(3, 3)
-    @test eigen(A) == eigen(A)
-    @test hash(eigen(A)) == hash(eigen(A))
-    @test isequal(eigen(A), eigen(A))
+    A1 = Float32[1 0; 0 2]
+    A2 = Float64[1 0; 0 2]
+    EA1 = eigen(A1)
+    EA2 = eigen(A2)
+    @test EA1 == EA2
+    @test hash(EA1) == hash(EA2)
+    @test isequal(EA1, EA2)
+
+    # trivial RHS to ensure that values match exactly
+    B1 = Float32[1 0; 0 1]
+    B2 = Float64[1 0; 0 1]
+    EA1B1 = eigen(A1, B1)
+    EA2B2 = eigen(A2, B2)
+    @test EA1B1 == EA2B2
+    @test hash(EA1B1) == hash(EA2B2)
+    @test isequal(EA1B1, EA2B2)
 end
 
 @testset "Float16" begin
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
index 72233293ff515..f80c5197836a1 100644
--- a/stdlib/LinearAlgebra/test/factorization.jl
+++ b/stdlib/LinearAlgebra/test/factorization.jl
@@ -37,8 +37,8 @@ using Test, LinearAlgebra
         return x isa AbstractArray{Float64} ? Float64.(Float32.(x)) : x
     end...)
 
-    @test F == G broken=!(f === eigen || f === qr)
-    @test isequal(F, G) broken=!(f === eigen || f === qr)
+    @test F == G broken=!(f === eigen || f === qr || f == bunchkaufman || f == cholesky || F isa CholeskyPivoted)
+    @test isequal(F, G) broken=!(f === eigen || f === qr || f == bunchkaufman || f == cholesky || F isa CholeskyPivoted)
     @test hash(F) == hash(G)
 end
 
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index 6318b8e405ede..e0a1704913f78 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -571,6 +571,13 @@ end
     @test_broken ismissing(norm(x, 0))
 end
 
+@testset "avoid stackoverflow of norm on AbstractChar" begin
+    @test_throws ArgumentError norm('a')
+    @test_throws ArgumentError norm(['a', 'b'])
+    @test_throws ArgumentError norm("s")
+    @test_throws ArgumentError norm(["s", "t"])
+end
+
 @testset "peakflops" begin
     @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0
 end
@@ -647,12 +654,64 @@ end
 
 @testset "copytrito!" begin
     n = 10
-    A = rand(n, n)
-    for uplo in ('L', 'U')
-        B = zeros(n, n)
-        copytrito!(B, A, uplo)
-        C = uplo == 'L' ? tril(A) : triu(A)
-        @test B ≈ C
+    @testset "square" begin
+        for A in (rand(n, n), rand(Int8, n, n)), uplo in ('L', 'U')
+            for AA in (A, view(A, reverse.(axes(A))...))
+                C = uplo == 'L' ? tril(AA) : triu(AA)
+                for B in (zeros(n, n), zeros(n+1, n+2))
+                    copytrito!(B, AA, uplo)
+                    @test view(B, 1:n, 1:n) == C
+                end
+            end
+        end
+    end
+    @testset "wide" begin
+        for A in (rand(n, 2n), rand(Int8, n, 2n))
+            for AA in (A, view(A, reverse.(axes(A))...))
+                C = tril(AA)
+                for (M, N) in ((n, n), (n+1, n), (n, n+1), (n+1, n+1))
+                    B = zeros(M, N)
+                    copytrito!(B, AA, 'L')
+                    @test view(B, 1:n, 1:n) == view(C, 1:n, 1:n)
+                end
+                @test_throws DimensionMismatch copytrito!(zeros(n-1, 2n), AA, 'L')
+                C = triu(AA)
+                for (M, N) in ((n, 2n), (n+1, 2n), (n, 2n+1), (n+1, 2n+1))
+                    B = zeros(M, N)
+                    copytrito!(B, AA, 'U')
+                    @test view(B, 1:n, 1:2n) == view(C, 1:n, 1:2n)
+                end
+                @test_throws DimensionMismatch copytrito!(zeros(n+1, 2n-1), AA, 'U')
+            end
+        end
+    end
+    @testset "tall" begin
+        for A in (rand(2n, n), rand(Int8, 2n, n))
+            for AA in (A, view(A, reverse.(axes(A))...))
+                C = triu(AA)
+                for (M, N) in ((n, n), (n+1, n), (n, n+1), (n+1, n+1))
+                    B = zeros(M, N)
+                    copytrito!(B, AA, 'U')
+                    @test view(B, 1:n, 1:n) == view(C, 1:n, 1:n)
+                end
+                @test_throws DimensionMismatch copytrito!(zeros(n-1, n+1), AA, 'U')
+                C = tril(AA)
+                for (M, N) in ((2n, n), (2n, n+1), (2n+1, n), (2n+1, n+1))
+                    B = zeros(M, N)
+                    copytrito!(B, AA, 'L')
+                    @test view(B, 1:2n, 1:n) == view(C, 1:2n, 1:n)
+                end
+                @test_throws DimensionMismatch copytrito!(zeros(n-1, n+1), AA, 'L')
+            end
+        end
+    end
+    @testset "aliasing" begin
+        M = Matrix(reshape(1:36, 6, 6))
+        A = view(M, 1:5, 1:5)
+        A2 = Matrix(A)
+        B = view(M, 2:6, 2:6)
+        copytrito!(B, A, 'U')
+        @test UpperTriangular(B) == UpperTriangular(A2)
     end
 end
 
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 39ae7ec83a5c3..54dbb70aa2065 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -148,7 +148,7 @@ let n = 10
         @test size(H.Q, 2) == size(A, 2)
         @test size(H.Q) == size(A)
         @test size(H) == size(A)
-        @test_throws ErrorException H.Z
+        @test_throws FieldError H.Z
         @test convert(Array, H) ≈ A
         @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)'
         @test (H.Q' * A) * H.Q ≈ H.H
@@ -202,6 +202,13 @@ let n = 10
     end
 end
 
+@testset "Reverse operation on UpperHessenberg" begin
+    A = UpperHessenberg(randn(5, 5))
+    @test reverse(A, dims=1) == reverse(Matrix(A), dims=1)
+    @test reverse(A, dims=2) == reverse(Matrix(A), dims=2)
+    @test reverse(A) == reverse(Matrix(A))
+end
+
 @testset "hessenberg(::AbstractMatrix)" begin
     n = 10
     A = Tridiagonal(rand(n-1), rand(n), rand(n-1))
@@ -250,4 +257,26 @@ end
     @test axes(S) === (r,r)
 end
 
+@testset "copyto! with aliasing (#39460)" begin
+    M = Matrix(reshape(1:36, 6, 6))
+    A = UpperHessenberg(view(M, 1:5, 1:5))
+    A2 = copy(A)
+    B = UpperHessenberg(view(M, 2:6, 2:6))
+    @test copyto!(B, A) == A2
+end
+
+@testset "getindex with Integers" begin
+    M = reshape(1:9, 3, 3)
+    S = UpperHessenberg(M)
+    @test_throws "invalid index" S[3, true]
+    @test S[1,2] == S[Int8(1),UInt16(2)] == S[big(1), Int16(2)]
+end
+
+@testset "complex Symmetric" begin
+    D = diagm(0=>ComplexF64[1,2])
+    S = Symmetric(D)
+    H = hessenberg(S)
+    @test H.H == D
+end
+
 end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index 6e12c85204a78..f05d7d99c2437 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -9,6 +9,7 @@ using LinearAlgebra: BlasInt
 @test_throws ArgumentError LinearAlgebra.LAPACK.chkside('Z')
 @test_throws ArgumentError LinearAlgebra.LAPACK.chkdiag('Z')
 @test_throws ArgumentError LinearAlgebra.LAPACK.chktrans('Z')
+@test_throws ArgumentError LinearAlgebra.LAPACK.chkvalidparam(1, "job", 2, (0,1))
 
 @testset "syevr" begin
     Random.seed!(123)
@@ -35,6 +36,12 @@ using LinearAlgebra: BlasInt
         @test vals_test ≈ vals
         @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym
         @test_throws DimensionMismatch LAPACK.sygvd!(1, 'V', 'U', copy(Asym), zeros(elty, 6, 6))
+
+        @test_throws "jobz must be one of ('N', 'V'), but 'X' was passed" LAPACK.syevr!('X', Asym)
+        @test_throws "jobz must be one of ('N', 'V'), but 'X' was passed" LAPACK.syev!('X', 'U', Asym)
+        @test_throws "uplo argument must be 'U' (upper) or 'L' (lower), got 'M'" LAPACK.syev!('N', 'M', Asym)
+        @test_throws "jobz must be one of ('N', 'V'), but 'X' was passed" LAPACK.syevd!('X', 'U', Asym)
+        @test_throws "uplo argument must be 'U' (upper) or 'L' (lower), got 'M'" LAPACK.syevd!('N', 'M', Asym)
     end
 end
 
@@ -112,7 +119,9 @@ end
         D = LAPACK.gbtrs!('N',2,1,6,AB,ipiv,D)
         A = diagm(-2 => dl2, -1 => dl, 0 => d, 1 => du)
         @test A\C ≈ D
-        @test_throws DimensionMismatch LAPACK.gbtrs!('N',2,1,6,AB,ipiv,Matrix{elty}(undef,7,6))
+        M = Matrix{elty}(undef,7,6)
+        @test_throws DimensionMismatch LAPACK.gbtrs!('N',2,1,6,AB,ipiv,M)
+        @test_throws ArgumentError LAPACK.gbtrs!('M',2,1,6,AB,ipiv,M)
         @test_throws LinearAlgebra.LAPACKException LAPACK.gbtrf!(2,1,6,zeros(elty,6,6))
     end
 end
@@ -141,9 +150,11 @@ end
         x10, x11 = Vector{LinearAlgebra.BlasInt}.(undef, (10, 11))
         @test_throws DimensionMismatch LAPACK.gels!('N',A10x10,B11x11)
         @test_throws DimensionMismatch LAPACK.gels!('T',A10x10,B11x11)
+        @test_throws ArgumentError LAPACK.gels!('X',A10x10,B11x11)
         @test_throws DimensionMismatch LAPACK.gesv!(A10x10,B11x11)
         @test_throws DimensionMismatch LAPACK.getrs!('N',A10x10,x10,B11x11)
         @test_throws DimensionMismatch LAPACK.getrs!('T',A10x10,x10,B11x11)
+        @test_throws ArgumentError LAPACK.getrs!('X',A10x10,x10,B11x11)
         @test_throws DimensionMismatch LAPACK.getri!(A10x10,x11)
     end
 end
@@ -177,12 +188,20 @@ end
         @test U ≈ lU
         @test S ≈ lS
         @test V' ≈ lVt
+        @test_throws ArgumentError LAPACK.gesvd!('X','S',A)
+        @test_throws ArgumentError LAPACK.gesvd!('S','X',A)
         B = rand(elty,10,10)
         # xggsvd3 replaced xggsvd in LAPACK 3.6.0
         if LAPACK.version() < v"3.6.0"
-            @test_throws DimensionMismatch LAPACK.ggsvd!('S','S','S',A,B)
+            @test_throws DimensionMismatch LAPACK.ggsvd!('N','N','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd!('X','N','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd!('N','X','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd!('N','N','X',A,B)
         else
-            @test_throws DimensionMismatch LAPACK.ggsvd3!('S','S','S',A,B)
+            @test_throws DimensionMismatch LAPACK.ggsvd3!('N','N','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd3!('X','N','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd3!('N','X','N',A,B)
+            @test_throws ArgumentError LAPACK.ggsvd3!('N','N','X',A,B)
         end
     end
 end
@@ -224,6 +243,7 @@ end
         X = rand(elty,10)
         B,Y,z = LAPACK.gels!('N',copy(A),copy(X))
         @test A\X ≈ Y
+        @test_throws ArgumentError LAPACK.gels!('X',A,X)
     end
 end
 
@@ -252,6 +272,9 @@ end
         fA = eigen(A, sortby=nothing)
         @test fA.values  ≈ Aw
         @test fA.vectors ≈ Avr
+
+        @test_throws ArgumentError LAPACK.geev!('X','V',A)
+        @test_throws ArgumentError LAPACK.geev!('N','X',A)
     end
 end
 
@@ -284,6 +307,7 @@ end
         @test_throws DimensionMismatch LAPACK.gttrs!('N', x11, d, du, x9, y10, b)
         @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, x11, x9, y10, b)
         @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, du, x9, y10, x11)
+        @test_throws ArgumentError LAPACK.gttrs!('X', dl, d, du, x9, y10, x11)
         A = lu(Tridiagonal(dl,d,du))
         b  = rand(elty,10,5)
         c = copy(b)
@@ -298,10 +322,17 @@ end
         A = rand(elty,10,10)
         A,tau = LAPACK.gelqf!(A)
         @test_throws DimensionMismatch LAPACK.orglq!(A,tau,11)
-        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,zeros(elty,11),rand(elty,10,10))
+        temp = rand(elty,11,11)
+        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,tau,temp)
+        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormlq!('X','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormlq!('R','X',A,tau,temp)
+        temp = zeros(elty,11)
+        B = copy(A)
+        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,temp,B)
+        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormlq!('X','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormlq!('L','X',A,temp,B)
 
         B = copy(A)
         C = LAPACK.orglq!(B,tau)
@@ -312,30 +343,51 @@ end
         @test_throws DimensionMismatch LAPACK.orgqr!(A,tau,11)
         B = copy(A)
         @test LAPACK.orgqr!(B,tau) ≈ LAPACK.ormqr!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,zeros(elty,11),rand(elty,10,10))
+        temp = rand(elty,11,11)
+        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,tau,temp)
+        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormqr!('X','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormqr!('L','X',A,tau,temp)
+        B = copy(A)
+        temp = zeros(elty,11)
+        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,temp,B)
+        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormqr!('X','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormqr!('L','X',A,temp,B)
 
         A = rand(elty,10,10)
         A,tau = LAPACK.geqlf!(A)
         @test_throws DimensionMismatch LAPACK.orgql!(A,tau,11)
         B = copy(A)
         @test LAPACK.orgql!(B,tau) ≈ LAPACK.ormql!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,zeros(elty,11),rand(elty,10,10))
+        temp = rand(elty,11,11)
+        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,tau,temp)
+        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormql!('X','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormql!('L','X',A,tau,temp)
+        temp = zeros(elty,11)
+        B = copy(A)
+        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,temp,B)
+        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormql!('X','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormql!('L','X',A,temp,B)
 
         A = rand(elty,10,10)
         A,tau = LAPACK.gerqf!(A)
         @test_throws DimensionMismatch LAPACK.orgrq!(A,tau,11)
         B = copy(A)
         @test LAPACK.orgrq!(B,tau) ≈ LAPACK.ormrq!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,zeros(elty,11),rand(elty,10,10))
+        temp = rand(elty,11,11)
+        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,tau,temp)
+        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormrq!('X','N',A,tau,temp)
+        @test_throws ArgumentError LAPACK.ormrq!('L','X',A,tau,temp)
+        B = copy(A)
+        temp = zeros(elty,11)
+        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,temp,B)
+        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormrq!('X','N',A,temp,B)
+        @test_throws ArgumentError LAPACK.ormrq!('L','X',A,temp,B)
 
         A = rand(elty,10,11)
         Q = copy(A)
@@ -351,21 +403,29 @@ end
         T = zeros(elty,10,11)
         @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
         @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('X','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('R','X',V,T,C)
 
         C = rand(elty,10,10)
         V = rand(elty,11,10)
         T = zeros(elty,10,10)
         @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
         @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('X','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('L','X',V,T,C)
 
         # test size(T) = (nb,k) ensures 1 <= nb <= k
         T = zeros(elty,10,10)
         V = rand(elty,5,10)
         @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('X','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('L','X',V,T,C)
         C = rand(elty,10,10)
         V = rand(elty,10,10)
         T = zeros(elty,11,10)
         @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('X','N',V,T,C)
+        @test_throws ArgumentError LAPACK.gemqrt!('R','X',V,T,C)
 
         @test_throws DimensionMismatch LAPACK.orghr!(1, 10, C, zeros(elty,11))
     end
@@ -377,8 +437,12 @@ end
         A = A + transpose(A) #symmetric!
         B = copy(A)
         B,ipiv = LAPACK.sytrf!('U',B)
+        @test_throws ArgumentError LAPACK.sytrf!('X',B)
         @test triu(inv(A)) ≈ triu(LAPACK.sytri!('U',B,ipiv)) rtol=eps(cond(A))
-        @test_throws DimensionMismatch LAPACK.sytrs!('U',B,ipiv,rand(elty,11,5))
+        @test_throws ArgumentError LAPACK.sytri!('X',B,ipiv)
+        temp = rand(elty,11,5)
+        @test_throws DimensionMismatch LAPACK.sytrs!('U',B,ipiv,temp)
+        @test_throws ArgumentError LAPACK.sytrs!('X',B,ipiv,temp)
         @test LAPACK.sytrf!('U',zeros(elty,0,0)) == (zeros(elty,0,0),zeros(BlasInt,0),zero(BlasInt))
     end
 
@@ -389,7 +453,10 @@ end
         B = copy(A)
         B,ipiv = LAPACK.sytrf_rook!('U', B)
         @test triu(inv(A)) ≈ triu(LAPACK.sytri_rook!('U', B, ipiv)) rtol=eps(cond(A))
-        @test_throws DimensionMismatch LAPACK.sytrs_rook!('U', B, ipiv, rand(elty, 11, 5))
+        @test_throws ArgumentError LAPACK.sytri_rook!('X', B, ipiv)
+        temp = rand(elty, 11, 5)
+        @test_throws DimensionMismatch LAPACK.sytrs_rook!('U', B, ipiv, temp)
+        @test_throws ArgumentError LAPACK.sytrs_rook!('X', B, ipiv, temp)
         @test LAPACK.sytrf_rook!('U',zeros(elty, 0, 0)) == (zeros(elty, 0, 0),zeros(BlasInt, 0),zero(BlasInt))
         A = rand(elty, 10, 10)
         A = A + transpose(A) #symmetric!
@@ -398,7 +465,9 @@ end
         cnd = cond(A)
         b,A = LAPACK.sysv_rook!('U', A, b)
         @test b ≈ c rtol=eps(cnd)
-        @test_throws DimensionMismatch LAPACK.sysv_rook!('U',A,rand(elty,11))
+        temp = rand(elty,11)
+        @test_throws DimensionMismatch LAPACK.sysv_rook!('U',A,temp)
+        @test_throws ArgumentError LAPACK.sysv_rook!('X',A,temp)
 
         # syconvf_rook error handling
         # way argument is wrong
@@ -416,8 +485,11 @@ end
         A = A + A' #hermitian!
         B = copy(A)
         B,ipiv = LAPACK.hetrf!('U',B)
-        @test_throws DimensionMismatch LAPACK.hetrs!('U',B,ipiv,rand(elty,11,5))
-        @test_throws DimensionMismatch LAPACK.hetrs_rook!('U',B,ipiv,rand(elty,11,5))
+        temp = rand(elty,11,5)
+        @test_throws DimensionMismatch LAPACK.hetrs!('U',B,ipiv,temp)
+        @test_throws ArgumentError LAPACK.hetrs!('X',B,ipiv,temp)
+        @test_throws DimensionMismatch LAPACK.hetrs_rook!('U',B,ipiv,temp)
+        @test_throws ArgumentError LAPACK.hetrs_rook!('X',B,ipiv,temp)
     end
 end
 
@@ -425,11 +497,21 @@ end
     @testset for elty in (Float32, Float64)
         d = rand(elty,10)
         e = rand(elty,9)
-        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,11))
-        @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,rand(elty,10))
-        @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,11),zero(elty),zero(elty),0,0)
-        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,11),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
-        @test_throws DimensionMismatch LAPACK.stein!(d,e,zeros(elty,11),zeros(BlasInt,10),zeros(BlasInt,10))
+        temp = rand(elty,11)
+        @test_throws DimensionMismatch LAPACK.stev!('N',d,temp)
+        @test_throws ArgumentError LAPACK.stev!('X',d,temp)
+        temp = rand(elty,10)
+        @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,temp)
+        @test_throws ArgumentError LAPACK.stebz!('X','B',zero(elty),zero(elty),0,0,-1.,d,temp)
+        @test_throws ArgumentError LAPACK.stebz!('A','X',zero(elty),zero(elty),0,0,-1.,d,temp)
+        temp11 = rand(elty,11)
+        @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,temp11,zero(elty),zero(elty),0,0)
+        @test_throws ArgumentError LAPACK.stegr!('X','A',d,temp11,zero(elty),zero(elty),0,0)
+        @test_throws ArgumentError LAPACK.stegr!('N','X',d,temp11,zero(elty),zero(elty),0,0)
+        tempblasint10 = zeros(BlasInt,10)
+        tempblasint10_2 = zeros(BlasInt,10)
+        @test_throws DimensionMismatch LAPACK.stein!(d,temp11,temp,tempblasint10,tempblasint10_2)
+        @test_throws DimensionMismatch LAPACK.stein!(d,e,temp11,tempblasint10,tempblasint10_2)
     end
 end
 
@@ -439,7 +521,13 @@ end
         A = triu(A)
         B = copy(A)
         @test inv(A) ≈ LAPACK.trtri!('U','N',B)
-        @test_throws DimensionMismatch LAPACK.trtrs!('U','N','N',B,zeros(elty,11,10))
+        @test_throws ArgumentError LAPACK.trtri!('X','N',B)
+        @test_throws ArgumentError LAPACK.trtri!('U','X',B)
+        temp = zeros(elty,11,10)
+        @test_throws DimensionMismatch LAPACK.trtrs!('U','N','N',B,temp)
+        @test_throws ArgumentError LAPACK.trtrs!('X','N','N',B,temp)
+        @test_throws ArgumentError LAPACK.trtrs!('U','X','N',B,temp)
+        @test_throws ArgumentError LAPACK.trtrs!('U','N','X',B,temp)
     end
 end
 
@@ -479,6 +567,8 @@ end
         LinearAlgebra.LAPACK.larf!('L', v,      τ, C1)
         LinearAlgebra.LAPACK.larf!('R', v, conj(τ), C2)
         @test C ≈ C2*C1
+
+        @test_throws ArgumentError LAPACK.larf!('X', v,      τ, C1)
     end
 end
 
@@ -489,6 +579,8 @@ end
         @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,zeros(elty,11,11),Z)
         @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,Z,zeros(elty,11,11))
         @test_throws DimensionMismatch LAPACK.trsyl!('N','N',Z,Z,zeros(elty,11,11))
+        @test_throws ArgumentError LAPACK.trsyl!('X','N',Z,Z,zeros(elty,11,11))
+        @test_throws ArgumentError LAPACK.trsyl!('N','X',Z,Z,zeros(elty,11,11))
         @test_throws DimensionMismatch LAPACK.tzrzf!(zeros(elty,10,5))
 
         A = triu(rand(elty,4,4))
@@ -508,6 +600,7 @@ end
         b,A = LAPACK.sysv!('U',A,b)
         @test b ≈ c
         @test_throws DimensionMismatch LAPACK.sysv!('U',A,rand(elty,11))
+        @test_throws ArgumentError LAPACK.sysv!('X',A,rand(elty,11))
     end
 end
 
@@ -520,14 +613,17 @@ end
         c = A \ b
         b,A = LAPACK.hesv!('U',A,b)
         @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.hesv!('U',A,rand(elty,11))
+        temp = rand(elty,11)
+        @test_throws DimensionMismatch LAPACK.hesv!('U',A,temp)
+        @test_throws ArgumentError LAPACK.hesv!('X',A,temp)
         A = rand(elty,10,10)
         A = A + A' #hermitian!
         b = rand(elty,10)
         c = A \ b
         b,A = LAPACK.hesv_rook!('U',A,b)
         @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.hesv_rook!('U',A,rand(elty,11))
+        @test_throws DimensionMismatch LAPACK.hesv_rook!('U',A,temp)
+        @test_throws ArgumentError LAPACK.hesv_rook!('X',A,temp)
     end
 end
 
@@ -563,8 +659,12 @@ end
         C = copy(B)
         if elty <: Complex
             @test A\B ≈ LAPACK.pttrs!('U',rdv,ev,C)
-            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,Vector{elty}(undef,10),C)
-            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,ev,Matrix{elty}(undef,11,11))
+            tempvec = Vector{elty}(undef,10)
+            tempmat = Matrix{elty}(undef,11,11)
+            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,tempvec,C)
+            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,ev,tempmat)
+            @test_throws ArgumentError LAPACK.pttrs!('X',rdv,tempvec,C)
+            @test_throws ArgumentError LAPACK.pttrs!('X',rdv,ev,tempmat)
         else
             @test A\B ≈ LAPACK.pttrs!(rdv,ev,C)
             @test_throws DimensionMismatch LAPACK.pttrs!(rdv,Vector{elty}(undef,10),C)
@@ -591,6 +691,8 @@ end
         offsizemat = Matrix{elty}(undef, n+1, n+1)
         @test_throws DimensionMismatch LAPACK.posv!('U', D, offsizemat)
         @test_throws DimensionMismatch LAPACK.potrs!('U', D, offsizemat)
+        @test_throws ArgumentError LAPACK.posv!('X', D, offsizemat)
+        @test_throws ArgumentError LAPACK.potrs!('X', D, offsizemat)
 
         @test LAPACK.potrs!('U',Matrix{elty}(undef,0,0),elty[]) == elty[]
     end
@@ -613,6 +715,10 @@ end
         B = rand(elty,11,11)
         @test_throws DimensionMismatch LAPACK.gges!('V','V',A,B)
         @test_throws DimensionMismatch LAPACK.gges3!('V','V',A,B)
+        @test_throws ArgumentError LAPACK.gges!('X','V',A,B)
+        @test_throws ArgumentError LAPACK.gges3!('X','V',A,B)
+        @test_throws ArgumentError LAPACK.gges!('V','X',A,B)
+        @test_throws ArgumentError LAPACK.gges3!('V','X',A,B)
     end
 end
 
@@ -632,8 +738,14 @@ end
         select,Vln,Vrn = LAPACK.trevc!('B','S',select,copy(T))
         @test Vrn ≈ v
         @test Vln ≈ Vl
-        @test_throws ArgumentError LAPACK.trevc!('V','S',select,copy(T))
-        @test_throws DimensionMismatch LAPACK.trrfs!('U','N','N',T,rand(elty,10,10),rand(elty,10,11))
+        @test_throws ArgumentError LAPACK.trevc!('V','S',select,T)
+        @test_throws ArgumentError LAPACK.trevc!('R','X',select,T)
+        temp1010 = rand(elty,10,10)
+        temp1011 = rand(elty,10,11)
+        @test_throws DimensionMismatch LAPACK.trrfs!('U','N','N',T,temp1010,temp1011)
+        @test_throws ArgumentError LAPACK.trrfs!('X','N','N',T,temp1010,temp1011)
+        @test_throws ArgumentError LAPACK.trrfs!('U','X','N',T,temp1010,temp1011)
+        @test_throws ArgumentError LAPACK.trrfs!('U','N','X',T,temp1010,temp1011)
     end
 end
 
@@ -693,8 +805,26 @@ end
             B = zeros(elty, n, n)
             LinearAlgebra.LAPACK.lacpy!(B, A, uplo)
             C = uplo == 'L' ? tril(A) : (uplo == 'U' ? triu(A) : A)
-            @test B ≈ C
+            @test B == C
+            B = zeros(elty, n+1, n+1)
+            LinearAlgebra.LAPACK.lacpy!(B, A, uplo)
+            C = uplo == 'L' ? tril(A) : (uplo == 'U' ? triu(A) : A)
+            @test view(B, 1:n, 1:n) == C
         end
+        A = rand(elty, n, n+1)
+        B = zeros(elty, n, n)
+        LinearAlgebra.LAPACK.lacpy!(B, A, 'L')
+        @test B == view(tril(A), 1:n, 1:n)
+        B = zeros(elty, n, n+1)
+        LinearAlgebra.LAPACK.lacpy!(B, A, 'U')
+        @test B == triu(A)
+        A = rand(elty, n+1, n)
+        B = zeros(elty, n, n)
+        LinearAlgebra.LAPACK.lacpy!(B, A, 'U')
+        @test B == view(triu(A), 1:n, 1:n)
+        B = zeros(elty, n+1, n)
+        LinearAlgebra.LAPACK.lacpy!(B, A, 'L')
+        @test B == tril(A)
     end
 end
 
@@ -749,4 +879,24 @@ a = zeros(2,0), zeros(0)
     @test_throws DimensionMismatch LinearAlgebra.LAPACK.getrs!('N', A, ipiv, b)
 end
 
+@testset "hetrd ignore non-filled half" begin
+    A = rand(3,3)
+    B = copy(A)
+    B[2,1] = NaN
+    B[3,1] = Inf
+    LAPACK.hetrd!('U', A)
+    LAPACK.hetrd!('U', B)
+    @test UpperTriangular(A) == UpperTriangular(B)
+end
+
+@testset "inference in syev!/syevd!" begin
+    for T in (Float32, Float64), CT in (T, Complex{T})
+        A = rand(CT, 4,4)
+        @inferred (A -> LAPACK.syev!('N', 'U', A))(A)
+        @inferred (A -> LAPACK.syev!('V', 'U', A))(A)
+        @inferred (A -> LAPACK.syevd!('N', 'U', A))(A)
+        @inferred (A -> LAPACK.syevd!('V', 'U', A))(A)
+    end
+end
+
 end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl
index 44f920db25557..c3499f7f46fa6 100644
--- a/stdlib/LinearAlgebra/test/lq.jl
+++ b/stdlib/LinearAlgebra/test/lq.jl
@@ -50,7 +50,7 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                     for (ii, lq_obj) in enumerate(lqa)
                         @test ref_obs[ii] == lq_obj
                     end
-                    @test_throws ErrorException lqa.Z
+                    @test_throws FieldError lqa.Z
                     @test Array(copy(adjoint(lqa))) ≈ a'
                     @test q*squareQ(q)' ≈ Matrix(I, n, n)
                     @test l*q ≈ a
diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl
index a4cbdbe3eb9b9..56a402d70493e 100644
--- a/stdlib/LinearAlgebra/test/lu.jl
+++ b/stdlib/LinearAlgebra/test/lu.jl
@@ -59,7 +59,7 @@ dimg  = randn(n)/2
     κ  = cond(a,1)
     @testset "(Automatic) Square LU decomposition" begin
         lua   = factorize(a)
-        @test_throws ErrorException lua.Z
+        @test_throws FieldError lua.Z
         l,u,p = lua.L, lua.U, lua.p
         ll,ul,pl = @inferred lu(a)
         @test ll * ul ≈ a[pl,:]
@@ -88,7 +88,7 @@ dimg  = randn(n)/2
         lud = @inferred lu(d)
         @test LinearAlgebra.issuccess(lud)
         @test @inferred(lu(lud)) == lud
-        @test_throws ErrorException lud.Z
+        @test_throws FieldError lud.Z
         @test lud.L*lud.U ≈ lud.P*Array(d)
         @test lud.L*lud.U ≈ Array(d)[lud.p,:]
         @test AbstractArray(lud) ≈ d
@@ -486,4 +486,17 @@ end
     LinearAlgebra.generic_lufact!(fill(Inf, 2, 2), check=false)
 end
 
+@testset "lu for empty matrices" begin
+    for T in (Float64, BigFloat)
+        A = fill(T(0.0), 0, 0)
+        v = fill(T(1.0), 0, 10)
+        @test A \ v ≈ lu(A) \ v
+        vt = permutedims(v)
+        @test vt / A ≈ vt / lu(A)
+        B = UpperTriangular(transpose(fill(complex(T(0.0)), 0, 0)'))
+        @test B \ v ≈ v
+        @test vt / B ≈ vt
+    end
+end
+
 end # module TestLU
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 5bcbb99a314fd..4c79451ebfc8b 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -30,6 +30,30 @@ mul_wrappers = [
     h(A) = LinearAlgebra.wrap(LinearAlgebra._unwrap(A), LinearAlgebra.wrapper_char(A))
     @test @inferred(h(transpose(A))) === transpose(A)
     @test @inferred(h(adjoint(A))) === transpose(A)
+
+    M = rand(2,2)
+    for S in (Symmetric(M), Hermitian(M))
+        @test @inferred((A -> LinearAlgebra.wrap(parent(A), LinearAlgebra.wrapper_char(A)))(S)) === Symmetric(M)
+    end
+    M = rand(ComplexF64,2,2)
+    for S in (Symmetric(M), Hermitian(M))
+        @test @inferred((A -> LinearAlgebra.wrap(parent(A), LinearAlgebra.wrapper_char(A)))(S)) === S
+    end
+
+    @testset "WrapperChar" begin
+        @test LinearAlgebra.WrapperChar('c') == 'c'
+        @test LinearAlgebra.WrapperChar('C') == 'C'
+        @testset "constant propagation in uppercase/lowercase" begin
+            v = @inferred (() -> Val(uppercase(LinearAlgebra.WrapperChar('C'))))()
+            @test v isa Val{'C'}
+            v = @inferred (() -> Val(uppercase(LinearAlgebra.WrapperChar('s'))))()
+            @test v isa Val{'S'}
+            v = @inferred (() -> Val(lowercase(LinearAlgebra.WrapperChar('C'))))()
+            @test v isa Val{'c'}
+            v = @inferred (() -> Val(lowercase(LinearAlgebra.WrapperChar('s'))))()
+            @test v isa Val{'s'}
+        end
+    end
 end
 
 @testset "matrices with zero dimensions" begin
@@ -184,6 +208,18 @@ end
         C .= C0 = rand(eltype(C), size(C))
         @test mul!(C, vf, transpose(vf), 2, 3) ≈ 2vf * vf' .+ 3C0
     end
+
+    @testset "zero stride" begin
+        for AAv in (view(AA, StepRangeLen(2,0,size(AA,1)), :),
+                    view(AA, StepRangeLen.(2,0,size(AA))...),
+                    view(complex.(AA, AA), StepRangeLen.(2,0,size(AA))...),)
+            for BB2 in (BB, complex.(BB, BB))
+                C = AAv * BB2
+                @test allequal(C)
+                @test C ≈ Array(AAv) * BB2
+            end
+        end
+    end
 end
 
 @testset "generic_matvecmul for vectors of vectors" begin
@@ -216,6 +252,18 @@ end
     end
 end
 
+@testset "generic_matvecmul for vectors of matrices" begin
+    x = [1 2 3; 4 5 6]
+    A = reshape([x,2x,3x,4x],2,2)
+    b = [x, 2x]
+    for f in (adjoint, transpose)
+        c = f(A) * b
+        for i in eachindex(c)
+            @test c[i] == sum(f(A)[i, j] * b[j] for j in eachindex(b))
+        end
+    end
+end
+
 @testset "generic_matmatmul for matrices of vectors" begin
     B = Matrix{Vector{Int}}(undef, 2, 2)
     B[1, 1] = [1, 2]
@@ -1058,7 +1106,8 @@ end
     end
 end
 
-@testset "Issue #46865: mul!() with non-const alpha, beta" begin
+#46865
+@testset "mul!() with non-const alpha, beta" begin
     f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1])
     alphas = [1.0]
     betas = [0.5]
@@ -1067,8 +1116,18 @@ end
         B = copy(A)
         C = copy(A)
         f!(C, A, B, alphas, betas)
-        @test_broken (@allocated f!(C, A, B, alphas, betas)) == 0
+        @test (@allocated f!(C, A, B, alphas, betas)) == 0
     end
 end
 
+@testset "vector-matrix multiplication" begin
+    a = [1,2]
+    A = reshape([1,2], 2, 1)
+    B = [1 2]
+    @test a * B ≈ A * B
+    B = reshape([1,2], 2, 1)
+    @test a * B' ≈ A * B'
+    @test a * transpose(B) ≈ A * transpose(B)
+end
+
 end # module TestMatmul
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
index 184971da304f7..b6e9ce3a82743 100644
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ b/stdlib/LinearAlgebra/test/qr.jl
@@ -50,7 +50,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
             @testset "QR decomposition (without pivoting)" begin
                 qra   = @inferred qr(a)
                 q, r  = qra.Q, qra.R
-                @test_throws ErrorException qra.Z
+                @test_throws FieldError qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
                 @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1)
                 @test q'*Matrix(1.0I, a_1, a_1)' ≈ squareQ(q)'
@@ -79,7 +79,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
             @testset "Thin QR decomposition (without pivoting)" begin
                 qra   = @inferred qr(a[:, 1:n1], NoPivot())
                 q,r   = qra.Q, qra.R
-                @test_throws ErrorException qra.Z
+                @test_throws FieldError qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
                 @test q'*rectangularQ(q) ≈ Matrix(I, a_1, n1)
                 @test q*r ≈ a[:, 1:n1]
@@ -106,7 +106,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
 
                 qrpa  = factorize(a[1:n1,:])
                 q,r = qrpa.Q, qrpa.R
-                @test_throws ErrorException qrpa.Z
+                @test_throws FieldError qrpa.Z
                 p = qrpa.p
                 @test q'*squareQ(q) ≈ Matrix(I, n1, n1)
                 @test q*squareQ(q)' ≈ Matrix(I, n1, n1)
@@ -134,7 +134,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
             @testset "(Automatic) Thin (pivoted) QR decomposition" begin
                 qrpa  = factorize(a[:,1:n1])
                 q,r = qrpa.Q, qrpa.R
-                @test_throws ErrorException qrpa.Z
+                @test_throws FieldError qrpa.Z
                 p = qrpa.p
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
                 @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1)
@@ -504,4 +504,40 @@ end
     @test x ≈ xf
 end
 
+@testset "issue #53451" begin
+    # in the issue it was noted that QR factorizations of zero-column matrices
+    # were possible, but zero row-matrices errored, because LAPACK does not
+    # accept these empty matrices. now, the `geqrt!` call should be forwarded only
+    # if both matrix dimensions are positive.
+
+    for dimA in (0, 1, 2, 4)
+        for F in (Float32, Float64, ComplexF32, ComplexF64, BigFloat)
+            # this should have worked before, Q is square, and R is 0 × 0:
+            A_zero_cols = rand(F, dimA, 0)
+            qr_zero_cols = qr(A_zero_cols)
+            @test size(qr_zero_cols.Q) == (dimA, dimA)
+            @test size(qr_zero_cols.R) == (0, 0)
+            @test qr_zero_cols.Q == LinearAlgebra.I(dimA)
+
+            # this should work now, Q is 0 × 0, and R has `dimA` columns:
+            A_zero_rows = rand(F, 0, dimA)
+            qr_zero_rows = qr(A_zero_rows)
+            @test size(qr_zero_rows.Q) == (0, 0)
+            @test size(qr_zero_rows.R) == (0, dimA)
+        end
+    end
+end
+
+@testset "issue #53214" begin
+    # Test that the rank of a QRPivoted matrix is computed correctly
+    @test rank(qr([1.0 0.0; 0.0 1.0], ColumnNorm())) == 2
+    @test rank(qr([1.0 0.0; 0.0 0.9], ColumnNorm()), rtol=0.95) == 1
+    @test rank(qr([1.0 0.0; 0.0 0.9], ColumnNorm()), atol=0.95) == 1
+    @test rank(qr([1.0 0.0; 0.0 1.0], ColumnNorm()), rtol=1.01) == 0
+    @test rank(qr([1.0 0.0; 0.0 1.0], ColumnNorm()), atol=1.01) == 0
+
+    @test rank(qr([1.0 2.0; 2.0 4.0], ColumnNorm())) == 1
+    @test rank(qr([1.0 2.0 3.0; 4.0 5.0 6.0 ; 7.0 8.0 9.0], ColumnNorm())) == 2
+end
+
 end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl
index c9a5d92dbdae8..f3d494fba7942 100644
--- a/stdlib/LinearAlgebra/test/schur.jl
+++ b/stdlib/LinearAlgebra/test/schur.jl
@@ -33,7 +33,7 @@ aimg  = randn(n,n)/2
         @test sort(imag(f.values)) ≈ sort(imag(d))
         @test istriu(f.Schur) || eltype(a)<:Real
         @test convert(Array, f) ≈ a
-        @test_throws ErrorException f.A
+        @test_throws FieldError f.A
 
         sch, vecs, vals = schur(UpperTriangular(triu(a)))
         @test vecs*sch*vecs' ≈ triu(a)
@@ -68,7 +68,7 @@ aimg  = randn(n,n)/2
             O = ordschur(S, select)
             sum(select) != 0 && @test S.values[findall(select)] ≈ O.values[1:sum(select)]
             @test O.vectors*O.Schur*O.vectors' ≈ ordschura
-            @test_throws ErrorException f.A
+            @test_throws FieldError f.A
             Snew = LinearAlgebra.Schur(S.T, S.Z, S.values)
             SchurNew = ordschur!(copy(Snew), select)
             @test O.vectors ≈ SchurNew.vectors
@@ -88,7 +88,7 @@ aimg  = randn(n,n)/2
             @test f.Q*f.T*f.Z' ≈ a2_sf
             @test istriu(f.S) || eltype(a)<:Real
             @test istriu(f.T) || eltype(a)<:Real
-            @test_throws ErrorException f.A
+            @test_throws FieldError f.A
 
             sstring = sprint((t, s) -> show(t, "text/plain", s), f.S)
             tstring = sprint((t, s) -> show(t, "text/plain", s), f.T)
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index 7e96af369e310..4b91bcfc1a4d5 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -3,7 +3,11 @@
 module TestSpecial
 
 using Test, LinearAlgebra, Random
-using LinearAlgebra: rmul!
+using LinearAlgebra: rmul!, BandIndex
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
 
 n= 10 #Size of matrix to test
 Random.seed!(1)
@@ -111,8 +115,11 @@ Random.seed!(1)
         struct TypeWithZero end
         Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero
         Base.convert(::Type{TypeWithZero}, ::TypeWithoutZero) = TypeWithZero()
+        Base.zero(x::Union{TypeWithoutZero, TypeWithZero}) = zero(typeof(x))
         Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero()
         LinearAlgebra.symmetric(::TypeWithoutZero, ::Symbol) = TypeWithoutZero()
+        LinearAlgebra.symmetric_type(::Type{TypeWithoutZero}) = TypeWithoutZero
+        Base.copy(A::TypeWithoutZero) = A
         Base.transpose(::TypeWithoutZero) = TypeWithoutZero()
         d  = fill(TypeWithoutZero(), 3)
         du = fill(TypeWithoutZero(), 2)
@@ -125,6 +132,15 @@ Random.seed!(1)
         for M in (D, Bu, Bl, Tri, Sym)
             @test Matrix(M) == zeros(TypeWithZero, 3, 3)
         end
+
+        mutable struct MTypeWithZero end
+        Base.convert(::Type{MTypeWithZero}, ::TypeWithoutZero) = MTypeWithZero()
+        Base.convert(::Type{MTypeWithZero}, ::TypeWithZero) = MTypeWithZero()
+        Base.zero(x::MTypeWithZero) = zero(typeof(x))
+        Base.zero(::Type{MTypeWithZero}) = MTypeWithZero()
+        U = UpperTriangular(Symmetric(fill(TypeWithoutZero(), 2, 2)))
+        M = Matrix{MTypeWithZero}(U)
+        @test all(x -> x isa MTypeWithZero, M)
     end
 end
 
@@ -536,4 +552,311 @@ end
     @test v * S isa Matrix
 end
 
+@testset "copyto! between matrix types" begin
+    dl, d, du = zeros(Int,4), [1:5;], zeros(Int,4)
+    d_ones = ones(Int,size(du))
+
+    @testset "from Diagonal" begin
+        D = Diagonal(d)
+        @testset "to Bidiagonal" begin
+            BU = Bidiagonal(similar(d, BigInt), similar(du, BigInt), :U)
+            BL = Bidiagonal(similar(d, BigInt), similar(dl, BigInt), :L)
+            for B in (BL, BU)
+                copyto!(B, D)
+                @test B == D
+            end
+
+            @testset "mismatched size" begin
+                for B in (BU, BL)
+                    B .= 0
+                    copyto!(B, Diagonal(Int[1]))
+                    @test B[1,1] == 1
+                    B[1,1] = 0
+                    @test iszero(B)
+                end
+            end
+        end
+        @testset "to Tridiagonal" begin
+            T = Tridiagonal(similar(dl, BigInt), similar(d, BigInt), similar(du, BigInt))
+            copyto!(T, D)
+            @test T == D
+
+            @testset "mismatched size" begin
+                T .= 0
+                copyto!(T, Diagonal([1]))
+                @test T[1,1] == 1
+                T[1,1] = 0
+                @test iszero(T)
+            end
+        end
+        @testset "to SymTridiagonal" begin
+            for du2 in (similar(du, BigInt), similar(d, BigInt))
+                S = SymTridiagonal(similar(d), du2)
+                copyto!(S, D)
+                @test S == D
+            end
+
+            @testset "mismatched size" begin
+                S = SymTridiagonal(zero(d), zero(du))
+                copyto!(S, Diagonal([1]))
+                @test S[1,1] == 1
+                S[1,1] = 0
+                @test iszero(S)
+            end
+        end
+    end
+
+    @testset "from Bidiagonal" begin
+        BU = Bidiagonal(d, du, :U)
+        BUones = Bidiagonal(d, oneunit.(du), :U)
+        BL = Bidiagonal(d, dl, :L)
+        BLones = Bidiagonal(d, oneunit.(dl), :L)
+        @testset "to Diagonal" begin
+            D = Diagonal(zero(d))
+            for B in (BL, BU)
+                @test copyto!(D, B) == B
+                D .= 0
+            end
+            for B in (BLones, BUones)
+                errmsg = "cannot copy a Bidiagonal with a non-zero off-diagonal band to a Diagonal"
+                @test_throws errmsg copyto!(D, B)
+                @test iszero(D)
+            end
+
+            @testset "mismatched size" begin
+                for uplo in (:L, :U)
+                    D .= 0
+                    copyto!(D, Bidiagonal(Int[1], Int[], uplo))
+                    @test D[1,1] == 1
+                    D[1,1] = 0
+                    @test iszero(D)
+                end
+            end
+        end
+        @testset "to Tridiagonal" begin
+            T = Tridiagonal(similar(dl, BigInt), similar(d, BigInt), similar(du, BigInt))
+            for B in (BL, BU, BLones, BUones)
+                copyto!(T, B)
+                @test T == B
+            end
+
+            @testset "mismatched size" begin
+                T = Tridiagonal(oneunit.(dl), zero(d), oneunit.(du))
+                for uplo in (:L, :U)
+                    T .= 0
+                    copyto!(T, Bidiagonal([1], Int[], uplo))
+                    @test T[1,1] == 1
+                    T[1,1] = 0
+                    @test iszero(T)
+                end
+            end
+        end
+        @testset "to SymTridiagonal" begin
+            for du2 in (similar(du, BigInt), similar(d, BigInt))
+                S = SymTridiagonal(similar(d, BigInt), du2)
+                for B in (BL, BU)
+                    copyto!(S, B)
+                    @test S == B
+                end
+                errmsg = "cannot copy a non-symmetric Bidiagonal matrix to a SymTridiagonal"
+                @test_throws errmsg copyto!(S, BUones)
+                @test_throws errmsg copyto!(S, BLones)
+            end
+
+            @testset "mismatched size" begin
+                S = SymTridiagonal(zero(d), zero(du))
+                for uplo in (:L, :U)
+                    copyto!(S, Bidiagonal([1], Int[], uplo))
+                    @test S[1,1] == 1
+                    S[1,1] = 0
+                    @test iszero(S)
+                end
+            end
+        end
+    end
+
+    @testset "from Tridiagonal" begin
+        T = Tridiagonal(dl, d, du)
+        TU = Tridiagonal(dl, d, d_ones)
+        TL = Tridiagonal(d_ones, d, dl)
+        @testset "to Diagonal" begin
+            D = Diagonal(zero(d))
+            @test copyto!(D, T) == Diagonal(d)
+            errmsg = "cannot copy a Tridiagonal with a non-zero off-diagonal band to a Diagonal"
+            D .= 0
+            @test_throws errmsg copyto!(D, TU)
+            @test iszero(D)
+            errmsg = "cannot copy a Tridiagonal with a non-zero off-diagonal band to a Diagonal"
+            @test_throws errmsg copyto!(D, TL)
+            @test iszero(D)
+
+            @testset "mismatched size" begin
+                D .= 0
+                copyto!(D, Tridiagonal(Int[], Int[1], Int[]))
+                @test D[1,1] == 1
+                D[1,1] = 0
+                @test iszero(D)
+            end
+        end
+        @testset "to Bidiagonal" begin
+            BU = Bidiagonal(zero(d), zero(du), :U)
+            BL = Bidiagonal(zero(d), zero(du), :L)
+            @test copyto!(BU, T) == Bidiagonal(d, du, :U)
+            @test copyto!(BL, T) == Bidiagonal(d, du, :L)
+
+            BU .= 0
+            BL .= 0
+            errmsg = "cannot copy a Tridiagonal with a non-zero superdiagonal to a Bidiagonal with uplo=:L"
+            @test_throws errmsg copyto!(BL, TU)
+            @test iszero(BL)
+            @test copyto!(BU, TU) == Bidiagonal(d, d_ones, :U)
+
+            BU .= 0
+            BL .= 0
+            @test copyto!(BL, TL) == Bidiagonal(d, d_ones, :L)
+            errmsg = "cannot copy a Tridiagonal with a non-zero subdiagonal to a Bidiagonal with uplo=:U"
+            @test_throws errmsg copyto!(BU, TL)
+            @test iszero(BU)
+
+            @testset "mismatched size" begin
+                for B in (BU, BL)
+                    B .= 0
+                    copyto!(B, Tridiagonal(Int[], Int[1], Int[]))
+                    @test B[1,1] == 1
+                    B[1,1] = 0
+                    @test iszero(B)
+                end
+            end
+        end
+    end
+
+    @testset "from SymTridiagonal" begin
+        S2 = SymTridiagonal(d, ones(Int,size(d)))
+        for S in (SymTridiagonal(d, du), SymTridiagonal(d, zero(d)))
+            @testset "to Diagonal" begin
+                D = Diagonal(zero(d))
+                @test copyto!(D, S) == Diagonal(d)
+                D .= 0
+                errmsg = "cannot copy a SymTridiagonal with a non-zero off-diagonal band to a Diagonal"
+                @test_throws errmsg copyto!(D, S2)
+                @test iszero(D)
+
+                @testset "mismatched size" begin
+                    D .= 0
+                    copyto!(D, SymTridiagonal(Int[1], Int[]))
+                    @test D[1,1] == 1
+                    D[1,1] = 0
+                    @test iszero(D)
+                end
+            end
+            @testset "to Bidiagonal" begin
+                BU = Bidiagonal(zero(d), zero(du), :U)
+                BL = Bidiagonal(zero(d), zero(du), :L)
+                @test copyto!(BU, S) == Bidiagonal(d, du, :U)
+                @test copyto!(BL, S) == Bidiagonal(d, du, :L)
+
+                BU .= 0
+                BL .= 0
+                errmsg = "cannot copy a SymTridiagonal with a non-zero off-diagonal band to a Bidiagonal"
+                @test_throws errmsg copyto!(BU, S2)
+                @test iszero(BU)
+                @test_throws errmsg copyto!(BL, S2)
+                @test iszero(BL)
+
+                @testset "mismatched size" begin
+                    for B in (BU, BL)
+                        B .= 0
+                        copyto!(B, SymTridiagonal(Int[1], Int[]))
+                        @test B[1,1] == 1
+                        B[1,1] = 0
+                        @test iszero(B)
+                    end
+                end
+            end
+        end
+    end
+end
+
+@testset "BandIndex indexing" begin
+    for D in (Diagonal(1:3), Bidiagonal(1:3, 2:3, :U), Bidiagonal(1:3, 2:3, :L),
+                Tridiagonal(2:3, 1:3, 1:2), SymTridiagonal(1:3, 2:3))
+        M = Matrix(D)
+        for band in -size(D,1)+1:size(D,1)-1
+            for idx in 1:size(D,1)-abs(band)
+                @test D[BandIndex(band, idx)] == M[BandIndex(band, idx)]
+            end
+        end
+        @test_throws BoundsError D[BandIndex(size(D,1),1)]
+    end
+end
+
+@testset "Partly filled Hermitian and Diagonal algebra" begin
+    D = Diagonal([1,2])
+    for S in (Symmetric, Hermitian), uplo in (:U, :L)
+        M = Matrix{BigInt}(undef, 2, 2)
+        M[1,1] = M[2,2] = M[1+(uplo == :L), 1 + (uplo == :U)] = 3
+        H = S(M, uplo)
+        HM = Matrix(H)
+        @test H + D == D + H == HM + D
+        @test H - D == HM - D
+        @test D - H == D - HM
+    end
+end
+
+@testset "block SymTridiagonal" begin
+    m = SizedArrays.SizedArray{(2,2)}(reshape([1:4;;],2,2))
+    S = SymTridiagonal(fill(m,4), fill(m,3))
+    SA = Array(S)
+    D = Diagonal(fill(m,4))
+    DA = Array(D)
+    BU = Bidiagonal(fill(m,4), fill(m,3), :U)
+    BUA = Array(BU)
+    BL = Bidiagonal(fill(m,4), fill(m,3), :L)
+    BLA = Array(BL)
+    T = Tridiagonal(fill(m,3), fill(m,4), fill(m,3))
+    TA = Array(T)
+    IA = Array(Diagonal(fill(one(m), 4)))
+    @test S + D == D + S == SA + DA
+    @test S - D == -(D - S) == SA - DA
+    @test S + BU == SA + BUA
+    @test S - BU == -(BU - S) == SA - BUA
+    @test S + BL == SA + BLA
+    @test S - BL == -(BL - S) == SA - BLA
+    @test S + T == SA + TA
+    @test S - T == -(T - S) == SA - TA
+    @test S + S == SA + SA
+    @test S - S == -(S - S) == SA - SA
+    @test S + I == I + S == SA + IA
+    @test S - I == -(I - S) == SA - IA
+
+    @test S == S
+    @test S != D
+    @test S != BL
+    @test S != BU
+    @test S != T
+
+    @test_throws ArgumentError fill!(S, m)
+    S_small = SymTridiagonal(fill(m,2), fill(m,1))
+    @test_throws "cannot fill a SymTridiagonal with an asymmetric value" fill!(S, m)
+    fill!(S_small, Symmetric(m))
+    @test all(==(Symmetric(m)), S_small)
+
+    @testset "diag" begin
+        m = SizedArrays.SizedArray{(2,2)}([1 3; 3 4])
+        D = Diagonal(fill(m,4))
+        z = fill(zero(m),3)
+        d = fill(m,4)
+        BU = Bidiagonal(d, z, :U)
+        BL = Bidiagonal(d, z, :L)
+        T = Tridiagonal(z, d, z)
+        for ev in (fill(zero(m),3), fill(zero(m),4))
+            SD = SymTridiagonal(fill(m,4), ev)
+            @test SD == D == SD
+            @test SD == BU == SD
+            @test SD == BL == SD
+            @test SD == T == SD
+        end
+    end
+end
+
 end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
index fab6cd80141b2..384ed5b3b60cf 100644
--- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
@@ -59,6 +59,61 @@ using Test, LinearAlgebra
             @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY)
         end
     end
+    UU = UnitUpperTriangular(rand(N,N))
+    UL = UnitLowerTriangular(rand(N,N))
+    unittriangulars = (UU, UL)
+    Ttris = typeof.((UpperTriangular(parent(UU)), LowerTriangular(parent(UU))))
+    funittriangulars = map(Array, unittriangulars)
+    for (X, fX, Ttri) in zip(unittriangulars, funittriangulars, Ttris)
+        @test (Q = broadcast(sin, X); typeof(Q) == Ttri && Q == broadcast(sin, fX))
+        @test broadcast!(sin, Z, X) == broadcast(sin, fX)
+        @test (Q = broadcast(cos, X); Q isa Matrix && Q == broadcast(cos, fX))
+        @test broadcast!(cos, Z, X) == broadcast(cos, fX)
+        @test (Q = broadcast(*, s, X); typeof(Q) == Ttri && Q == broadcast(*, s, fX))
+        @test broadcast!(*, Z, s, X) == broadcast(*, s, fX)
+        @test (Q = broadcast(+, fV, fA, X); Q isa Matrix && Q == broadcast(+, fV, fA, fX))
+        @test broadcast!(+, Z, fV, fA, X) == broadcast(+, fV, fA, fX)
+        @test (Q = broadcast(*, s, fV, fA, X); Q isa Matrix && Q == broadcast(*, s, fV, fA, fX))
+        @test broadcast!(*, Z, s, fV, fA, X) == broadcast(*, s, fV, fA, fX)
+
+        @test X .* 2.0 == X .* (2.0,) == fX .* 2.0
+        @test X .* 2.0 isa Ttri
+        @test X .* (2.0,) isa Ttri
+        @test isequal(X .* Inf, fX .* Inf)
+
+        two = 2
+        @test X .^ 2 ==  X .^ (2,) == fX .^ 2 == X .^ two
+        @test X .^ 2 isa typeof(X) # special cased, as isstructurepreserving
+        @test X .^ (2,) isa Ttri
+        @test X .^ two isa Ttri
+        @test X .^ 0 == fX .^ 0
+        @test X .^ -1 == fX .^ -1
+
+        for (Y, fY) in zip(unittriangulars, funittriangulars)
+            @test broadcast(+, X, Y) == broadcast(+, fX, fY)
+            @test broadcast!(+, Z, X, Y) == broadcast(+, fX, fY)
+            @test broadcast(*, X, Y) == broadcast(*, fX, fY)
+            @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY)
+        end
+    end
+
+    @testset "type-stability in Bidiagonal" begin
+        B2 = @inferred (B -> .- B)(B)
+        @test B2 isa Bidiagonal
+        @test B2 == -1 * B
+        B2 = @inferred (B -> B .* 2)(B)
+        @test B2 isa Bidiagonal
+        @test B2 == B + B
+        B2 = @inferred (B -> 2 .* B)(B)
+        @test B2 isa Bidiagonal
+        @test B2 == B + B
+        B2 = @inferred (B -> B ./ 1)(B)
+        @test B2 isa Bidiagonal
+        @test B2 == B
+        B2 = @inferred (B -> 1 .\ B)(B)
+        @test B2 isa Bidiagonal
+        @test B2 == B
+    end
 end
 
 @testset "broadcast! where the destination is a structured matrix" begin
@@ -243,4 +298,71 @@ end
 # structured broadcast with function returning non-number type
 @test tuple.(Diagonal([1, 2])) == [(1,) (0,); (0,) (2,)]
 
+@testset "Broadcast with missing (#54467)" begin
+    select_first(x, y) = x
+    diag = Diagonal([1,2])
+    @test select_first.(diag, missing) == diag
+    @test select_first.(diag, missing) isa Diagonal{Int}
+    @test isequal(select_first.(missing, diag), fill(missing, 2, 2))
+    @test select_first.(missing, diag) isa Matrix{Missing}
+end
+
+@testset "broadcast over structured matrices with matrix elements" begin
+    function standardbroadcastingtests(D, T)
+        M = [x for x in D]
+        Dsum = D .+ D
+        @test Dsum isa T
+        @test Dsum == M .+ M
+        Dcopy = copy.(D)
+        @test Dcopy isa T
+        @test Dcopy == D
+        Df = float.(D)
+        @test Df isa T
+        @test Df == D
+        @test eltype(eltype(Df)) <: AbstractFloat
+        @test (x -> (x,)).(D) == (x -> (x,)).(M)
+        @test (x -> 1).(D) == ones(Int,size(D))
+        @test all(==(2), ndims.(D))
+        @test_throws MethodError size.(D)
+    end
+    @testset "Diagonal" begin
+        @testset "square" begin
+            A = [1 3; 2 4]
+            D = Diagonal([A, A])
+            standardbroadcastingtests(D, Diagonal)
+            @test sincos.(D) == sincos.(Matrix{eltype(D)}(D))
+            M = [x for x in D]
+            @test cos.(D) == cos.(M)
+        end
+
+        @testset "different-sized square blocks" begin
+            D = Diagonal([ones(3,3), fill(3.0,2,2)])
+            standardbroadcastingtests(D, Diagonal)
+        end
+
+        @testset "rectangular blocks" begin
+            D = Diagonal([ones(Bool,3,4), ones(Bool,2,3)])
+            standardbroadcastingtests(D, Diagonal)
+        end
+
+        @testset "incompatible sizes" begin
+            A = reshape(1:12, 4, 3)
+            B = reshape(1:12, 3, 4)
+            D1 = Diagonal(fill(A, 2))
+            D2 = Diagonal(fill(B, 2))
+            @test_throws DimensionMismatch D1 .+ D2
+        end
+    end
+    @testset "Bidiagonal" begin
+        A = [1 3; 2 4]
+        B = Bidiagonal(fill(A,3), fill(A,2), :U)
+        standardbroadcastingtests(B, Bidiagonal)
+    end
+    @testset "UpperTriangular" begin
+        A = [1 3; 2 4]
+        U = UpperTriangular([(i+j)*A for i in 1:3, j in 1:3])
+        standardbroadcastingtests(U, UpperTriangular)
+    end
+end
+
 end
diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl
index 7f2aad904a88f..9e8b5d5cda7d2 100644
--- a/stdlib/LinearAlgebra/test/svd.jl
+++ b/stdlib/LinearAlgebra/test/svd.jl
@@ -75,7 +75,7 @@ aimg  = randn(n,n)/2
             @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ a
             @test convert(Array, usv) ≈ a
             @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
+            @test_throws FieldError usv.Z
             b = rand(eltya,n)
             @test usv\b ≈ a\b
             @test Base.propertynames(usv) == (:U, :S, :V, :Vt)
@@ -94,7 +94,7 @@ aimg  = randn(n,n)/2
                 @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ transform(a)
                 @test convert(Array, usv) ≈ transform(a)
                 @test usv.Vt' ≈ usv.V
-                @test_throws ErrorException usv.Z
+                @test_throws FieldError usv.Z
                 b = rand(eltya,n)
                 @test usv\b ≈ transform(a)\b
             end
@@ -106,8 +106,8 @@ aimg  = randn(n,n)/2
             @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ a
             @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ a_svd
             @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
-            @test_throws ErrorException gsvd.Z
+            @test_throws FieldError usv.Z
+            @test_throws FieldError gsvd.Z
             @test gsvd.vals ≈ svdvals(a,a_svd)
             α = eltya == Int ? -1 : rand(eltya)
             β = svd(α)
@@ -148,7 +148,7 @@ aimg  = randn(n,n)/2
             @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ T(asym)
             @test convert(Array, usv) ≈ T(asym)
             @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
+            @test_throws FieldError usv.Z
             b = rand(eltya,n)
             @test usv\b ≈ T(asym)\b
         end
@@ -271,6 +271,27 @@ end
     @test B.U ≈ B32.U
     @test B.Vt ≈ B32.Vt
     @test B.S ≈ B32.S
+    C = Symmetric(A'A)
+    D = svd(C)
+    D32 = svd(Symmetric(Float32.(C)))
+    @test D isa SVD{Float16, Float16, Matrix{Float16}}
+    @test D.U isa Matrix{Float16}
+    @test D.Vt isa Matrix{Float16}
+    @test D.S isa Vector{Float16}
+    @test D.U ≈ D32.U
+    @test D.Vt ≈ D32.Vt
+    @test D.S ≈ D32.S
+    A = randn(ComplexF16, 3, 3)
+    E = Hermitian(A'A)
+    F = svd(E)
+    F32 = svd(Hermitian(ComplexF32.(E)))
+    @test F isa SVD{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}}
+    @test F.U isa Matrix{ComplexF16}
+    @test F.Vt isa Matrix{ComplexF16}
+    @test F.S isa Vector{Float16}
+    @test F.U ≈ F32.U
+    @test F.Vt ≈ F32.Vt
+    @test F.S ≈ F32.S
 end
 
 end # module TestSVD
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index d3b24ccf78b0b..939e677039dc7 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -223,8 +223,8 @@ end
 
         @testset "linalg unary ops" begin
             @testset "tr" begin
-                @test tr(asym) == tr(Symmetric(asym))
-                @test tr(aherm) == tr(Hermitian(aherm))
+                @test tr(asym) ≈ tr(Symmetric(asym))
+                @test tr(aherm) ≈ tr(Hermitian(aherm))
             end
 
             @testset "isposdef[!]" begin
@@ -264,8 +264,11 @@ end
                     @testset "inverse edge case with complex Hermitian" begin
                         # Hermitian matrix, where inv(lu(A)) generates non-real diagonal elements
                         for T in (ComplexF32, ComplexF64)
-                            A = T[0.650488+0.0im 0.826686+0.667447im; 0.826686-0.667447im 1.81707+0.0im]
-                            H = Hermitian(A)
+                            # data should have nonvanishing imaginary parts on the diagonal
+                            M = T[0.279982+0.988074im  0.770011+0.870555im
+                                    0.138001+0.889728im  0.177242+0.701413im]
+                            H = Hermitian(M)
+                            A = Matrix(H)
                             @test inv(H) ≈ inv(A)
                             @test ishermitian(Matrix(inv(H)))
                         end
@@ -467,6 +470,28 @@ end
                 @test dot(symblockml, symblockml) ≈ dot(msymblockml, msymblockml)
             end
         end
+
+        @testset "kronecker product of symmetric and Hermitian matrices" begin
+            for mtype in (Symmetric, Hermitian)
+                symau = mtype(a, :U)
+                symal = mtype(a, :L)
+                msymau = Matrix(symau)
+                msymal = Matrix(symal)
+                for eltyc in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
+                    creal = randn(n, n)/2
+                    cimag = randn(n, n)/2
+                    c = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(creal, cimag) : creal)
+                    symcu = mtype(c, :U)
+                    symcl = mtype(c, :L)
+                    msymcu = Matrix(symcu)
+                    msymcl = Matrix(symcl)
+                    @test kron(symau, symcu) ≈ kron(msymau, msymcu)
+                    @test kron(symau, symcl) ≈ kron(msymau, msymcl)
+                    @test kron(symal, symcu) ≈ kron(msymal, msymcu)
+                    @test kron(symal, symcl) ≈ kron(msymal, msymcl)
+                end
+            end
+        end
     end
 end
 
@@ -487,6 +512,7 @@ end
         @test S - S == MS - MS
         @test S*2 == 2*S == 2*MS
         @test S/2 == MS/2
+        @test kron(S,S) == kron(MS,MS)
     end
     @testset "mixed uplo" begin
         Mu = Matrix{Complex{BigFloat}}(undef,2,2)
@@ -502,10 +528,56 @@ end
             MSl = Matrix(Sl)
             @test Su + Sl == Sl + Su == MSu + MSl
             @test Su - Sl == -(Sl - Su) == MSu - MSl
+            @test kron(Su,Sl) == kron(MSu,MSl)
+            @test kron(Sl,Su) == kron(MSl,MSu)
+        end
+    end
+    @testset "non-strided" begin
+        @testset "diagonal" begin
+            for ST1 in (Symmetric, Hermitian), uplo1 in (:L, :U)
+                m = ST1(Matrix{BigFloat}(undef,2,2), uplo1)
+                m.data[1,1] = 1
+                m.data[2,2] = 3
+                m.data[1+(uplo1==:L), 1+(uplo1==:U)] = 2
+                A = Array(m)
+                for ST2 in (Symmetric, Hermitian), uplo2 in (:L, :U)
+                    id = ST2(I(2), uplo2)
+                    @test m + id == id + m == A + id
+                end
+            end
+        end
+        @testset "unit triangular" begin
+            for ST1 in (Symmetric, Hermitian), uplo1 in (:L, :U)
+                H1 = ST1(UnitUpperTriangular(big.(rand(Int8,4,4))), uplo1)
+                M1 = Matrix(H1)
+                for ST2 in (Symmetric, Hermitian), uplo2 in (:L, :U)
+                    H2 = ST2(UnitUpperTriangular(big.(rand(Int8,4,4))), uplo2)
+                    @test H1 + H2 == M1 + Matrix(H2)
+                end
+            end
         end
     end
 end
 
+@testset "Reverse operation on Symmetric" begin
+    for uplo in (:U, :L)
+        A = Symmetric(randn(5, 5), uplo)
+        @test reverse(A, dims=1) == reverse(Matrix(A), dims=1)
+        @test reverse(A, dims=2) == reverse(Matrix(A), dims=2)
+        @test reverse(A)::Symmetric == reverse(Matrix(A))
+    end
+end
+
+@testset "Reverse operation on Hermitian" begin
+    for uplo in (:U, :L)
+        A = Hermitian(randn(ComplexF64, 5, 5), uplo)
+        @test reverse(A, dims=1) == reverse(Matrix(A), dims=1)
+        @test reverse(A, dims=2) == reverse(Matrix(A), dims=2)
+        @test reverse(A)::Hermitian == reverse(Matrix(A))
+    end
+end
+
+
 # bug identified in PR #52318: dot products of quaternionic Hermitian matrices,
 # or any number type where conj(a)*conj(b) ≠ conj(a*b):
 @testset "dot Hermitian quaternion #52318" begin
@@ -517,6 +589,25 @@ end
     @test dot(A, B) ≈ dot(Symmetric(A), Symmetric(B))
 end
 
+# let's make sure the analogous bug will not show up with kronecker products
+@testset "kron Hermitian quaternion #52318" begin
+    A, B = [Quaternion.(randn(3,3), randn(3, 3), randn(3, 3), randn(3,3)) |> t -> t + t' for i in 1:2]
+    @test A == Hermitian(A) && B == Hermitian(B)
+    @test kron(A, B) ≈ kron(Hermitian(A), Hermitian(B))
+    A, B = [Quaternion.(randn(3,3), randn(3, 3), randn(3, 3), randn(3,3)) |> t -> t + transpose(t) for i in 1:2]
+    @test A == Symmetric(A) && B == Symmetric(B)
+    @test kron(A, B) ≈ kron(Symmetric(A), Symmetric(B))
+end
+
+@testset "kron with symmetric/hermitian matrices of matrices" begin
+    M = fill(ones(2,2), 2, 2)
+    for W in (Symmetric, Hermitian)
+        for (t1, t2) in ((W(M, :U), W(M, :U)), (W(M, :U), W(M, :L)), (W(M, :L), W(M, :L)))
+            @test kron(t1, t2) ≈ kron(Matrix(t1), Matrix(t2))
+        end
+    end
+end
+
 #Issue #7647: test xsyevr, xheevr, xstevr drivers.
 @testset "Eigenvalues in interval for $(typeof(Mi7647))" for Mi7647 in
         (Symmetric(diagm(0 => 1.0:3.0)),
@@ -629,6 +720,22 @@ end
     end
 end
 
+@testset "eigendecomposition Algorithms" begin
+    using LinearAlgebra: DivideAndConquer, QRIteration, RobustRepresentations
+    for T in (Float64, ComplexF64, Float32, ComplexF32)
+        n = 4
+        A = T <: Real ? Symmetric(randn(T, n, n)) : Hermitian(randn(T, n, n))
+        d, v = eigen(A)
+        for alg in (DivideAndConquer(), QRIteration(), RobustRepresentations())
+            @test (@inferred eigvals(A, alg)) ≈ d
+            d2, v2 = @inferred eigen(A, alg)
+            @test d2 ≈ d
+            @test A * v2 ≈ v2 * Diagonal(d2)
+        end
+    end
+end
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
 using .Main.ImmutableArrays
 
@@ -975,4 +1082,57 @@ end
     @test conj(H) == conj(Array(H))
 end
 
+@testset "copyto! with aliasing (#39460)" begin
+    M = Matrix(reshape(1:36, 6, 6))
+    @testset for T in (Symmetric, Hermitian), uploA in (:U, :L), uploB in (:U, :L)
+        A = T(view(M, 1:5, 1:5), uploA)
+        A2 = copy(A)
+        B = T(view(M, 2:6, 2:6), uploB)
+        @test copyto!(B, A) == A2
+
+        A = view(M, 2:4, 2:4)
+        B = T(view(M, 1:3, 1:3), uploB)
+        B2 = copy(B)
+        @test copyto!(A, B) == B2
+    end
+end
+
+@testset "copyto with incompatible sizes" begin
+    A = zeros(3,3); B = zeros(2,2)
+    @testset "copyto with incompatible sizes" begin
+        for T in (Symmetric, Hermitian)
+            @test_throws BoundsError copyto!(T(B), T(A))
+            @test_throws "Cannot set a non-diagonal index" copyto!(T(A), T(B))
+        end
+    end
+end
+
+@testset "getindex with Integers" begin
+    M = reshape(1:4,2,2)
+    for ST in (Symmetric, Hermitian)
+        S = ST(M)
+        @test_throws "invalid index" S[true, true]
+        @test S[1,2] == S[Int8(1),UInt16(2)] == S[big(1), Int16(2)]
+    end
+end
+
+@testset "tr for block matrices" begin
+    m = [1 2; 3 4]
+    for b in (m, m * (1 + im))
+        M = fill(b, 3, 3)
+        for ST in (Symmetric, Hermitian)
+            S = ST(M)
+            @test tr(S) == sum(diag(S))
+        end
+    end
+end
+
+@testset "setindex! returns the destination" begin
+    M = rand(2,2)
+    for T in (Symmetric, Hermitian)
+        S = T(M)
+        @test setindex!(S, 0, 2, 2) === S
+    end
+end
+
 end # module TestSymmetric
diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl
index b3a5472c511f4..d55d1deb6bf33 100644
--- a/stdlib/LinearAlgebra/test/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl
@@ -149,6 +149,34 @@ end
     HT = Hermitian(Tridiagonal(ev, dv, ev))
     λ, V = eigen(HT)
     @test HT * V ≈ V * Diagonal(λ)
+    HT = Hermitian(Tridiagonal(ComplexF16.(ev), ComplexF16.(dv), ComplexF16.(ev)))
+    F = eigen(HT)
+    @test F isa Eigen{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}}
+    λ, V = F
+    @test HT * V ≈ V * Diagonal(λ)
+end
+
+@testset "Float16" begin
+    A = rand(Float16, 3, 3)
+    A = Symmetric(A*A')
+    B = eigen(A)
+    B32 = eigen(Symmetric(Float32.(A)))
+    @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}}
+    @test B.values ≈ B32.values
+    @test B.vectors ≈ B32.vectors
+    C = randn(ComplexF16, 3, 3)
+    C = Hermitian(C*C')
+    D = eigen(C)
+    D32 = eigen(Hermitian(ComplexF32.(C)))
+    @test D isa Eigen{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}}
+    @test D.values ≈ D32.values
+    @test D.vectors ≈ D32.vectors
+end
+
+@testset "complex Symmetric" begin
+    S = Symmetric(rand(ComplexF64,2,2))
+    λ, v = eigen(S)
+    @test S * v ≈ v * Diagonal(λ)
 end
 
 end # module TestSymmetricEigen
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
index 7b222234c5fcb..a09d0092e9f39 100644
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ b/stdlib/LinearAlgebra/test/triangular.jl
@@ -25,16 +25,23 @@ debug && println("Test basic type functionality")
 @test_throws DimensionMismatch LowerTriangular(randn(5, 4))
 @test LowerTriangular(randn(3, 3)) |> t -> [size(t, i) for i = 1:3] == [size(Matrix(t), i) for i = 1:3]
 
+struct MyTriangular{T, A<:LinearAlgebra.AbstractTriangular{T}} <: LinearAlgebra.AbstractTriangular{T}
+    data :: A
+end
+Base.size(A::MyTriangular) = size(A.data)
+Base.getindex(A::MyTriangular, i::Int, j::Int) = A.data[i,j]
+
 # The following test block tries to call all methods in base/linalg/triangular.jl in order for a combination of input element types. Keep the ordering when adding code.
 @testset for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
     # Begin loop for first Triangular matrix
-    for (t1, uplo1) in ((UpperTriangular, :U),
+    @testset for (t1, uplo1) in ((UpperTriangular, :U),
                         (UnitUpperTriangular, :U),
                         (LowerTriangular, :L),
                         (UnitLowerTriangular, :L))
 
         # Construct test matrix
         A1 = t1(elty1 == Int ? rand(1:7, n, n) : convert(Matrix{elty1}, (elty1 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo1 === :U ? t : copy(t')))
+        M1 = Matrix(A1)
         @test t1(A1) === A1
         @test t1{elty1}(A1) === A1
         # test the ctor works for AbstractMatrix
@@ -68,7 +75,7 @@ debug && println("Test basic type functionality")
         @test simA1 == A1
 
         # getindex
-        let mA1 = Matrix(A1)
+        let mA1 = M1
             # linear indexing
             for i in 1:length(A1)
                 @test A1[i] == mA1[i]
@@ -141,8 +148,8 @@ debug && println("Test basic type functionality")
         #tril/triu
         if uplo1 === :L
             @test tril(A1,0)  == A1
-            @test tril(A1,-1) == LowerTriangular(tril(Matrix(A1), -1))
-            @test tril(A1,1)  == t1(tril(tril(Matrix(A1), 1)))
+            @test tril(A1,-1) == LowerTriangular(tril(M1, -1))
+            @test tril(A1,1)  == t1(tril(tril(M1, 1)))
             @test tril(A1, -n - 2) == zeros(size(A1))
             @test tril(A1, n) == A1
             @test triu(A1,0)  == t1(diagm(0 => diag(A1)))
@@ -152,8 +159,8 @@ debug && println("Test basic type functionality")
             @test triu(A1, n + 2) == zeros(size(A1))
         else
             @test triu(A1,0)  == A1
-            @test triu(A1,1)  == UpperTriangular(triu(Matrix(A1), 1))
-            @test triu(A1,-1) == t1(triu(triu(Matrix(A1), -1)))
+            @test triu(A1,1)  == UpperTriangular(triu(M1, 1))
+            @test triu(A1,-1) == t1(triu(triu(M1, -1)))
             @test triu(A1, -n) == A1
             @test triu(A1, n + 2) == zeros(size(A1))
             @test tril(A1,0)  == t1(diagm(0 => diag(A1)))
@@ -169,10 +176,10 @@ debug && println("Test basic type functionality")
         # [c]transpose[!] (test views as well, see issue #14317)
         let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange))
             # transpose
-            @test copy(transpose(A1)) == transpose(Matrix(A1))
+            @test copy(transpose(A1)) == transpose(M1)
             @test copy(transpose(viewA1)) == transpose(Matrix(viewA1))
             # adjoint
-            @test copy(A1') == Matrix(A1)'
+            @test copy(A1') == M1'
             @test copy(viewA1') == Matrix(viewA1)'
             # transpose!
             @test transpose!(copy(A1)) == transpose(A1)
@@ -185,15 +192,15 @@ debug && println("Test basic type functionality")
         end
 
         # diag
-        @test diag(A1) == diag(Matrix(A1))
+        @test diag(A1) == diag(M1)
 
         # tr
-        @test tr(A1)::elty1 == tr(Matrix(A1))
+        @test tr(A1)::elty1 == tr(M1)
 
         # real
-        @test real(A1) == real(Matrix(A1))
-        @test imag(A1) == imag(Matrix(A1))
-        @test abs.(A1) == abs.(Matrix(A1))
+        @test real(A1) == real(M1)
+        @test imag(A1) == imag(M1)
+        @test abs.(A1) == abs.(M1)
 
         # zero
         if A1 isa UpperTriangular || A1 isa LowerTriangular
@@ -201,12 +208,12 @@ debug && println("Test basic type functionality")
         end
 
         # Unary operations
-        @test -A1 == -Matrix(A1)
+        @test -A1 == -M1
 
         # copy and copyto! (test views as well, see issue #14317)
         let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange))
             # copy
-            @test copy(A1) == copy(Matrix(A1))
+            @test copy(A1) == copy(M1)
             @test copy(viewA1) == copy(Matrix(viewA1))
             # copyto!
             B = similar(A1)
@@ -283,25 +290,25 @@ debug && println("Test basic type functionality")
         end
 
         # Binary operations
-        @test A1*0.5 == Matrix(A1)*0.5
-        @test 0.5*A1 == 0.5*Matrix(A1)
-        @test A1/0.5 == Matrix(A1)/0.5
-        @test 0.5\A1 == 0.5\Matrix(A1)
+        @test A1*0.5 == M1*0.5
+        @test 0.5*A1 == 0.5*M1
+        @test A1/0.5 == M1/0.5
+        @test 0.5\A1 == 0.5\M1
 
         # inversion
-        @test inv(A1) ≈ inv(lu(Matrix(A1)))
-        inv(Matrix(A1)) # issue #11298
+        @test inv(A1) ≈ inv(lu(M1))
+        inv(M1) # issue #11298
         @test isa(inv(A1), t1)
         # make sure the call to LAPACK works right
         if elty1 <: BlasFloat
-            @test LinearAlgebra.inv!(copy(A1)) ≈ inv(lu(Matrix(A1)))
+            @test LinearAlgebra.inv!(copy(A1)) ≈ inv(lu(M1))
         end
 
         # Determinant
-        @test det(A1) ≈ det(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n
-        @test logdet(A1) ≈ logdet(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n
+        @test det(A1) ≈ det(lu(M1)) atol=sqrt(eps(real(float(one(elty1)))))*n*n
+        @test logdet(A1) ≈ logdet(lu(M1)) atol=sqrt(eps(real(float(one(elty1)))))*n*n
         lada, ladb = logabsdet(A1)
-        flada, fladb = logabsdet(lu(Matrix(A1)))
+        flada, fladb = logabsdet(lu(M1))
         @test lada ≈ flada atol=sqrt(eps(real(float(one(elty1)))))*n*n
         @test ladb ≈ fladb atol=sqrt(eps(real(float(one(elty1)))))*n*n
 
@@ -324,7 +331,7 @@ debug && println("Test basic type functionality")
             for p in (1.0, Inf)
                 @test cond(A1,p) ≈ cond(A1,p) atol=(cond(A1,p)+cond(A1,p))
             end
-            @test cond(A1,2) == cond(Matrix(A1),2)
+            @test cond(A1,2) == cond(M1,2)
         end
 
         if !(elty1 in (BigFloat, Complex{BigFloat})) # Not implemented yet
@@ -333,13 +340,13 @@ debug && println("Test basic type functionality")
             svdvals(A1)
         end
 
-        @test ((A1*A1)::t1) ≈ Matrix(A1) * Matrix(A1)
-        @test ((A1/A1)::t1) ≈ Matrix(A1) / Matrix(A1)
-        @test ((A1\A1)::t1) ≈ Matrix(A1) \ Matrix(A1)
+        @test ((A1*A1)::t1) ≈ M1 * M1
+        @test ((A1/A1)::t1) ≈ M1 / M1
+        @test ((A1\A1)::t1) ≈ M1 \ M1
 
         # Begin loop for second Triangular matrix
-        for elty2 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
-            for (t2, uplo2) in ((UpperTriangular, :U),
+        @testset for elty2 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
+            @testset for (t2, uplo2) in ((UpperTriangular, :U),
                                 (UnitUpperTriangular, :U),
                                 (LowerTriangular, :L),
                                 (UnitLowerTriangular, :L))
@@ -347,7 +354,7 @@ debug && println("Test basic type functionality")
                 debug && println("elty1: $elty1, A1: $t1, elty2: $elty2, A2: $t2")
 
                 A2 = t2(elty2 == Int ? rand(1:7, n, n) : convert(Matrix{elty2}, (elty2 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo2 === :U ? t : copy(t')))
-
+                M2 = Matrix(A2)
                 # Convert
                 if elty1 <: Real && !(elty2 <: Integer)
                     @test convert(AbstractMatrix{elty2}, A1) == t1(convert(Matrix{elty2}, A1.data))
@@ -356,21 +363,22 @@ debug && println("Test basic type functionality")
                 end
 
                 # Binary operations
-                @test A1 + A2 == Matrix(A1) + Matrix(A2)
-                @test A1 - A2 == Matrix(A1) - Matrix(A2)
+                @test A1 + A2 == M1 + M2
+                @test A1 - A2 == M1 - M2
+                @test kron(A1,A2) == kron(M1,M2)
 
                 # Triangular-Triangular multiplication and division
-                @test A1*A2 ≈ Matrix(A1)*Matrix(A2)
-                @test transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2)
-                @test transpose(A1)*adjoint(A2) ≈ transpose(Matrix(A1))*adjoint(Matrix(A2))
-                @test adjoint(A1)*transpose(A2) ≈ adjoint(Matrix(A1))*transpose(Matrix(A2))
-                @test A1'A2 ≈ Matrix(A1)'Matrix(A2)
-                @test A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2))
-                @test A1*A2' ≈ Matrix(A1)*Matrix(A2)'
-                @test transpose(A1)*transpose(A2) ≈ transpose(Matrix(A1))*transpose(Matrix(A2))
-                @test A1'A2' ≈ Matrix(A1)'Matrix(A2)'
-                @test A1/A2 ≈ Matrix(A1)/Matrix(A2)
-                @test A1\A2 ≈ Matrix(A1)\Matrix(A2)
+                @test A1*A2 ≈ M1*M2
+                @test transpose(A1)*A2 ≈ transpose(M1)*M2
+                @test transpose(A1)*adjoint(A2) ≈ transpose(M1)*adjoint(M2)
+                @test adjoint(A1)*transpose(A2) ≈ adjoint(M1)*transpose(M2)
+                @test A1'A2 ≈ M1'M2
+                @test A1*transpose(A2) ≈ M1*transpose(M2)
+                @test A1*A2' ≈ M1*M2'
+                @test transpose(A1)*transpose(A2) ≈ transpose(M1)*transpose(M2)
+                @test A1'A2' ≈ M1'M2'
+                @test A1/A2 ≈ M1/M2
+                @test A1\A2 ≈ M1\M2
                 if uplo1 === :U && uplo2 === :U
                     if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular
                         @test A1*A2 isa UnitUpperTriangular
@@ -411,20 +419,20 @@ debug && println("Test basic type functionality")
                 @test_throws DimensionMismatch A2'  * offsizeA
                 @test_throws DimensionMismatch A2   * offsizeA
                 if (uplo1 == uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular)
-                    @test rdiv!(copy(A1), A2)::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2)
-                    @test ldiv!(A2, copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1)
+                    @test rdiv!(copy(A1), A2)::t1 ≈ A1/A2 ≈ M1/M2
+                    @test ldiv!(A2, copy(A1))::t1 ≈ A2\A1 ≈ M2\M1
                 end
                 if (uplo1 != uplo2 && elty1 == elty2 != Int && t2 != UnitLowerTriangular && t2 != UnitUpperTriangular)
-                    @test lmul!(adjoint(A1), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2)
-                    @test lmul!(transpose(A1), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2)
-                    @test ldiv!(adjoint(A1), copy(A2)) ≈ A1'\A2 ≈ Matrix(A1)'\Matrix(A2)
-                    @test ldiv!(transpose(A1), copy(A2)) ≈ transpose(A1)\A2 ≈ transpose(Matrix(A1))\Matrix(A2)
+                    @test lmul!(adjoint(A1), copy(A2)) ≈ A1'*A2 ≈ M1'*M2
+                    @test lmul!(transpose(A1), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(M1)*M2
+                    @test ldiv!(adjoint(A1), copy(A2)) ≈ A1'\A2 ≈ M1'\M2
+                    @test ldiv!(transpose(A1), copy(A2)) ≈ transpose(A1)\A2 ≈ transpose(M1)\M2
                 end
                 if (uplo1 != uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular)
-                    @test rmul!(copy(A1), adjoint(A2)) ≈ A1*A2' ≈ Matrix(A1)*Matrix(A2)'
-                    @test rmul!(copy(A1), transpose(A2)) ≈ A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2))
-                    @test rdiv!(copy(A1), adjoint(A2)) ≈ A1/A2' ≈ Matrix(A1)/Matrix(A2)'
-                    @test rdiv!(copy(A1), transpose(A2)) ≈ A1/transpose(A2) ≈ Matrix(A1)/transpose(Matrix(A2))
+                    @test rmul!(copy(A1), adjoint(A2)) ≈ A1*A2' ≈ M1*M2'
+                    @test rmul!(copy(A1), transpose(A2)) ≈ A1*transpose(A2) ≈ M1*transpose(M2)
+                    @test rdiv!(copy(A1), adjoint(A2)) ≈ A1/A2' ≈ M1/M2'
+                    @test rdiv!(copy(A1), transpose(A2)) ≈ A1/transpose(A2) ≈ M1/transpose(M2)
                 end
             end
         end
@@ -434,56 +442,54 @@ debug && println("Test basic type functionality")
 
             debug && println("elty1: $elty1, A1: $t1, B: $eltyB")
 
-            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-            @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1)
             Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
             C = Matrix{promote_type(elty1,eltyB)}(undef, n, n)
             mul!(C, Tri, A1)
-            @test C ≈ Tri*Matrix(A1)
+            @test C ≈ Tri*M1
             Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
             mul!(C, A1, Tri)
-            @test C ≈ Matrix(A1)*Tri
+            @test C ≈ M1*Tri
 
             # Triangular-dense Matrix/vector multiplication
-            @test A1*B[:,1] ≈ Matrix(A1)*B[:,1]
-            @test A1*B ≈ Matrix(A1)*B
-            @test transpose(A1)*B[:,1] ≈ transpose(Matrix(A1))*B[:,1]
-            @test A1'B[:,1] ≈ Matrix(A1)'B[:,1]
-            @test transpose(A1)*B ≈ transpose(Matrix(A1))*B
-            @test A1'B ≈ Matrix(A1)'B
-            @test A1*transpose(B) ≈ Matrix(A1)*transpose(B)
-            @test adjoint(A1)*transpose(B) ≈ Matrix(A1)'*transpose(B)
-            @test transpose(A1)*adjoint(B) ≈ transpose(Matrix(A1))*adjoint(B)
-            @test A1*B' ≈ Matrix(A1)*B'
-            @test B*A1 ≈ B*Matrix(A1)
-            @test transpose(B[:,1])*A1 ≈ transpose(B[:,1])*Matrix(A1)
-            @test B[:,1]'A1 ≈ B[:,1]'Matrix(A1)
-            @test transpose(B)*A1 ≈ transpose(B)*Matrix(A1)
-            @test transpose(B)*adjoint(A1) ≈ transpose(B)*Matrix(A1)'
-            @test adjoint(B)*transpose(A1) ≈ adjoint(B)*transpose(Matrix(A1))
-            @test B'A1 ≈ B'Matrix(A1)
-            @test B*transpose(A1) ≈ B*transpose(Matrix(A1))
-            @test B*A1' ≈ B*Matrix(A1)'
-            @test transpose(B[:,1])*transpose(A1) ≈ transpose(B[:,1])*transpose(Matrix(A1))
-            @test B[:,1]'A1' ≈ B[:,1]'Matrix(A1)'
-            @test transpose(B)*transpose(A1) ≈ transpose(B)*transpose(Matrix(A1))
-            @test B'A1' ≈ B'Matrix(A1)'
+            @test A1*B[:,1] ≈ M1*B[:,1]
+            @test A1*B ≈ M1*B
+            @test transpose(A1)*B[:,1] ≈ transpose(M1)*B[:,1]
+            @test A1'B[:,1] ≈ M1'B[:,1]
+            @test transpose(A1)*B ≈ transpose(M1)*B
+            @test A1'B ≈ M1'B
+            @test A1*transpose(B) ≈ M1*transpose(B)
+            @test adjoint(A1)*transpose(B) ≈ M1'*transpose(B)
+            @test transpose(A1)*adjoint(B) ≈ transpose(M1)*adjoint(B)
+            @test A1*B' ≈ M1*B'
+            @test B*A1 ≈ B*M1
+            @test transpose(B[:,1])*A1 ≈ transpose(B[:,1])*M1
+            @test B[:,1]'A1 ≈ B[:,1]'M1
+            @test transpose(B)*A1 ≈ transpose(B)*M1
+            @test transpose(B)*adjoint(A1) ≈ transpose(B)*M1'
+            @test adjoint(B)*transpose(A1) ≈ adjoint(B)*transpose(M1)
+            @test B'A1 ≈ B'M1
+            @test B*transpose(A1) ≈ B*transpose(M1)
+            @test B*A1' ≈ B*M1'
+            @test transpose(B[:,1])*transpose(A1) ≈ transpose(B[:,1])*transpose(M1)
+            @test B[:,1]'A1' ≈ B[:,1]'M1'
+            @test transpose(B)*transpose(A1) ≈ transpose(B)*transpose(M1)
+            @test B'A1' ≈ B'M1'
 
             if eltyB == elty1
-                @test mul!(similar(B), A1, B) ≈ Matrix(A1)*B
-                @test mul!(similar(B), A1, adjoint(B)) ≈ Matrix(A1)*B'
-                @test mul!(similar(B), A1, transpose(B)) ≈ Matrix(A1)*transpose(B)
-                @test mul!(similar(B), adjoint(A1), adjoint(B)) ≈ Matrix(A1)'*B'
-                @test mul!(similar(B), transpose(A1), transpose(B)) ≈ transpose(Matrix(A1))*transpose(B)
-                @test mul!(similar(B), transpose(A1), adjoint(B)) ≈ transpose(Matrix(A1))*B'
-                @test mul!(similar(B), adjoint(A1), transpose(B)) ≈ Matrix(A1)'*transpose(B)
-                @test mul!(similar(B), adjoint(A1), B) ≈ Matrix(A1)'*B
-                @test mul!(similar(B), transpose(A1), B) ≈ transpose(Matrix(A1))*B
+                @test mul!(similar(B), A1, B) ≈ M1*B
+                @test mul!(similar(B), A1, adjoint(B)) ≈ M1*B'
+                @test mul!(similar(B), A1, transpose(B)) ≈ M1*transpose(B)
+                @test mul!(similar(B), adjoint(A1), adjoint(B)) ≈ M1'*B'
+                @test mul!(similar(B), transpose(A1), transpose(B)) ≈ transpose(M1)*transpose(B)
+                @test mul!(similar(B), transpose(A1), adjoint(B)) ≈ transpose(M1)*B'
+                @test mul!(similar(B), adjoint(A1), transpose(B)) ≈ M1'*transpose(B)
+                @test mul!(similar(B), adjoint(A1), B) ≈ M1'*B
+                @test mul!(similar(B), transpose(A1), B) ≈ transpose(M1)*B
                 # test also vector methods
                 B1 = vec(B[1,:])
-                @test mul!(similar(B1), A1, B1)  ≈ Matrix(A1)*B1
-                @test mul!(similar(B1), adjoint(A1), B1) ≈ Matrix(A1)'*B1
-                @test mul!(similar(B1), transpose(A1), B1) ≈ transpose(Matrix(A1))*B1
+                @test mul!(similar(B1), A1, B1)  ≈ M1*B1
+                @test mul!(similar(B1), adjoint(A1), B1) ≈ M1'*B1
+                @test mul!(similar(B1), transpose(A1), B1) ≈ transpose(M1)*B1
             end
             #error handling
             Ann, Bmm, bm = A1, Matrix{eltyB}(undef, n+1, n+1), Vector{eltyB}(undef, n+1)
@@ -495,16 +501,16 @@ debug && println("Test basic type functionality")
             @test_throws DimensionMismatch rmul!(Bmm, transpose(Ann))
 
             # ... and division
-            @test A1\B[:,1] ≈ Matrix(A1)\B[:,1]
-            @test A1\B ≈ Matrix(A1)\B
-            @test transpose(A1)\B[:,1] ≈ transpose(Matrix(A1))\B[:,1]
-            @test A1'\B[:,1] ≈ Matrix(A1)'\B[:,1]
-            @test transpose(A1)\B ≈ transpose(Matrix(A1))\B
-            @test A1'\B ≈ Matrix(A1)'\B
-            @test A1\transpose(B) ≈ Matrix(A1)\transpose(B)
-            @test A1\B' ≈ Matrix(A1)\B'
-            @test transpose(A1)\transpose(B) ≈ transpose(Matrix(A1))\transpose(B)
-            @test A1'\B' ≈ Matrix(A1)'\B'
+            @test A1\B[:,1] ≈ M1\B[:,1]
+            @test A1\B ≈ M1\B
+            @test transpose(A1)\B[:,1] ≈ transpose(M1)\B[:,1]
+            @test A1'\B[:,1] ≈ M1'\B[:,1]
+            @test transpose(A1)\B ≈ transpose(M1)\B
+            @test A1'\B ≈ M1'\B
+            @test A1\transpose(B) ≈ M1\transpose(B)
+            @test A1\B' ≈ M1\B'
+            @test transpose(A1)\transpose(B) ≈ transpose(M1)\transpose(B)
+            @test A1'\B' ≈ M1'\B'
             Ann, bm = A1, Vector{elty1}(undef,n+1)
             @test_throws DimensionMismatch Ann\bm
             @test_throws DimensionMismatch Ann'\bm
@@ -512,13 +518,13 @@ debug && println("Test basic type functionality")
             if t1 == UpperTriangular || t1 == LowerTriangular
                 @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
             end
-            @test B/A1 ≈ B/Matrix(A1)
-            @test B/transpose(A1) ≈ B/transpose(Matrix(A1))
-            @test B/A1' ≈ B/Matrix(A1)'
-            @test transpose(B)/A1 ≈ transpose(B)/Matrix(A1)
-            @test B'/A1 ≈ B'/Matrix(A1)
-            @test transpose(B)/transpose(A1) ≈ transpose(B)/transpose(Matrix(A1))
-            @test B'/A1' ≈ B'/Matrix(A1)'
+            @test B/A1 ≈ B/M1
+            @test B/transpose(A1) ≈ B/transpose(M1)
+            @test B/A1' ≈ B/M1'
+            @test transpose(B)/A1 ≈ transpose(B)/M1
+            @test B'/A1 ≈ B'/M1
+            @test transpose(B)/transpose(A1) ≈ transpose(B)/transpose(M1)
+            @test B'/A1' ≈ B'/M1'
 
             # Error bounds
             !(elty1 in (BigFloat, Complex{BigFloat})) && !(eltyB in (BigFloat, Complex{BigFloat})) && errorbounds(A1, A1\B, B)
@@ -726,6 +732,20 @@ end
 # Issue 16196
 @test UpperTriangular(Matrix(1.0I, 3, 3)) \ view(fill(1., 3), [1,2,3]) == fill(1., 3)
 
+@testset "reverse" begin
+    A = randn(5, 5)
+    for (T, Trev) in ((UpperTriangular, LowerTriangular),
+            (UnitUpperTriangular, UnitLowerTriangular),
+            (LowerTriangular, UpperTriangular),
+            (UnitLowerTriangular, UnitUpperTriangular))
+        A = T(randn(5, 5))
+        AM = Matrix(A)
+        @test reverse(A, dims=1) == reverse(AM, dims=1)
+        @test reverse(A, dims=2) == reverse(AM, dims=2)
+        @test reverse(A)::Trev == reverse(AM)
+    end
+end
+
 # dimensional correctness:
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
@@ -881,7 +901,7 @@ end
     function test_one_oneunit_triangular(a)
         b = Matrix(a)
         @test (@inferred a^1) == b^1
-        @test (@inferred a^-1) == b^-1
+        @test (@inferred a^-1) ≈ b^-1
         @test one(a) == one(b)
         @test one(a)*a == a
         @test a*one(a) == a
@@ -968,7 +988,7 @@ end
     end
 end
 
-@testset "arithmetic with an immutable parent" begin
+@testset "immutable and non-strided parent" begin
     F = FillArrays.Fill(2, (4,4))
     for UT in (UnitUpperTriangular, UnitLowerTriangular)
         U = UT(F)
@@ -979,6 +999,13 @@ end
     for U in (UnitUpperTriangular(F), UnitLowerTriangular(F))
         @test imag(F) == imag(collect(F))
     end
+
+    @testset "copyto!" begin
+        for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+            @test Matrix(T(F)) == T(F)
+        end
+        @test copyto!(zeros(eltype(F), length(F)), UpperTriangular(F)) == vec(UpperTriangular(F))
+    end
 end
 
 @testset "error paths" begin
@@ -996,6 +1023,14 @@ end
     end
 end
 
+@testset "uppertriangular/lowertriangular" begin
+    M = rand(2,2)
+    @test LinearAlgebra.uppertriangular(M) === UpperTriangular(M)
+    @test LinearAlgebra.lowertriangular(M) === LowerTriangular(M)
+    @test LinearAlgebra.uppertriangular(UnitUpperTriangular(M)) === UnitUpperTriangular(M)
+    @test LinearAlgebra.lowertriangular(UnitLowerTriangular(M)) === UnitLowerTriangular(M)
+end
+
 @testset "arithmetic with partly uninitialized matrices" begin
     @testset "$(typeof(A))" for A in (Matrix{BigFloat}(undef,2,2), Matrix{Complex{BigFloat}}(undef,2,2)')
         A[2,1] = eltype(A) <: Complex ? 4 + 3im : 4
@@ -1013,6 +1048,7 @@ end
             @test 2\L == 2\B
             @test real(L) == real(B)
             @test imag(L) == imag(B)
+            @test kron(L,L) == kron(B,B)
             @test transpose!(MT(copy(A))) == transpose(L) broken=!(A isa Matrix)
             @test adjoint!(MT(copy(A))) == adjoint(L) broken=!(A isa Matrix)
         end
@@ -1034,10 +1070,204 @@ end
             @test 2\U == 2\B
             @test real(U) == real(B)
             @test imag(U) == imag(B)
+            @test kron(U,U) == kron(B,B)
             @test transpose!(MT(copy(A))) == transpose(U) broken=!(A isa Matrix)
             @test adjoint!(MT(copy(A))) == adjoint(U) broken=!(A isa Matrix)
         end
     end
 end
 
+@testset "kron with triangular matrices of matrices" begin
+    for T in (UpperTriangular, LowerTriangular)
+        t = T(fill(ones(2,2), 2, 2))
+        m = Matrix(t)
+        @test kron(t, t) ≈ kron(m, m)
+    end
+end
+
+@testset "copyto! tests" begin
+    @testset "copyto! with aliasing (#39460)" begin
+        M = Matrix(reshape(1:36, 6, 6))
+        @testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+            A = T(view(M, 1:5, 1:5))
+            A2 = copy(A)
+            B = T(view(M, 2:6, 2:6))
+            @test copyto!(B, A) == A2
+        end
+    end
+
+    @testset "copyto! with different matrix types" begin
+        M1 = Matrix(reshape(1:36, 6, 6))
+        M2 = similar(M1)
+        # these copies always work
+        @testset for (Tdest, Tsrc) in (
+                            (UpperTriangular, UnitUpperTriangular),
+                            (UpperTriangular, UpperTriangular),
+                            (LowerTriangular, UnitLowerTriangular),
+                            (LowerTriangular, LowerTriangular),
+                            (UnitUpperTriangular, UnitUpperTriangular),
+                            (UnitLowerTriangular, UnitLowerTriangular)
+                        )
+
+            M2 .= 0
+            copyto!(Tdest(M2), Tsrc(M1))
+            @test Tdest(M2) == Tsrc(M1)
+        end
+        # these copies only work if the source has a unit diagonal
+        M3 = copy(M1)
+        M3[diagind(M3)] .= 1
+        @testset for (Tdest, Tsrc) in (
+                            (UnitUpperTriangular, UpperTriangular),
+                            (UnitLowerTriangular, LowerTriangular),
+                        )
+
+            M2 .= 0
+            copyto!(Tdest(M2), Tsrc(M3))
+            @test Tdest(M2) == Tsrc(M3)
+            @test_throws ArgumentError copyto!(Tdest(M2), Tsrc(M1))
+        end
+        # these copies work even when the parent of the source isn't initialized along the diagonal
+        @testset for (T, TU) in ((UpperTriangular, UnitUpperTriangular),
+                                    (LowerTriangular, UnitLowerTriangular))
+            M1 = Matrix{BigFloat}(undef, 3, 3)
+            M2 = similar(M1)
+            if TU == UnitUpperTriangular
+                M1[1,2] = M1[1,3] = M1[2,3] = 2
+            else
+                M1[2,1] = M1[3,1] = M1[3,2] = 2
+            end
+            for TD in (T, TU)
+                M2 .= 0
+                copyto!(T(M2), TU(M1))
+                @test T(M2) == TU(M1)
+            end
+        end
+    end
+
+    @testset "copyto! with different sizes" begin
+        Ap = zeros(3,3)
+        Bp = rand(2,2)
+        @testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+            A = T(Ap)
+            B = T(Bp)
+            @test_throws ArgumentError copyto!(A, B)
+        end
+        @testset "error message" begin
+            A = UpperTriangular(Ap)
+            B = UpperTriangular(Bp)
+            @test_throws "cannot set index in the lower triangular part" copyto!(A, B)
+
+            A = LowerTriangular(Ap)
+            B = LowerTriangular(Bp)
+            @test_throws "cannot set index in the upper triangular part" copyto!(A, B)
+        end
+    end
+end
+
+@testset "getindex with Integers" begin
+    M = reshape(1:4,2,2)
+    for Ttype in (UpperTriangular, UnitUpperTriangular)
+        T = Ttype(M)
+        @test_throws "invalid index" T[2, true]
+        @test T[1,2] == T[Int8(1),UInt16(2)] == T[big(1), Int16(2)]
+    end
+    for Ttype in (LowerTriangular, UnitLowerTriangular)
+        T = Ttype(M)
+        @test_throws "invalid index" T[true, 2]
+        @test T[2,1] == T[Int8(2),UInt16(1)] == T[big(2), Int16(1)]
+    end
+end
+
+@testset "type-stable eigvecs" begin
+    D = Float64[1 0; 0 2]
+    V = @inferred eigvecs(UpperTriangular(D))
+    @test V == Diagonal([1, 1])
+end
+
+@testset "preserve structure in scaling by NaN" begin
+    M = rand(Int8,2,2)
+    for (Ts, TD) in (((UpperTriangular, UnitUpperTriangular), UpperTriangular),
+                    ((LowerTriangular, UnitLowerTriangular), LowerTriangular))
+        for T in Ts
+            U = T(M)
+            for V in (U * NaN, NaN * U, U / NaN, NaN \ U)
+                @test V isa TD{Float64, Matrix{Float64}}
+                @test all(isnan, diag(V))
+            end
+        end
+    end
+end
+
+@testset "eigvecs for AbstractTriangular" begin
+    S = SizedArrays.SizedArray{(3,3)}(reshape(1:9,3,3))
+    for T in (UpperTriangular, UnitUpperTriangular,
+                LowerTriangular, UnitLowerTriangular)
+        U = T(S)
+        V = eigvecs(U)
+        λ = eigvals(U)
+        @test U * V ≈ V * Diagonal(λ)
+
+        MU = MyTriangular(U)
+        V = eigvecs(U)
+        λ = eigvals(U)
+        @test MU * V ≈ V * Diagonal(λ)
+    end
+end
+
+@testset "(l/r)mul! and (l/r)div! for generic triangular" begin
+    @testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+        M = MyTriangular(T(rand(4,4)))
+        A = rand(4,4)
+        Ac = similar(A)
+        @testset "lmul!" begin
+            Ac .= A
+            lmul!(M, Ac)
+            @test Ac ≈ M * A
+        end
+        @testset "rmul!" begin
+            Ac .= A
+            rmul!(Ac, M)
+            @test Ac ≈ A * M
+        end
+        @testset "ldiv!" begin
+            Ac .= A
+            ldiv!(M, Ac)
+            @test Ac ≈ M \ A
+        end
+        @testset "rdiv!" begin
+            Ac .= A
+            rdiv!(Ac, M)
+            @test Ac ≈ A / M
+        end
+    end
+end
+
+@testset "istriu/istril forwards to parent" begin
+    @testset "$(nameof(typeof(M)))" for M in [Tridiagonal(rand(n-1), rand(n), rand(n-1)),
+                Tridiagonal(zeros(n-1), zeros(n), zeros(n-1)),
+                Diagonal(randn(n)),
+                Diagonal(zeros(n)),
+                ]
+        @testset for TriT in (UpperTriangular, UnitUpperTriangular, LowerTriangular, UnitLowerTriangular)
+            U = TriT(M)
+            A = Array(U)
+            for k in -n:n
+                @test istriu(U, k) == istriu(A, k)
+                @test istril(U, k) == istril(A, k)
+            end
+        end
+    end
+    z = zeros(n,n)
+    @testset for TriT in (UpperTriangular, UnitUpperTriangular, LowerTriangular, UnitLowerTriangular)
+        P = Matrix{BigFloat}(undef, n, n)
+        copytrito!(P, z, TriT <: Union{UpperTriangular, UnitUpperTriangular} ? 'U' : 'L')
+        U = TriT(P)
+        A = Array(U)
+        @testset for k in -n:n
+            @test istriu(U, k) == istriu(A, k)
+            @test istril(U, k) == istril(A, k)
+        end
+    end
+end
+
 end # module TestTriangular
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index 487f808076707..3330fa682fe5e 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -18,6 +18,9 @@ using .Main.FillArrays
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
+
 include("testutils.jl") # test_approx_eq_modphase
 
 #Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences
@@ -132,27 +135,43 @@ end
         @test_throws ArgumentError tril!(SymTridiagonal(d, dl), n)
         @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), -n - 2)
         @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), n)
-        @test tril(SymTridiagonal(d,dl))    == Tridiagonal(dl,d,zerosdl)
-        @test tril(SymTridiagonal(d,dl),1)  == Tridiagonal(dl,d,dl)
-        @test tril(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,zerosd,zerosdl)
-        @test tril(SymTridiagonal(d,dl),-2) == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test tril(Tridiagonal(dl,d,du))    == Tridiagonal(dl,d,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),1)  == Tridiagonal(dl,d,du)
-        @test tril(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,zerosd,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),-2) == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(tril(SymTridiagonal(d,dl)))    == Tridiagonal(dl,d,zerosdl)
+        @test @inferred(tril(SymTridiagonal(d,dl),1))  == Tridiagonal(dl,d,dl)
+        @test @inferred(tril(SymTridiagonal(d,dl),-1)) == Tridiagonal(dl,zerosd,zerosdl)
+        @test @inferred(tril(SymTridiagonal(d,dl),-2)) == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(tril(Tridiagonal(dl,d,du)))    == Tridiagonal(dl,d,zerosdu)
+        @test @inferred(tril(Tridiagonal(dl,d,du),1))  == Tridiagonal(dl,d,du)
+        @test @inferred(tril(Tridiagonal(dl,d,du),-1)) == Tridiagonal(dl,zerosd,zerosdu)
+        @test @inferred(tril(Tridiagonal(dl,d,du),-2)) == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl))))    == Tridiagonal(dl,d,zerosdl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),1))  == Tridiagonal(dl,d,dl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),-1)) == Tridiagonal(dl,zerosd,zerosdl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),-2)) == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du))))    == Tridiagonal(dl,d,zerosdu)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),1))  == Tridiagonal(dl,d,du)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),-1)) == Tridiagonal(dl,zerosd,zerosdu)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),-2)) == Tridiagonal(zerosdl,zerosd,zerosdu)
 
         @test_throws ArgumentError triu!(SymTridiagonal(d, dl), -n)
         @test_throws ArgumentError triu!(SymTridiagonal(d, dl), n + 2)
         @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), -n)
         @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), n + 2)
-        @test triu(SymTridiagonal(d,dl))    == Tridiagonal(zerosdl,d,dl)
-        @test triu(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,d,dl)
-        @test triu(SymTridiagonal(d,dl),1)  == Tridiagonal(zerosdl,zerosd,dl)
-        @test triu(SymTridiagonal(d,dl),2)  == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test triu(Tridiagonal(dl,d,du))    == Tridiagonal(zerosdl,d,du)
-        @test triu(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,d,du)
-        @test triu(Tridiagonal(dl,d,du),1)  == Tridiagonal(zerosdl,zerosd,du)
-        @test triu(Tridiagonal(dl,d,du),2)  == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(triu(SymTridiagonal(d,dl)))    == Tridiagonal(zerosdl,d,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),-1)) == Tridiagonal(dl,d,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),1))  == Tridiagonal(zerosdl,zerosd,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),2))  == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(triu(Tridiagonal(dl,d,du)))    == Tridiagonal(zerosdl,d,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),-1)) == Tridiagonal(dl,d,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),1))  == Tridiagonal(zerosdl,zerosd,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),2))  == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl))))    == Tridiagonal(zerosdl,d,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),-1)) == Tridiagonal(dl,d,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),1))  == Tridiagonal(zerosdl,zerosd,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),2))  == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du))))    == Tridiagonal(zerosdl,d,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),-1)) == Tridiagonal(dl,d,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),1))  == Tridiagonal(zerosdl,zerosd,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),2))  == Tridiagonal(zerosdl,zerosd,zerosdu)
 
         @test !istril(SymTridiagonal(d,dl))
         @test istril(SymTridiagonal(d,zerosdl))
@@ -259,6 +278,8 @@ end
                 @test_throws ArgumentError A[3, 2] = 1 # test assignment on the subdiagonal
                 @test_throws ArgumentError A[2, 3] = 1 # test assignment on the superdiagonal
             end
+            # setindex! should return the destination
+            @test setindex!(A, A[2,2], 2, 2) === A
         end
         @testset "diag" begin
             @test (@inferred diag(A))::typeof(d) == d
@@ -282,6 +303,9 @@ end
         @testset "Idempotent tests" begin
             for func in (conj, transpose, adjoint)
                 @test func(func(A)) == A
+                if func ∈ (transpose, adjoint)
+                    @test func(func(A)) === A
+                end
             end
         end
         @testset "permutedims(::[Sym]Tridiagonal)" begin
@@ -468,7 +492,7 @@ end
 end
 
 @testset "SymTridiagonal/Tridiagonal block matrix" begin
-    M = [1 2; 2 4]
+    M = [1 2; 3 4]
     n = 5
     A = SymTridiagonal(fill(M, n), fill(M, n-1))
     @test @inferred A[1,1] == Symmetric(M)
@@ -482,6 +506,9 @@ end
     @test_throws ArgumentError diag(A, n+1)
     @test_throws ArgumentError diag(A, -n-1)
 
+    @test tr(A) == sum(diag(A))
+    @test issymmetric(tr(A))
+
     A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1))
     @test @inferred A[1,1] == M
     @test @inferred A[1,2] == M
@@ -788,6 +815,35 @@ using .Main.SizedArrays
     end
 end
 
+@testset "copyto! between SymTridiagonal and Tridiagonal" begin
+    ev, dv = [1:4;], [1:5;]
+    S = SymTridiagonal(dv, ev)
+    T = Tridiagonal(zero(ev), zero(dv), zero(ev))
+    @test copyto!(T, S) == S
+    @test copyto!(zero(S), T) == T
+
+    ev2 = [1:5;]
+    S = SymTridiagonal(dv, ev2)
+    T = Tridiagonal(zeros(length(ev2)-1), zero(dv), zeros(length(ev2)-1))
+    @test copyto!(T, S) == S
+    @test copyto!(zero(S), T) == T
+
+    T2 = Tridiagonal(ones(length(ev)), zero(dv), zero(ev))
+    @test_throws "cannot copy an asymmetric Tridiagonal matrix to a SymTridiagonal" copyto!(zero(S), T2)
+
+    @testset "mismatched sizes" begin
+        dv2 = [4; @view dv[2:end]]
+        @test copyto!(S, SymTridiagonal([4], Int[])) == SymTridiagonal(dv2, ev)
+        @test copyto!(T, SymTridiagonal([4], Int[])) == Tridiagonal(ev, dv2, ev)
+        @test copyto!(S, Tridiagonal(Int[], [4], Int[])) == SymTridiagonal(dv2, ev)
+        @test copyto!(T, Tridiagonal(Int[], [4], Int[])) == Tridiagonal(ev, dv2, ev)
+        @test copyto!(S, SymTridiagonal(Int[], Int[])) == SymTridiagonal(dv, ev)
+        @test copyto!(T, SymTridiagonal(Int[], Int[])) == Tridiagonal(ev, dv, ev)
+        @test copyto!(S, Tridiagonal(Int[], Int[], Int[])) == SymTridiagonal(dv, ev)
+        @test copyto!(T, Tridiagonal(Int[], Int[], Int[])) == Tridiagonal(ev, dv, ev)
+    end
+end
+
 @testset "copyto! with UniformScaling" begin
     @testset "Tridiagonal" begin
         @testset "Fill" begin
@@ -830,4 +886,88 @@ end
     @test axes(B) === (ax, ax)
 end
 
+@testset "Reverse operation on Tridiagonal" begin
+    for n in 5:6
+        d = randn(n)
+        dl = randn(n - 1)
+        du = randn(n - 1)
+        T = Tridiagonal(dl, d, du)
+        @test reverse(T, dims=1) == reverse(Matrix(T), dims=1)
+        @test reverse(T, dims=2) == reverse(Matrix(T), dims=2)
+        @test reverse(T)::Tridiagonal == reverse(Matrix(T)) == reverse!(copy(T))
+    end
+end
+
+@testset "Reverse operation on SymTridiagonal" begin
+    n = 5
+    d = randn(n)
+    dl = randn(n - 1)
+    ST = SymTridiagonal(d, dl)
+    @test reverse(ST, dims=1) == reverse(Matrix(ST), dims=1)
+    @test reverse(ST, dims=2) == reverse(Matrix(ST), dims=2)
+    @test reverse(ST)::SymTridiagonal == reverse(Matrix(ST))
+end
+
+@testset "getindex with Integers" begin
+    dv, ev = 1:4, 1:3
+    for S in (Tridiagonal(ev, dv, ev), SymTridiagonal(dv, ev))
+        @test_throws "invalid index" S[3, true]
+        @test S[1,2] == S[Int8(1),UInt16(2)] == S[big(1), Int16(2)]
+    end
+end
+
+@testset "rmul!/lmul! with banded matrices" begin
+    dl, d, du = rand(3), rand(4), rand(3)
+    A = Tridiagonal(dl, d, du)
+    D = Diagonal(d)
+    @test rmul!(copy(A), D) ≈ A * D
+    @test lmul!(D, copy(A)) ≈ D * A
+
+    @testset "non-commutative" begin
+        S32 = SizedArrays.SizedArray{(3,2)}(rand(3,2))
+        S33 = SizedArrays.SizedArray{(3,3)}(rand(3,3))
+        S22 = SizedArrays.SizedArray{(2,2)}(rand(2,2))
+        T = Tridiagonal(fill(S32,3), fill(S32, 4), fill(S32, 3))
+        D = Diagonal(fill(S22, size(T,2)))
+        @test rmul!(copy(T), D) ≈ T * D
+        D = Diagonal(fill(S33, size(T,1)))
+        @test lmul!(D, copy(T)) ≈ D * T
+    end
+end
+
+@testset "mul with empty arrays" begin
+    A = zeros(5,0)
+    T = Tridiagonal(zeros(0), zeros(0), zeros(0))
+    TL = Tridiagonal(zeros(4), zeros(5), zeros(4))
+    @test size(A * T) == size(A)
+    @test size(TL * A) == size(A)
+    @test size(T * T) == size(T)
+    C = similar(A)
+    @test mul!(C, A, T) == A * T
+    @test mul!(C, TL, A) == TL * A
+    @test mul!(similar(T), T, T) == T * T
+    @test mul!(similar(T, size(T)), T, T) == T * T
+
+    v = zeros(size(T,2))
+    @test size(T * v) == size(v)
+    @test mul!(similar(v), T, v) == T * v
+
+    D = Diagonal(zeros(size(T,2)))
+    @test size(T * D) == size(D * T) == size(D)
+    @test mul!(similar(D), T, D) == mul!(similar(D), D, T) == T * D
+end
+
+@testset "show" begin
+    T = Tridiagonal(1:3, 1:4, 1:3)
+    @test sprint(show, T) == "Tridiagonal(1:3, 1:4, 1:3)"
+    S = SymTridiagonal(1:4, 1:3)
+    @test sprint(show, S) == "SymTridiagonal(1:4, 1:3)"
+
+    m = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2))
+    T = Tridiagonal(fill(m,2), fill(m,3), fill(m,2))
+    @test sprint(show, T) == "Tridiagonal($(repr(diag(T,-1))), $(repr(diag(T))), $(repr(diag(T,1))))"
+    S = SymTridiagonal(fill(m,3), fill(m,2))
+    @test sprint(show, S) == "SymTridiagonal($(repr(diag(S))), $(repr(diag(S,1))))"
+end
+
 end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index 6719714049b1e..92547e8648d8a 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -24,6 +24,7 @@ Random.seed!(1234543)
     @test -one(UniformScaling(2)) == UniformScaling(-1)
     @test opnorm(UniformScaling(1+im)) ≈ sqrt(2)
     @test convert(UniformScaling{Float64}, 2I) === 2.0I
+    @test float(2I) === 2.0*I
 end
 
 @testset "getindex" begin
@@ -67,7 +68,7 @@ end
 
     # on complex plane
     J = UniformScaling(randn(ComplexF64))
-    for f in ( exp,   log,
+    for f in ( exp,   log, cis,
                sqrt,
                sin,   cos,   tan,
                asin,  acos,  atan,
@@ -80,6 +81,10 @@ end
         @test f(J) ≈ f(M(J))
     end
 
+    for f in (sincos, sincosd)
+        @test all(splat(≈), zip(f(J), f(M(J))))
+    end
+
     # on real axis
     for (λ, fs) in (
         # functions defined for x ∈ ℝ
diff --git a/stdlib/Logging/Project.toml b/stdlib/Logging/Project.toml
index 3fc288e25f0b7..ce69112733d5e 100644
--- a/stdlib/Logging/Project.toml
+++ b/stdlib/Logging/Project.toml
@@ -2,9 +2,6 @@ name = "Logging"
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 version = "1.11.0"
 
-[deps]
-StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
-
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md
index c2bde11720f4c..17d4e71328ac4 100644
--- a/stdlib/Logging/docs/src/index.md
+++ b/stdlib/Logging/docs/src/index.md
@@ -5,7 +5,7 @@ EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Logging/docs/sr
 # [Logging](@id man-logging)
 
 The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a
-computation as a log of events.  Events are created by inserting a logging
+computation as a log of events. Events are created by inserting a logging
 statement into the source code, for example:
 
 ```julia
@@ -15,17 +15,17 @@ statement into the source code, for example:
 ```
 
 The system provides several advantages over peppering your source code with
-calls to `println()`.  First, it allows you to control the visibility and
-presentation of messages without editing the source code.  For example, in
+calls to `println()`. First, it allows you to control the visibility and
+presentation of messages without editing the source code. For example, in
 contrast to the `@warn` above
 
 ```julia
 @debug "The sum of some values $(sum(rand(100)))"
 ```
 
-will produce no output by default.  Furthermore, it's very cheap to leave debug
+will produce no output by default. Furthermore, it's very cheap to leave debug
 statements like this in the source code because the system avoids evaluating
-the message if it would later be ignored.  In this case `sum(rand(100))` and
+the message if it would later be ignored. In this case `sum(rand(100))` and
 the associated string processing will never be executed unless debug logging is
 enabled.
 
@@ -92,7 +92,7 @@ The system also generates some standard information for each event:
   fairly stable even if the source code of the file changes, as long as the
   logging statement itself remains the same.
 * A `group` for the event, which is set to the base name of the file by default,
-  without extension.  This can be used to group messages into categories more
+  without extension. This can be used to group messages into categories more
   finely than the log level (for example, all deprecation warnings have group
   `:depwarn`), or into logical groupings across or within modules.
 
@@ -124,7 +124,7 @@ user configurable code to see the event. All loggers must be subtypes of
 [`AbstractLogger`](@ref).
 
 When an event is triggered, the appropriate logger is found by looking for a
-task-local logger with the global logger as fallback.  The idea here is that
+task-local logger with the global logger as fallback. The idea here is that
 the application code knows how log events should be processed and exists
 somewhere at the top of the call stack. So we should look up through the call
 stack to discover the logger — that is, the logger should be *dynamically
@@ -134,11 +134,11 @@ simple global variable. In such a system it's awkward to control logging while
 composing functionality from multiple modules.)
 
 The global logger may be set with [`global_logger`](@ref), and task-local
-loggers controlled using [`with_logger`](@ref).  Newly spawned tasks inherit
+loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit
 the logger of the parent task.
 
 There are three logger types provided by the library.  [`ConsoleLogger`](@ref)
-is the default logger you see when starting the REPL.  It displays events in a
+is the default logger you see when starting the REPL. It displays events in a
 readable text format and tries to give simple but user friendly control over
 formatting and filtering.  [`NullLogger`](@ref) is a convenient way to drop all
 messages where necessary; it is the logging equivalent of the [`devnull`](@ref)
@@ -154,14 +154,14 @@ When an event occurs, a few steps of early filtering occur to avoid generating
 messages that will be discarded:
 
 1. The message log level is checked against a global minimum level (set via
-   [`disable_logging`](@ref)).  This is a crude but extremely cheap global
+   [`disable_logging`](@ref)). This is a crude but extremely cheap global
    setting.
 2. The current logger state is looked up and the message level checked against the
    logger's cached minimum level, as found by calling [`Logging.min_enabled_level`](@ref).
    This behavior can be overridden via environment variables (more on this later).
 3. The [`Logging.shouldlog`](@ref) function is called with the current logger, taking
    some minimal information (level, module, group, id) which can be computed
-   statically.  Most usefully, `shouldlog` is passed an event `id` which can be
+   statically. Most usefully, `shouldlog` is passed an event `id` which can be
    used to discard events early based on a cached predicate.
 
 If all these checks pass, the message and key--value pairs are evaluated in full
@@ -170,9 +170,9 @@ and passed to the current logger via the [`Logging.handle_message`](@ref) functi
 event to the screen, save it to a file, etc.
 
 Exceptions that occur while generating the log event are captured and logged
-by default.  This prevents individual broken events from crashing the
+by default. This prevents individual broken events from crashing the
 application, which is helpful when enabling little-used debug events in a
-production system.  This behavior can be customized per logger type by
+production system. This behavior can be customized per logger type by
 extending [`Logging.catch_exceptions`](@ref).
 
 ## Testing log events
diff --git a/stdlib/Logging/src/Logging.jl b/stdlib/Logging/src/Logging.jl
index 3822bde2e630b..192885f2f94b7 100644
--- a/stdlib/Logging/src/Logging.jl
+++ b/stdlib/Logging/src/Logging.jl
@@ -8,8 +8,6 @@ and available by default.
 """
 module Logging
 
-using StyledStrings
-
 # Import the CoreLogging implementation into Logging as new const bindings.
 # Doing it this way (rather than with import) makes these symbols accessible to
 # tab completion.
@@ -70,7 +68,10 @@ Alias for [`LogLevel(1_000_001)`](@ref LogLevel).
 const AboveMaxLevel = Base.CoreLogging.AboveMaxLevel
 
 using Base.CoreLogging:
-    closed_stream
+    closed_stream, ConsoleLogger, default_metafmt
+
+# Some packages use `Logging.default_logcolor`
+const default_logcolor = Base.CoreLogging.default_logcolor
 
 export
     AbstractLogger,
@@ -94,8 +95,6 @@ export
     Error,
     AboveMaxLevel
 
-include("ConsoleLogger.jl")
-
 # The following are also part of the public API, but not exported:
 #
 # 1. Log levels:
@@ -104,8 +103,4 @@ include("ConsoleLogger.jl")
 # 2. AbstractLogger message related functions:
 #  handle_message, shouldlog, min_enabled_level, catch_exceptions,
 
-function __init__()
-    global_logger(ConsoleLogger())
-end
-
 end
diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl
index a244facee3468..176860fcdec63 100644
--- a/stdlib/Logging/test/runtests.jl
+++ b/stdlib/Logging/test/runtests.jl
@@ -63,24 +63,24 @@ end
 
     @testset "Default metadata formatting" begin
         @test Logging.default_metafmt(Logging.Debug, Base, :g, :i, expanduser("~/somefile.jl"), 42) ==
-            (:log_debug, "Debug:",   "@ Base ~/somefile.jl:42")
+            (:blue,      "Debug:",   "@ Base ~/somefile.jl:42")
         @test Logging.default_metafmt(Logging.Info,  Main, :g, :i, "a.jl", 1) ==
-            (:log_info,  "Info:",    "")
+            (:cyan,      "Info:",    "")
         @test Logging.default_metafmt(Logging.Warn,  Main, :g, :i, "b.jl", 2) ==
-            (:log_warn,  "Warning:", "@ Main b.jl:2")
+            (:yellow,    "Warning:", "@ Main b.jl:2")
         @test Logging.default_metafmt(Logging.Error, Main, :g, :i, "", 0) ==
-            (:log_error, "Error:",   "@ Main :0")
+            (:light_red, "Error:",   "@ Main :0")
         # formatting of nothing
         @test Logging.default_metafmt(Logging.Warn,  nothing, :g, :i, "b.jl", 2) ==
-            (:log_warn,  "Warning:", "@ b.jl:2")
+            (:yellow,    "Warning:", "@ b.jl:2")
         @test Logging.default_metafmt(Logging.Warn,  Main, :g, :i, nothing, 2) ==
-            (:log_warn,  "Warning:", "@ Main")
+            (:yellow,    "Warning:", "@ Main")
         @test Logging.default_metafmt(Logging.Warn,  Main, :g, :i, "b.jl", nothing) ==
-            (:log_warn,  "Warning:", "@ Main b.jl")
+            (:yellow,    "Warning:", "@ Main b.jl")
         @test Logging.default_metafmt(Logging.Warn,  nothing, :g, :i, nothing, 2) ==
-            (:log_warn,  "Warning:", "")
+            (:yellow,    "Warning:", "")
         @test Logging.default_metafmt(Logging.Warn,  Main, :g, :i, "b.jl", 2:5) ==
-            (:log_warn,  "Warning:", "@ Main b.jl:2-5")
+            (:yellow,    "Warning:", "@ Main b.jl:2-5")
     end
 
     function dummy_metafmt(level, _module, group, id, file, line)
@@ -265,9 +265,9 @@ end
     # Basic colorization test
     @test genmsg("line1\nline2", color=true) ==
     """
-    \e[36m\e[1m┌\e[39m\e[22m \e[36m\e[1mPREFIX\e[39m\e[22m line1
-    \e[36m\e[1m│\e[39m\e[22m line2
-    \e[36m\e[1m└\e[39m\e[22m \e[90mSUFFIX\e[39m
+    \e[36m\e[1m┌ \e[22m\e[39m\e[36m\e[1mPREFIX \e[22m\e[39mline1
+    \e[36m\e[1m│ \e[22m\e[39mline2
+    \e[36m\e[1m└ \e[22m\e[39m\e[90mSUFFIX\e[39m
     """
 
 end
diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml
index e4b24d070db55..eaa8d0988b2ca 100644
--- a/stdlib/MPFR_jll/Project.toml
+++ b/stdlib/MPFR_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "MPFR_jll"
 uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
-version = "4.2.0+1"
+version = "4.2.1+0"
 
 [deps]
 GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d"
diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl
index c184a9801102f..219ab0cad41be 100644
--- a/stdlib/MPFR_jll/src/MPFR_jll.jl
+++ b/stdlib/MPFR_jll/src/MPFR_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/MPFR_jll.jl
 baremodule MPFR_jll
 using Base, Libdl, GMP_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl
index 81b6e06ed7b49..fc931b462fa9c 100644
--- a/stdlib/MPFR_jll/test/runtests.jl
+++ b/stdlib/MPFR_jll/test/runtests.jl
@@ -4,5 +4,5 @@ using Test, Libdl, MPFR_jll
 
 @testset "MPFR_jll" begin
     vn = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ())))
-    @test vn == v"4.2.0"
+    @test vn == v"4.2.1"
 end
diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml
new file mode 100644
index 0000000000000..f9fb307190838
--- /dev/null
+++ b/stdlib/Manifest.toml
@@ -0,0 +1,300 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "d3a1f6b706609fe0c59521e1d770be6e2b8c489d"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.2"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.CRC32c]]
+uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
+version = "1.11.0"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.1.1+0"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+version = "1.11.0"
+
+[[deps.DelimitedFiles]]
+deps = ["Mmap"]
+git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+version = "1.9.1"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+version = "1.11.0"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.6.0"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
+
+[[deps.Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+version = "1.11.0"
+
+[[deps.GMP_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+version = "6.3.0+0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.JuliaSyntaxHighlighting]]
+deps = ["StyledStrings"]
+uuid = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+version = "1.12.0"
+
+[[deps.LLD_jll]]
+deps = ["Artifacts", "Libdl", "Zlib_jll", "libLLVM_jll"]
+uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
+version = "18.1.7+2"
+
+[[deps.LLVMLibUnwind_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
+version = "12.0.1+0"
+
+[[deps.LazyArtifacts]]
+deps = ["Artifacts", "Pkg"]
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+version = "1.11.0"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.4"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "8.6.0+0"
+
+[[deps.LibGit2]]
+deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.8.0+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.0+1"
+
+[[deps.LibUV_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
+version = "2.0.1+17"
+
+[[deps.LibUnwind_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
+version = "1.8.1+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+version = "1.11.0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.MPFR_jll]]
+deps = ["Artifacts", "GMP_jll", "Libdl"]
+uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
+version = "4.2.1+0"
+
+[[deps.Markdown]]
+deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.6+0"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+version = "1.11.0"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2024.3.11"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.28+2"
+
+[[deps.OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+version = "0.8.1+2"
+
+[[deps.PCRE2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.43.0+0"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.12.0"
+weakdeps = ["REPL"]
+
+    [deps.Pkg.extensions]
+    REPLExt = "REPL"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.Profile]]
+deps = ["StyledStrings"]
+uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+version = "1.11.0"
+
+[[deps.REPL]]
+deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+version = "1.11.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+version = "1.11.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.SparseArrays]]
+deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+version = "1.12.0"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.11.1"
+weakdeps = ["SparseArrays"]
+
+    [deps.Statistics.extensions]
+    SparseArraysExt = ["SparseArrays"]
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.SuiteSparse_jll]]
+deps = ["Artifacts", "Libdl", "libblastrampoline_jll"]
+uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+version = "7.8.0+0"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.3.1+0"
+
+[[deps.dSFMT_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
+version = "2.2.5+0"
+
+[[deps.libLLVM_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
+version = "18.1.7+2"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.11.0+0"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.60.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.5.0+0"
diff --git a/stdlib/Markdown/Project.toml b/stdlib/Markdown/Project.toml
index b40de17b9422d..a48a3d1f0b345 100644
--- a/stdlib/Markdown/Project.toml
+++ b/stdlib/Markdown/Project.toml
@@ -4,6 +4,8 @@ version = "1.11.0"
 
 [deps]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Markdown/src/Common/Common.jl b/stdlib/Markdown/src/Common/Common.jl
index 3036f2b4b730b..4bd3e5b4af8d6 100644
--- a/stdlib/Markdown/src/Common/Common.jl
+++ b/stdlib/Markdown/src/Common/Common.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+abstract type MarkdownElement end
+
 include("block.jl")
 include("inline.jl")
 
diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl
index bd184b60c40fa..1b5cc1a752bcb 100644
--- a/stdlib/Markdown/src/Common/block.jl
+++ b/stdlib/Markdown/src/Common/block.jl
@@ -4,7 +4,7 @@
 # Paragraphs
 # ––––––––––
 
-mutable struct Paragraph
+mutable struct Paragraph <: MarkdownElement
     content
 end
 
@@ -39,7 +39,7 @@ end
 # Headers
 # –––––––
 
-mutable struct Header{level}
+mutable struct Header{level} <: MarkdownElement
     text
 end
 
@@ -95,7 +95,7 @@ end
 # Code
 # ––––
 
-mutable struct Code
+mutable struct Code <: MarkdownElement
     language::String
     code::String
 end
@@ -124,7 +124,7 @@ end
 # Footnote
 # --------
 
-mutable struct Footnote
+mutable struct Footnote <: MarkdownElement
     id::String
     text
 end
@@ -159,7 +159,7 @@ end
 # Quotes
 # ––––––
 
-mutable struct BlockQuote
+mutable struct BlockQuote <: MarkdownElement
     content
 end
 
@@ -188,7 +188,7 @@ end
 # Admonitions
 # -----------
 
-mutable struct Admonition
+mutable struct Admonition <: MarkdownElement
     category::String
     title::String
     content::Vector
@@ -246,7 +246,7 @@ end
 # Lists
 # –––––
 
-mutable struct List
+mutable struct List <: MarkdownElement
     items::Vector{Any}
     ordered::Int # `-1` is unordered, `>= 0` is ordered.
     loose::Bool # TODO: Renderers should use this field
@@ -332,7 +332,7 @@ pushitem!(list, buffer) = push!(list.items, parse(String(take!(buffer))).content
 # HorizontalRule
 # ––––––––––––––
 
-mutable struct HorizontalRule
+mutable struct HorizontalRule <: MarkdownElement
 end
 
 function horizontalrule(stream::IO, block::MD)
diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl
index fda716a10fae7..a2a4140f80050 100644
--- a/stdlib/Markdown/src/Common/inline.jl
+++ b/stdlib/Markdown/src/Common/inline.jl
@@ -4,7 +4,7 @@
 # Emphasis
 # ––––––––
 
-mutable struct Italic
+mutable struct Italic <: MarkdownElement
     text
 end
 
@@ -20,7 +20,7 @@ function underscore_italic(stream::IO, md::MD)
     return result === nothing ? nothing : Italic(parseinline(result, md))
 end
 
-mutable struct Bold
+mutable struct Bold <: MarkdownElement
     text
 end
 
@@ -66,7 +66,7 @@ end
 # Images & Links
 # ––––––––––––––
 
-mutable struct Image
+mutable struct Image <: MarkdownElement
     url::String
     alt::String
 end
@@ -85,7 +85,7 @@ function image(stream::IO, md::MD)
     end
 end
 
-mutable struct Link
+mutable struct Link <: MarkdownElement
     text
     url::String
 end
@@ -156,7 +156,7 @@ end
 # Punctuation
 # –––––––––––
 
-mutable struct LineBreak end
+mutable struct LineBreak <: MarkdownElement end
 
 @trigger '\\' ->
 function linebreak(stream::IO, md::MD)
diff --git a/stdlib/Markdown/src/GitHub/table.jl b/stdlib/Markdown/src/GitHub/table.jl
index 29f956e9a0710..7c174007a75ba 100644
--- a/stdlib/Markdown/src/GitHub/table.jl
+++ b/stdlib/Markdown/src/GitHub/table.jl
@@ -140,15 +140,15 @@ end
 
 function term(io::IO, md::Table, columns)
     margin_str = " "^margin
-    cells = mapmap(x -> terminline_string(io, x), md.rows)
-    padcells!(cells, md.align, len = ansi_length)
+    cells = mapmap(x -> annotprint(terminline, x), md.rows)
+    padcells!(cells, md.align, len = textwidth)
     for i = 1:length(cells)
         print(io, margin_str)
         join(io, cells[i], " ")
         if i == 1
             println(io)
             print(io, margin_str)
-            join(io, ["–"^ansi_length(cells[i][j]) for j = 1:length(cells[1])], " ")
+            join(io, ["–"^textwidth(cells[i][j]) for j = 1:length(cells[1])], " ")
         end
         i < length(cells) && println(io)
     end
diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl
index 93d8dbc39fc59..1832e3a6a6956 100644
--- a/stdlib/Markdown/src/Markdown.jl
+++ b/stdlib/Markdown/src/Markdown.jl
@@ -9,9 +9,12 @@ literals `md"..."` and `doc"..."`.
 """
 module Markdown
 
-import Base: show, ==, with_output_color, mapany
+import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany
 using Base64: stringmime
 
+using StyledStrings: StyledStrings, Face, addface!, @styled_str, styled
+using JuliaSyntaxHighlighting: highlight, highlight!
+
 # Margin for printing in terminal.
 const margin = 2
 
@@ -32,6 +35,27 @@ include("render/terminal/render.jl")
 
 export @md_str, @doc_str
 
+const MARKDOWN_FACES = [
+    :markdown_header => Face(weight=:bold),
+    :markdown_h1 => Face(height=1.25, inherit=:markdown_header),
+    :markdown_h2 => Face(height=1.20, inherit=:markdown_header),
+    :markdown_h3 => Face(height=1.15, inherit=:markdown_header),
+    :markdown_h4 => Face(height=1.12, inherit=:markdown_header),
+    :markdown_h5 => Face(height=1.08, inherit=:markdown_header),
+    :markdown_h6 => Face(height=1.05, inherit=:markdown_header),
+    :markdown_admonition => Face(weight=:bold),
+    :markdown_code => Face(inherit=:code),
+    :markdown_julia_prompt => Face(inherit=:repl_prompt_julia),
+    :markdown_footnote => Face(inherit=:bright_yellow),
+    :markdown_hrule => Face(inherit=:shadow),
+    :markdown_inlinecode => Face(inherit=:markdown_code),
+    :markdown_latex => Face(inherit=:magenta),
+    :markdown_link => Face(underline=:bright_blue),
+    :markdown_list => Face(foreground=:blue),
+]
+
+__init__() = foreach(addface!, MARKDOWN_FACES)
+
 parse(markdown::AbstractString; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor)
 parse_file(file::AbstractString; flavor = julia) = parse(read(file, String), flavor = flavor)
 
@@ -98,4 +122,25 @@ import Base.Docs: catdoc
 
 catdoc(md::MD...) = MD(md...)
 
+if Base.generating_output()
+    # workload to reduce latency
+    md"""
+    # H1
+    ## H2
+    ### H3
+    **bold text**
+    *italicized text*
+    > blockquote
+    1. First item
+    2. Second item
+    3. Third item
+    - First item
+    - Second item
+    - Third item
+    `code`
+    Horizontal Rule
+    ---
+    """
+end
+
 end
diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl
index 0f3cbe857c2f9..389099b2984f6 100644
--- a/stdlib/Markdown/src/parse/parse.jl
+++ b/stdlib/Markdown/src/parse/parse.jl
@@ -95,7 +95,7 @@ function parse(stream::IO, block::MD, config::Config; breaking = false)
 end
 
 parse(stream::IO, block::MD; breaking = false) =
-  parse(stream, block, config(block), breaking = breaking)
+    parse(stream, block, config(block), breaking = breaking)
 
 function parse(stream::IO; flavor = julia)
     isa(flavor, Symbol) && (flavor = flavors[flavor])
diff --git a/stdlib/Markdown/src/render/html.jl b/stdlib/Markdown/src/render/html.jl
index e7d436f2ccbda..829fa6c7bf986 100644
--- a/stdlib/Markdown/src/render/html.jl
+++ b/stdlib/Markdown/src/render/html.jl
@@ -67,6 +67,9 @@ end
 
 function html(io::IO, code::Code)
     withtag(io, :pre) do
+        if code.language == "styled"
+            code = Code("", String(styled(code.code)))
+        end
         maybe_lang = !isempty(code.language) ? Any[:class=>"language-$(code.language)"] : []
         withtag(io, :code, maybe_lang...) do
             htmlesc(io, code.code)
@@ -134,6 +137,9 @@ function htmlinline(io::IO, content::Vector)
 end
 
 function htmlinline(io::IO, code::Code)
+    if code.language == "styled"
+        code = Code("", String(styled(code.code)))
+    end
     withtag(io, :code) do
         htmlesc(io, code.code)
     end
diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl
index df52b2849f2b0..fad0508ce0e59 100644
--- a/stdlib/Markdown/src/render/latex.jl
+++ b/stdlib/Markdown/src/render/latex.jl
@@ -33,6 +33,9 @@ function latex(io::IO, header::Header{l}) where l
 end
 
 function latex(io::IO, code::Code)
+    if code.language == "styled"
+        code = Code("", String(styled(code.code)))
+    end
     occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block")
     wrapblock(io, "verbatim") do
         println(io, code.code)
diff --git a/stdlib/Markdown/src/render/rst.jl b/stdlib/Markdown/src/render/rst.jl
index 752916c581a07..e441ee0495da0 100644
--- a/stdlib/Markdown/src/render/rst.jl
+++ b/stdlib/Markdown/src/render/rst.jl
@@ -23,10 +23,16 @@ end
 function rst(io::IO, code::Code)
     if code.language == "jldoctest"
         println(io, ".. doctest::\n")
-    elseif code.language != "rst"
+    elseif code.language in ("", "julia", "julia-repl")
         println(io, ".. code-block:: julia\n")
+    elseif code.language == "rst"
+    elseif code.language == "styled"
+        code = Code("", String(styled(code.code)))
+        println(io, "::\n")
+    else
+        println(io, "::\n")
     end
-    for l in lines(code.code)
+    for l in eachsplit(code.code, '\n')
         println(io, "    ", l)
     end
 end
@@ -90,7 +96,7 @@ end
 
 function rst(io::IO, l::LaTeX)
     println(io, ".. math::\n")
-    for line in lines(l.formula)
+    for line in eachsplit(l.formula, '\n')
         println(io, "    ", line)
     end
 end
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index a031de4d9ad82..3274483801c77 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -1,68 +1,82 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Wrapping
+const AnnotIO = Union{AnnotatedIOBuffer, IOContext{AnnotatedIOBuffer}}
 
-function ansi_length(s)
-    replace(s, r"\e\[[0-9]+m" => "") |> textwidth
+function annotprint(f::Function, args...)
+    buf = AnnotatedIOBuffer()
+    f(buf, args...)
+    read(seekstart(buf), AnnotatedString)
 end
 
-words(s) = split(s, " ")
-lines(s) = split(s, "\n")
+"""
+    with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...)
 
-function wrapped_line(io::IO, s::AbstractString, width, i)
-    ws = words(s)
-    lines = String[]
-    for word in ws
-        word_length = ansi_length(word)
-        word_length == 0 && continue
-        if isempty(lines) || i + word_length + 1 > width
-            i = word_length
-            if length(lines) > 0
-                last_line = lines[end]
-                maybe_underline = findlast(Base.text_colors[:underline], last_line)
-                if !isnothing(maybe_underline)
-                    # disable underline style at end of line if not already disabled.
-                    maybe_disable_underline = max(
-                        last(something(findlast(Base.disable_text_style[:underline], last_line), -1)),
-                        last(something(findlast(Base.text_colors[:normal], last_line), -1)),
-                    )
+Call `f(io)`, and apply `annots` to the output created by doing so.
+"""
+function with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...)
+    @nospecialize annots
+    aio = if io isa AnnotatedIOBuffer io else io.io end
+    start = position(aio) + 1
+    f(io)
+    stop = position(aio)
+    sortedindex = searchsortedlast(aio.annotations, (start:stop,), by=first)
+    for (i, annot) in enumerate(annots)
+        insert!(aio.annotations, sortedindex + i, (start:stop, annot))
+    end
+end
 
-                    if maybe_disable_underline < 0 || maybe_disable_underline < last(maybe_underline)
+"""
+    wraplines(content::AnnotatedString, width::Integer = 80, column::Integer = 0)
 
-                        lines[end] = last_line * Base.disable_text_style[:underline]
-                        word = Base.text_colors[:underline] * word
-                    end
+Wrap `content` into a vector of lines of at most `width` (according to
+`textwidth`), with the first line starting at `column`.
+"""
+function wraplines(content::Union{Annot, SubString{<:Annot}}, width::Integer = 80, column::Integer = 0) where { Annot <: AnnotatedString}
+    s, lines = String(content), SubString{Annot}[]
+    i, lastwrap, slen = firstindex(s), 0, ncodeunits(s)
+    most_recent_break_opportunity = 1
+    while i < slen
+        if isspace(s[i]) && s[i] != '\n'
+            most_recent_break_opportunity = i
+        elseif s[i] == '\n'
+            push!(lines, content[nextind(s, lastwrap):prevind(s, i)])
+            lastwrap = i
+            column = 0
+        elseif column >= width && most_recent_break_opportunity > 1
+            if lastwrap == most_recent_break_opportunity
+                nextbreak = findfirst(isspace, @view s[nextind(s, lastwrap):end])
+                if isnothing(nextbreak)
+                    break
+                else
+                    most_recent_break_opportunity = lastwrap + nextbreak
                 end
+                i = most_recent_break_opportunity
+            else
+                i = nextind(s, most_recent_break_opportunity)
             end
-            push!(lines, word)
-        else
-            i += word_length + 1
-            lines[end] *= " " * word   # this could be more efficient
+            push!(lines, content[nextind(s, lastwrap):prevind(s, most_recent_break_opportunity)])
+            lastwrap = most_recent_break_opportunity
+            column = 0
         end
+        column += textwidth(s[i])
+        i = nextind(s, i)
     end
-    return i, lines
-end
-
-function wrapped_lines(io::IO, s::AbstractString; width = 80, i = 0)
-    ls = String[]
-    for ss in lines(s)
-        i, line = wrapped_line(io, ss, width, i)
-        append!(ls, line)
+    if lastwrap < slen
+        push!(lines, content[nextind(s, lastwrap):end])
     end
-    return ls
+    lines
 end
 
-wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) =
-    wrapped_lines(io, sprint(f, args...; context=io), width = width, i = 0)
-
-function print_wrapped(io::IO, s...; width = 80, pre = "", i = 0)
-    lines = wrapped_lines(io, s..., width = width, i = i)
-    isempty(lines) && return 0, 0
-    print(io, lines[1])
-    for line in lines[2:end]
-        print(io, '\n', pre, line)
+# Print horizontal lines between each docstring if there are multiple docs
+function insert_hlines(docs)
+    if !isa(docs, MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
+        return docs
+    end
+    docs = docs::MD
+    v = Any[]
+    for (n, doc) in enumerate(docs.content)
+        push!(v, doc)
+        n == length(docs.content) || push!(v, HorizontalRule())
     end
-    length(lines), length(pre) + ansi_length(lines[end])
+    return MD(v)
 end
-
-print_wrapped(f::Function, io::IO, args...; kws...) = print_wrapped(io, f, args...; kws...)
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl
index 20b1ef6d041fc..619b2c8b8ef4a 100644
--- a/stdlib/Markdown/src/render/terminal/render.jl
+++ b/stdlib/Markdown/src/render/terminal/render.jl
@@ -13,121 +13,158 @@ function term(io::IO, content::Vector, cols)
     term(io, content[end], cols)
 end
 
-term(io::IO, md::MD, columns = cols(io)) = term(io, md.content, columns)
+function term(io::IO, md::MD, columns = cols(io))
+    md = insert_hlines(md)
+    return term(io, md.content, columns)
+end
 
 function term(io::IO, md::Paragraph, columns)
-    print(io, ' '^margin)
-    print_wrapped(io, width = columns-2margin, pre = ' '^margin) do io
-        terminline(io, md.content)
+    lines = wraplines(annotprint(terminline, md.content), columns-2margin)
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, md::BlockQuote, columns)
-    s = sprint(term, md.content, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    print(io, ' '^margin, '│', lines[1])
-    for i = 2:length(lines)
-        print(io, '\n', ' '^margin, '│', lines[i])
+    content = annotprint(term, md.content, columns - 10)
+    lines = wraplines(rstrip(content), columns - 10)
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, '│', line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, md::Admonition, columns)
-    col = :default
-    # If the types below are modified, the page manual/documentation.md must be updated accordingly.
-    if md.category == "danger"
-        col = Base.error_color()
-    elseif md.category == "warning"
-        col = Base.warn_color()
-    elseif md.category in ("info", "note")
-        col = Base.info_color()
-    elseif md.category == "tip"
-        col = :green
+    accent = if md.category == "danger"
+        :error
+    elseif md.category in ("warning", "info", "note", "tip")
+        Symbol(md.category)
+    elseif md.category == "compat"
+        :bright_cyan
+    elseif md.category == "todo"
+        :magenta
+    else
+        :default
     end
-    printstyled(io, ' '^margin, "│ "; color=col, bold=true)
-    printstyled(io, isempty(md.title) ? md.category : md.title; color=col, bold=true)
-    printstyled(io, '\n', ' '^margin, '│', '\n'; color=col, bold=true)
-    s = sprint(term, md.content, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    for i in eachindex(lines)
-        printstyled(io, ' '^margin, '│'; color=col, bold=true)
-        print(io, lines[i])
-        i < lastindex(lines) && println(io)
+    title = if isempty(md.title) md.category else md.title end
+    print(io, ' '^margin, styled"{$accent,markdown_admonition:│ $title}",
+          '\n', ' '^margin, styled"{$accent,markdown_admonition:│}", '\n')
+    content = annotprint(term, md.content, columns - 10)
+    lines = split(rstrip(content), '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, styled"{$accent,markdown_admonition:│}", line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, f::Footnote, columns)
     print(io, ' '^margin, "│ ")
-    printstyled(io, "[^$(f.id)]", bold=true)
+    print(io, styled"{markdown_footnote:[^$(f.id)]}")
     println(io, '\n', ' '^margin, '│')
-    s = sprint(term, f.text, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    for i in eachindex(lines)
-        print(io, ' '^margin, '│', lines[i])
-        i < lastindex(lines) && println(io)
+    content = annotprint(term, f.text, columns - 10)
+    lines = split(rstrip(content), '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, '│', line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, md::List, columns)
     for (i, point) in enumerate(md.items)
-        print(io, ' '^2margin, isordered(md) ? "$(i + md.ordered - 1). " : "•  ")
-        print_wrapped(io, width = columns-(4margin+2), pre = ' '^(2margin+3),
-                          i = 2margin+2) do io
-            term(io, point, columns - 10)
-        end
-        i < lastindex(md.items) && print(io, '\n', '\n')
-    end
-end
-
-function _term_header(io::IO, md, char, columns)
-    text = terminline_string(io, md.text)
-    with_output_color(:bold, io) do io
-        pre = ' '^margin
-        print(io, pre)
-        line_no, lastline_width = print_wrapped(io, text,
-                                                width=columns - 4margin; pre)
-        line_width = min(lastline_width, columns)
-        if line_no > 1
-            line_width = max(line_width, div(columns, 3)+length(pre))
+        bullet = isordered(md) ? "$(i + md.ordered - 1)." : "• "
+        print(io, ' '^2margin, styled"{markdown_list:$bullet} ")
+        content = annotprint(term, point, columns - 10)
+        lines = split(rstrip(content), '\n')
+        for (l, line) in enumerate(lines)
+            l > 1 && print(io, ' '^(2margin+3))
+            print(io, lstrip(line))
+            l < length(lines) && println(io)
         end
-        header_width = max(0, line_width-length(pre))
-        char != ' ' && header_width > 0 && print(io, '\n', ' '^(margin), char^header_width)
+        i < length(md.items) && print(io, '\n'^(1 + md.loose))
     end
 end
 
 const _header_underlines = collect("≡=–-⋅ ")
 # TODO settle on another option with unicode e.g. "≡=≃–∼⋅" ?
 
-function term(io::IO, md::Header{l}, columns) where l
+function term(io::AnnotIO, md::Header{l}, columns) where l
+    face = Symbol("markdown_h$l")
     underline = _header_underlines[l]
-    _term_header(io, md, underline, columns)
+    pre = ' '^margin
+    local line_width
+    with_output_annotations(io, :face => face) do io
+        headline = annotprint(terminline, md.text)
+        lines = wraplines(headline, columns - 4margin)
+        for (i, line) in enumerate(lines)
+            print(io, pre, line)
+            i < length(lines) && println(io)
+        end
+        line_width = if length(lines) == 1
+            min(textwidth(lines[end]), columns)
+        elseif length(lines) > 1
+            max(textwidth(lines[end]), div(columns, 3)+length(pre))
+        else
+            0
+        end
+    end
+    header_width = max(0, line_width)
+    if underline != ' ' && header_width > 0
+        print(io, '\n', ' '^(margin))
+        with_output_annotations(io -> print(io, underline^header_width), io, :face => face)
+    end
 end
 
 function term(io::IO, md::Code, columns)
-    with_output_color(:cyan, io) do io
-        L = lines(md.code)
-        for i in eachindex(L)
-            print(io, ' '^margin, L[i])
-            i < lastindex(L) && println(io)
+    code = if md.language ∈ ("", "julia")
+        highlight(md.code)
+    elseif md.language == "julia-repl" || Base.startswith(md.language, "jldoctest")
+        hl = AnnotatedString(md.code)
+        for (; match) in eachmatch(r"(?:^|\n)julia>", hl)
+            StyledStrings.face!(match, :markdown_julia_prompt)
+            afterprompt = match.offset + ncodeunits(match) + 1
+            _, exprend = Meta.parse(md.code, afterprompt, raise = false)
+            highlight!(hl[afterprompt:prevind(md.code, exprend)])
+            if (nextspace = findnext(' ', md.code, exprend)) |> !isnothing
+                nextword = hl[exprend:prevind(hl, nextspace)]
+                if nextword == "ERROR:"
+                    StyledStrings.face!(nextword, :error)
+                end
+            end
         end
+        hl
+    elseif md.language == "styled"
+        styled(md.code)
+    else
+        styled"{markdown_code:$(md.code)}"
+    end
+    lines = split(code, '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, tex::LaTeX, columns)
-    printstyled(io, ' '^margin, tex.formula, color=:magenta)
+    print(io, ' '^margin, styled"{markdown_latex:$(tex.formula)}")
 end
 
 term(io::IO, br::LineBreak, columns) = nothing # line breaks already printed between subsequent elements
 
 function term(io::IO, br::HorizontalRule, columns)
-   print(io, ' '^margin, '─'^(columns - 2margin))
+    print(io, ' '^margin, styled"{markdown_hrule:$('─'^(columns - 2margin))}")
+end
+
+function term(io::IO, md::MarkdownElement, columns)
+    a = IOContext(AnnotatedIOBuffer(), io)
+    term(a, md, columns)
+    print(io, read(seekstart(a.io), AnnotatedString))
 end
 
 term(io::IO, x, _) = show(io, MIME"text/plain"(), x)
 
 # Inline Content
 
-terminline_string(io::IO, md) = sprint(terminline, md; context=io)
-
 terminline(io::IO, content...) = terminline(io, collect(content))
 
 function terminline(io::IO, content::Vector)
@@ -140,12 +177,12 @@ function terminline(io::IO, md::AbstractString)
     print(io, replace(md, r"[\s\t\n]+" => ' '))
 end
 
-function terminline(io::IO, md::Bold)
-    with_output_color(terminline, :bold, io, md.text)
+function terminline(io::AnnotIO, md::Bold)
+    with_output_annotations(io -> terminline(io, md.text), io, :face => :bold)
 end
 
-function terminline(io::IO, md::Italic)
-    with_output_color(terminline, :underline, io, md.text)
+function terminline(io::AnnotIO, md::Italic)
+    with_output_annotations(io -> terminline(io, md.text), io, :face => :italic)
 end
 
 function terminline(io::IO, md::LineBreak)
@@ -156,20 +193,36 @@ function terminline(io::IO, md::Image)
     terminline(io, "(Image: $(md.alt))")
 end
 
-terminline(io::IO, f::Footnote) = with_output_color(terminline, :bold, io, "[^$(f.id)]")
+function terminline(io::IO, f::Footnote)
+    print(io, styled"{markdown_footnote:[^$(f.id)]}")
+end
 
-function terminline(io::IO, md::Link)
-    url = !Base.startswith(md.url, "@ref") ? " ($(md.url))" : ""
-    text = terminline_string(io, md.text)
-    terminline(io, text, url)
+function terminline(io::AnnotIO, md::Link)
+    annots = if occursin(r"^(https?|file)://", md.url)
+        (:face => :markdown_link, :link => md.url)
+    else
+        (:face => :markdown_link,)
+    end
+    with_output_annotations(io -> terminline(io, md.text), io, annots...)
 end
 
 function terminline(io::IO, code::Code)
-    printstyled(io, code.code, color=:cyan)
+    body = if code.language == "styled"
+        styled(code.code)
+    else
+        code.code
+    end
+    print(io, styled"{markdown_inlinecode:$body}")
 end
 
 function terminline(io::IO, tex::LaTeX)
-    printstyled(io, tex.formula, color=:magenta)
+    print(io, styled"{markdown_latex:$(tex.formula)}")
+end
+
+function terminline(io::IO, md::MarkdownElement)
+    a = IOContext(AnnotatedIOBuffer(), io)
+    terminline(a, md)
+    print(io, read(seekstart(a.io), AnnotatedString))
 end
 
 terminline(io::IO, x) = show(io, MIME"text/plain"(), x)
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index 116282a0bea3b..ffdb735f3b7cd 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Markdown
-import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, plain, term, html, rst, Table, Code, LaTeX, Footnote
+using Test, Markdown, StyledStrings
+import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, insert_hlines, plain, term, html, rst, Table, Code, LaTeX, Footnote
 import Base: show
 
 # Basics
@@ -233,7 +233,7 @@ World""" |> plain == "Hello\n\n---\n\nWorld\n"
 
 # multiple whitespace is ignored
 @test sprint(term, md"a  b") == "  a b"
-@test sprint(term, md"[x](https://julialang.org)") == "  x (https://julialang.org)"
+@test sprint(term, md"[x](https://julialang.org)") == "  x"
 @test sprint(term, md"[x](@ref)") == "  x"
 @test sprint(term, md"[x](@ref something)") == "  x"
 @test sprint(term, md"![x](https://julialang.org)") == "  (Image: x)"
@@ -298,6 +298,7 @@ end
 let doc =
     md"""
     1. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
+
     2. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
     """
     str = sprint(term, doc, 50)
@@ -376,8 +377,8 @@ table = md"""
 # mime output
 let out =
     @test sprint(show, "text/plain", book) ==
-        "  Title\n  ≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  =================\n\n  Some bolded\n\n    •  list1\n\n    •  list2"
-    @test sprint(show, "text/plain", md"#") == "  " # edge case of empty header
+        "  Title\n  ≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  =================\n\n  Some bolded\n\n    •  list1\n    •  list2"
+    @test sprint(show, "text/plain", md"#") == "" # edge case of empty header
     @test sprint(show, "text/markdown", book) ==
         """
         # Title
@@ -1157,7 +1158,7 @@ let buf = IOBuffer()
     show(buf, "text/markdown", md"*emph*")
     @test String(take!(buf)) == "*emph*\n"
     show(IOContext(buf, :color=>true), "text/plain", md"*emph*")
-    @test String(take!(buf)) == "  \e[4memph\e[24m"
+    @test String(take!(buf)) in ("  \e[3memph\e[23m", "  \e[4memph\e[24m")
 end
 
 let word = "Markdown" # disable underline when wrapping lines
@@ -1166,8 +1167,8 @@ let word = "Markdown" # disable underline when wrapping lines
     long_italic_text = Markdown.parse('_' * join(fill(word, 10), ' ') * '_')
     show(ctx, MIME("text/plain"), long_italic_text)
     lines = split(String(take!(buf)), '\n')
-    @test endswith(lines[begin], Base.disable_text_style[:underline])
-    @test startswith(lines[begin+1], ' '^Markdown.margin * Base.text_colors[:underline])
+    @test endswith(lines[begin], r"\e\[2[34]m")
+    @test startswith(lines[begin+1], Regex(' '^Markdown.margin * "\e\\[[34]m"))
 end
 
 let word = "Markdown" # pre is of size Markdown.margin when wrapping title
@@ -1176,7 +1177,9 @@ let word = "Markdown" # pre is of size Markdown.margin when wrapping title
     long_title = Markdown.parse("# " * join(fill(word, 3)))
     show(ctx, MIME("text/plain"), long_title)
     lines = split(String(take!(buf)), '\n')
-    @test all(startswith(Base.text_colors[:bold] * ' '^Markdown.margin), lines)
+    @test all(l -> startswith(l, ' '^Markdown.margin * StyledStrings.ANSI_STYLE_CODES.bold_weight) ||
+                   startswith(l, StyledStrings.ANSI_STYLE_CODES.bold_weight * ' '^Markdown.margin),
+              lines)
 end
 
 struct Struct49454 end
@@ -1259,8 +1262,9 @@ end
     s = @md_str """
        Misc:\\
        - line\\
+         break
        """
-    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
+    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line\n   break"
 end
 
 @testset "pullrequest #41552: a code block has \\end{verbatim}" begin
@@ -1297,3 +1301,10 @@ end
 @testset "Docstrings" begin
     @test isempty(Docs.undocumented_names(Markdown))
 end
+
+@testset "Non-Markdown" begin
+    # https://github.com/JuliaLang/julia/issues/37765
+    @test isa(insert_hlines(Text("foo")), Text)
+    # https://github.com/JuliaLang/julia/issues/37757
+    @test insert_hlines(nothing) === nothing
+end
diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
index e46da42a9a638..6367213e2c4ab 100644
--- a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
+++ b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule MbedTLS_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index 6d328c40cd7b3..e6987582bf511 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -256,7 +256,7 @@ function mmap(io::IO,
     end # os-test
     # convert mmapped region to Julia Array at `ptr + (offset - offset_page)` since file was mapped at offset_page
     A = unsafe_wrap(Array, convert(Ptr{T}, UInt(ptr) + UInt(offset - offset_page)), dims)
-    finalizer(A) do x
+    finalizer(A.ref.mem) do x
         @static if Sys.isunix()
             systemerror("munmap",  ccall(:munmap, Cint, (Ptr{Cvoid}, Int), ptr, mmaplen) != 0)
         else
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index ebd16a45ba0ed..03e4b48d95f7a 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -100,9 +100,9 @@ if !(Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le)
     s = open(file, "r")
     m = mmap(s)
     @test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! on read-only array
-    finalize(m); m=nothing; GC.gc()
+    finalize(m); m=nothing;
 end
-
+GC.gc()
 write(file, "Hello World\n")
 
 s = open(file, "r")
@@ -336,8 +336,9 @@ open(file, "r+") do s
     finalize(A); A = nothing; GC.gc()
     A = mmap(s, Vector{UInt8}, (10,), 1)
     Mmap.sync!(A)
-    finalize(A); A = nothing; GC.gc()
+    finalize(A); A = nothing;
 end
+GC.gc()
 rm(file)
 
 @testset "Docstrings" begin
diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml
index b4f667b974736..181171a4c04c1 100644
--- a/stdlib/MozillaCACerts_jll/Project.toml
+++ b/stdlib/MozillaCACerts_jll/Project.toml
@@ -1,7 +1,7 @@
 name = "MozillaCACerts_jll"
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
 # Keep in sync with `deps/libgit2.version`.
-version = "2023.12.12"
+version = "2024.03.11"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
index 244c1204563d5..1d5df0236ae9e 100644
--- a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
+++ b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule MozillaCACerts_jll
 using Base
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version
index be36f14f526dc..221b9b934f1a1 100644
--- a/stdlib/NetworkOptions.version
+++ b/stdlib/NetworkOptions.version
@@ -1,4 +1,4 @@
 NETWORKOPTIONS_BRANCH = master
-NETWORKOPTIONS_SHA1 = aab83e5dd900c874826d430e25158dff43559d78
+NETWORKOPTIONS_SHA1 = 8eec5cb0acec4591e6db3c017f7499426cd8e352
 NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git
 NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index 10e8285027d42..a9a1a04facff5 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.26+2"
+version = "0.3.28+2"
 
 [deps]
 # See note in `src/OpenBLAS_jll.jl` about this dependency.
diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
index a0c11ab047142..2f151f63f4413 100644
--- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
+++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
@@ -13,7 +13,6 @@ using Base, Libdl, Base.BinaryPlatforms
 # using CompilerSupportLibraries_jll
 # Because of this however, we have to manually load the libraries we
 # _do_ care about, namely libgfortran
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/OpenBLAS_jll/test/runtests.jl b/stdlib/OpenBLAS_jll/test/runtests.jl
index 1d944bab8cd67..76242b2e4080e 100644
--- a/stdlib/OpenBLAS_jll/test/runtests.jl
+++ b/stdlib/OpenBLAS_jll/test/runtests.jl
@@ -13,5 +13,5 @@ else
 end
 
 @testset "OpenBLAS_jll" begin
-    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) != nothing
+    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) !== nothing
 end
diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
index f2dee45a279cd..297cd25512894 100644
--- a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
+++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenLibm_jll.jl
 baremodule OpenLibm_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml
index 788d6b733234f..f9b3affb51b63 100644
--- a/stdlib/PCRE2_jll/Project.toml
+++ b/stdlib/PCRE2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "PCRE2_jll"
 uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
-version = "10.42.0+1"
+version = "10.43.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
index e7f685820830b..d825ac74db5a8 100644
--- a/stdlib/PCRE2_jll/src/PCRE2_jll.jl
+++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/PCRE2_jll.jl
 baremodule PCRE2_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/PCRE2_jll/test/runtests.jl b/stdlib/PCRE2_jll/test/runtests.jl
index d593b07af31ce..af0ed9434d2b6 100644
--- a/stdlib/PCRE2_jll/test/runtests.jl
+++ b/stdlib/PCRE2_jll/test/runtests.jl
@@ -6,5 +6,5 @@ using Test, Libdl, PCRE2_jll
     vstr = zeros(UInt8, 32)
     @test ccall((:pcre2_config_8, libpcre2_8), Cint, (UInt32, Ref{UInt8}), 11, vstr) > 0
     vn = VersionNumber(split(unsafe_string(pointer(vstr)), " ")[1])
-    @test vn == v"10.42.0"
+    @test vn == v"10.43.0"
 end
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index ca45bee7ee736..3d4a627d6e472 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,4 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = 48eea8dbd7b651cdc932b909c1b718bb9c3f94f4
+PKG_SHA1 = 299a356100f54215388502148979189aff760822
 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
 PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index 860ef3610f728..fd38b3ebd3573 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -297,11 +297,11 @@ end
 @inline function rmdynamic(spec::Spec{T}, args, argp) where {T}
     zero, width, precision = spec.zero, spec.width, spec.precision
     if spec.dynamic_width
-        width = args[argp]
+        width = args[argp]::Integer
         argp += 1
     end
     if spec.dynamic_precision
-        precision = args[argp]
+        precision = args[argp]::Integer
         if zero && T <: Ints && precision > 0
             zero = false
         end
@@ -310,12 +310,12 @@ end
     (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp)
 end
 
-@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
+Base.@constprop :aggressive function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
     spec, argp = rmdynamic(spec, args, argp)
     (fmt(buf, pos, args[argp], spec), argp+1)
 end
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
     c = Char(first(arg))
     w = textwidth(c)
@@ -336,7 +336,7 @@ end
 end
 
 # strings
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
     leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision
     str = string(arg)
     slen = textwidth(str)::Int + (hash ? arg isa AbstractString ? 2 : 1 : 0)
@@ -383,7 +383,7 @@ toint(x::Rational) = Integer(x)
 fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
     fmt(buf, pos, arg, floatfmt(spec))
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     bs = base(T)
@@ -497,7 +497,7 @@ _snprintf(ptr, siz, str, arg) =
 # seems like a dangerous thing to do.
 const __BIG_FLOAT_MAX__ = 8192
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     x = tofloat(arg)
@@ -654,7 +654,7 @@ const __BIG_FLOAT_MAX__ = 8192
         else
             # right aligned
             n = width - (newpos - pos)
-            if zero
+            if zero && isfinite(x)
                 ex = (arg < 0 || (plus | space)) + (T <: Union{Val{'a'}, Val{'A'}} ? 2 : 0)
                 so = pos + ex
                 len = (newpos - pos) - ex
@@ -931,7 +931,8 @@ for more details on C `printf` support.
 """
 function format end
 
-function format(io::IO, f::Format, args...) # => Nothing
+# Since it will specialize on `f`, which has a Tuple-type often of length(args), we might as well specialize on `args` too.
+function format(io::IO, f::Format, args::Vararg{Any,N}) where N # => Nothing
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
@@ -939,7 +940,7 @@ function format(io::IO, f::Format, args...) # => Nothing
     return
 end
 
-function format(f::Format, args...) # => String
+function format(f::Format, args::Vararg{Any,N}) where N # => String
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index f00d7abe362c5..abe547c00ed0d 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -116,12 +116,15 @@ end
     @test (Printf.@sprintf "%+f" Inf) == "+Inf"
     @test (Printf.@sprintf "% f" Inf) == " Inf"
     @test (Printf.@sprintf "% #f" Inf) == " Inf"
+    @test (Printf.@sprintf "%07f" Inf) == "    Inf"
     @test (Printf.@sprintf "%f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+f" -Inf) == "-Inf"
+    @test (Printf.@sprintf "%07f" -Inf) == "   -Inf"
     @test (Printf.@sprintf "%f" NaN) == "NaN"
     @test (Printf.@sprintf "%+f" NaN) == "+NaN"
     @test (Printf.@sprintf "% f" NaN) == " NaN"
     @test (Printf.@sprintf "% #f" NaN) == " NaN"
+    @test (Printf.@sprintf "%07f" NaN) == "    NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -169,12 +172,15 @@ end
     @test (Printf.@sprintf "%+e" Inf) == "+Inf"
     @test (Printf.@sprintf "% e" Inf) == " Inf"
     @test (Printf.@sprintf "% #e" Inf) == " Inf"
+    @test (Printf.@sprintf "%07e" Inf) == "    Inf"
     @test (Printf.@sprintf "%e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+e" -Inf) == "-Inf"
+    @test (Printf.@sprintf "%07e" -Inf) == "   -Inf"
     @test (Printf.@sprintf "%e" NaN) == "NaN"
     @test (Printf.@sprintf "%+e" NaN) == "+NaN"
     @test (Printf.@sprintf "% e" NaN) == " NaN"
     @test (Printf.@sprintf "% #e" NaN) == " NaN"
+    @test (Printf.@sprintf "%07e" NaN) == "    NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index ad0107ecf9404..13cd11f70d9b4 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -2,6 +2,12 @@ name = "Profile"
 uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 version = "1.11.0"
 
+[deps]
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+
+[compat]
+StyledStrings = "1.11.0"
+
 [extras]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
index 59399b1f21bb3..9d0b18cb468ca 100644
--- a/stdlib/Profile/src/Allocs.jl
+++ b/stdlib/Profile/src/Allocs.jl
@@ -40,7 +40,7 @@ end
     Profile.Allocs.@profile [sample_rate=0.1] expr
 
 Profile allocations that happen during `expr`, returning
-both the result and and AllocResults struct.
+both the result and AllocResults struct.
 
 A sample rate of 1.0 will record everything; 0.0 will record nothing.
 
@@ -54,18 +54,17 @@ julia> last(sort(results.allocs, by=x->x.size))
 Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576)
 ```
 
-The best way to visualize these is currently with the
-[PProf.jl](https://github.com/JuliaPerf/PProf.jl) package,
-by invoking `PProf.Allocs.pprof`.
+See the profiling tutorial in the Julia documentation for more information.
 
-!!! note
-    The current implementation of the Allocations Profiler does not
-    capture types for all allocations. Allocations for which the profiler
-    could not capture the type are represented as having type
-    `Profile.Allocs.UnknownType`.
+!!! compat "Julia 1.11"
 
-    You can read more about the missing types and the plan to improve this, here:
-    <https://github.com/JuliaLang/julia/issues/43688>.
+    Older versions of Julia could not capture types in all cases. In older versions of
+    Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that
+    the profiler doesn't know what type of object was allocated. This mainly happened when
+    the allocation was coming from generated code produced by the compiler. See
+    [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info.
+
+    Since Julia 1.11, all allocations should have a type reported.
 
 !!! compat "Julia 1.8"
     The allocation profiler was added in Julia 1.8.
@@ -322,7 +321,7 @@ end
 function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
     fmt.combine || error(ArgumentError("combine=false"))
     lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
-    filenamemap = Dict{Symbol,String}()
+    filenamemap = Profile.FileNameMap()
     if isempty(lilist)
         warning_empty()
         return true
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index e426884f058b9..c7ef1efb35945 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -23,8 +23,8 @@ Profiling support.
 module Profile
 
 global print
-public @profile,
-    clear,
+export @profile
+public clear,
     print,
     fetch,
     retrieve,
@@ -38,14 +38,14 @@ public @profile,
     Allocs
 
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
+import Base: AnnotatedString
+using StyledStrings: @styled_str
 
 const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
 
 # deprecated functions: use `getdict` instead
 lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
 
-export @profile
-
 """
     @profile
 
@@ -65,10 +65,10 @@ end
 
 # An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
 function _peek_report()
-    iob = IOBuffer()
+    iob = Base.AnnotatedIOBuffer()
     ioc = IOContext(IOContext(iob, stderr), :displaysize=>displaysize(stderr))
     print(ioc, groupby = [:thread, :task])
-    Base.print(stderr, String(take!(iob)))
+    Base.print(stderr, read(seekstart(iob), AnnotatedString))
 end
 # This is a ref so that it can be overridden by other profile info consumers.
 const peek_report = Ref{Function}(_peek_report)
@@ -268,7 +268,7 @@ function print(io::IO,
         end
         any_nosamples = true
         if format === :tree
-            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
             Base.print(io, "=========================================================\n")
         end
         if groupby == [:task, :thread]
@@ -282,7 +282,7 @@ function print(io::IO,
                     nl = length(threadids) > 1 ? "\n" : ""
                     printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                     for threadid in threadids
-                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
+                        printstyled(io, " Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
                         nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
@@ -298,7 +298,7 @@ function print(io::IO,
                     any_nosamples = true
                 else
                     nl = length(taskids) > 1 ? "\n" : ""
-                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid)))$nl"; bold=true, color=Base.info_color())
                     for taskid in taskids
                         printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                         nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
@@ -322,7 +322,7 @@ function print(io::IO,
             threadids = intersect(get_thread_ids(data), threads)
             isempty(threadids) && (any_nosamples = true)
             for threadid in threadids
-                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
+                printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
                 nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
                 nosamples && (any_nosamples = true)
                 println(io)
@@ -503,12 +503,23 @@ function flatten(data::Vector, lidict::LineInfoDict)
     return (newdata, newdict)
 end
 
+const SRC_DIR = normpath(joinpath(Sys.BUILD_ROOT_PATH, "src"))
+
 # Take a file-system path and try to form a concise representation of it
 # based on the package ecosystem
-function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
+function short_path(spath::Symbol, filenamecache::Dict{Symbol, Tuple{String,String,String}})
     return get!(filenamecache, spath) do
-        path = string(spath)
-        if isabspath(path)
+        path = Base.fixup_stdlib_path(string(spath))
+        path_norm = normpath(path)
+        possible_base_path = normpath(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
+        lib_dir = abspath(Sys.BINDIR, Base.LIBDIR)
+        if startswith(path_norm, SRC_DIR)
+            remainder = only(split(path_norm, SRC_DIR, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@juliasrc", remainder
+        elseif startswith(path_norm, lib_dir)
+            remainder = only(split(path_norm, lib_dir, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@julialib", remainder
+        elseif isabspath(path)
             if ispath(path)
                 # try to replace the file-system prefix with a short "@Module" one,
                 # assuming that profile came from the current machine
@@ -524,20 +535,21 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
                             pkgid = Base.project_file_name_uuid(project_file, "")
                             isempty(pkgid.name) && return path # bad Project file
                             # return the joined the module name prefix and path suffix
-                            path = path[nextind(path, sizeof(root)):end]
-                            return string("@", pkgid.name, path)
+                            _short_path = path[nextind(path, sizeof(root)):end]
+                            return path, string("@", pkgid.name), _short_path
                         end
                     end
                 end
             end
-            return path
-        elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
+            return path, "", path
+        elseif isfile(possible_base_path)
             # do the same mechanic for Base (or Core/Compiler) files as above,
             # but they start from a relative path
-            return joinpath("@Base", normpath(path))
+            return possible_base_path, "@Base", normpath(path)
         else
             # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
-            return normpath(path) # drop leading "./"
+            path = normpath(path)
+            return "", "", path # drop leading "./"
         end
     end
 end
@@ -680,10 +692,10 @@ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
     !isempty(data) && has_meta(data) && error("input already has metadata")
     cpu_clock_cycle = UInt64(99)
     data_with_meta = similar(data, 0)
-    for i = 1:length(data)
+    for i in eachindex(data)
         val = data[i]
         if iszero(val)
-            # (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+            # META_OFFSET_THREADID, META_OFFSET_TASKID, META_OFFSET_CPUCYCLECLOCK, META_OFFSET_SLEEPSTATE
             push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0)
         else
             push!(data_with_meta, val)
@@ -758,6 +770,8 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     return (lilist, n, m, totalshots, nsleeping)
 end
 
+const FileNameMap = Dict{Symbol,Tuple{String,String,String}}
+
 function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                 threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
@@ -768,7 +782,7 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
         m = m[keep]
     end
     util_perc = (1 - (nsleeping / totalshots)) * 100
-    filenamemap = Dict{Symbol,String}()
+    filenamemap = FileNameMap()
     if isempty(lilist)
         if is_subsection
             Base.print(io, "Total snapshots: ")
@@ -790,9 +804,34 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
     return false
 end
 
+# make a terminal-clickable link to the file and linenum.
+# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands
+function editor_link(path::String, linenum::Int)
+    editor = get(ENV, "JULIA_EDITOR", "")
+
+    if editor == "code"
+        return "vscode://file/$path:$linenum"
+    elseif editor == "subl" || editor == "sublime_text"
+        return "subl://$path:$linenum"
+    elseif editor == "idea" || occursin("idea", editor)
+        return "idea://open?file=$path&line=$linenum"
+    elseif editor == "pycharm"
+        return "pycharm://open?file=$path&line=$linenum"
+    elseif editor == "atom"
+        return "atom://core/open/file?filename=$path&line=$linenum"
+    elseif editor == "emacsclient"
+        return "emacs://open?file=$path&line=$linenum"
+    elseif editor == "vim" || editor == "nvim"
+        return "vim://open?file=$path&line=$linenum"
+    else
+        # TODO: convert the path to a generic URI (line numbers are not supported by generic URI)
+        return path
+    end
+end
+
 function print_flat(io::IO, lilist::Vector{StackFrame},
         n::Vector{Int}, m::Vector{Int},
-        cols::Int, filenamemap::Dict{Symbol,String},
+        cols::Int, filenamemap::FileNameMap,
         fmt::ProfileFormat)
     if fmt.sortedby === :count
         p = sortperm(n)
@@ -804,18 +843,18 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
     lilist = lilist[p]
     n = n[p]
     m = m[p]
-    filenames = String[short_path(li.file, filenamemap) for li in lilist]
+    pkgnames_filenames = Tuple{String,String,String}[short_path(li.file, filenamemap) for li in lilist]
     funcnames = String[string(li.func) for li in lilist]
     wcounts = max(6, ndigits(maximum(n)))
     wself = max(9, ndigits(maximum(m)))
     maxline = 1
     maxfile = 6
     maxfunc = 10
-    for i in 1:length(lilist)
+    for i in eachindex(lilist)
         li = lilist[i]
         maxline = max(maxline, li.line)
-        maxfunc = max(maxfunc, length(funcnames[i]))
-        maxfile = max(maxfile, length(filenames[i]))
+        maxfunc = max(maxfunc, textwidth(funcnames[i]))
+        maxfile = max(maxfile, sum(textwidth, pkgnames_filenames[i][2:3]) + 1)
     end
     wline = max(5, ndigits(maxline))
     ntext = max(20, cols - wcounts - wself - wline - 3)
@@ -831,7 +870,7 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
             rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
     println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
             rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
-    for i = 1:length(n)
+    for i in eachindex(n)
         n[i] < fmt.mincount && continue
         li = lilist[i]
         Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
@@ -843,16 +882,29 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
                 Base.print(io, "[any unknown stackframes]")
             end
         else
-            file = filenames[i]
+            path, pkgname, file = pkgnames_filenames[i]
             isempty(file) && (file = "[unknown file]")
-            Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ")
+            pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+            Base.printstyled(io, pkgname, color=pkgcolor)
+            file_trunc = ltruncate(file, max(1, wfile))
+            wpad = wfile - textwidth(pkgname)
+            if !isempty(pkgname) && !startswith(file_trunc, "/")
+                Base.print(io, "/")
+                wpad -= 1
+            end
+            if isempty(path)
+                Base.print(io, rpad(file_trunc, wpad, " "))
+            else
+                link = editor_link(path, li.line)
+                Base.print(io, rpad(styled"{link=$link:$file_trunc}", wpad, " "))
+            end
             Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
             fname = funcnames[i]
             if !li.from_c && li.linfo !== nothing
                 fname = sprint(show_spec_linfo, li)
             end
             isempty(fname) && (fname = "[unknown function]")
-            Base.print(io, ltruncto(fname, wfunc))
+            Base.print(io, rtruncate(fname, wfunc))
         end
         println(io)
     end
@@ -891,21 +943,24 @@ function indent(depth::Int)
     return indent
 end
 
-function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool)
+# mimics Stacktraces
+const PACKAGE_FIXEDCOLORS = Dict{String, Any}("@Base" => :gray, "@Core" => :gray)
+
+function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::FileNameMap, showpointer::Bool)
     nindent = min(cols>>1, level)
     ndigoverhead = ndigits(maxes.overhead)
     ndigcounts = ndigits(maxes.count)
     ndigline = ndigits(maximum(frame.frame.line for frame in frames)) + 6
     ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
     widthfile = 2*ntext÷5 # min 12
-    strs = Vector{String}(undef, length(frames))
+    strs = Vector{AnnotatedString{String}}(undef, length(frames))
     showextra = false
     if level > nindent
         nextra = level - nindent
         nindent -= ndigits(nextra) + 2
         showextra = true
     end
-    for i = 1:length(frames)
+    for i in eachindex(frames)
         frame = frames[i]
         li = frame.frame
         stroverhead = lpad(frame.overhead > 0 ? string(frame.overhead) : "", ndigoverhead, " ")
@@ -926,7 +981,7 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                 else
                     fname = string(li.func)
                 end
-                filename = short_path(li.file, filenamemap)
+                path, pkgname, filename = short_path(li.file, filenamemap)
                 if showpointer
                     fname = string(
                         "0x",
@@ -934,17 +989,26 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                         " ",
                         fname)
                 end
-                strs[i] = string(stroverhead, "╎", base, strcount, " ",
-                    rtruncto(filename, widthfile),
-                    ":",
-                    li.line == -1 ? "?" : string(li.line),
-                    "; ",
-                    fname)
+                pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+                remaining_path = ltruncate(filename, max(1, widthfile - textwidth(pkgname) - 1))
+                linenum = li.line == -1 ? "?" : string(li.line)
+                slash = (!isempty(pkgname) && !startswith(remaining_path, "/")) ? "/" : ""
+                styled_path = styled"{$pkgcolor:$pkgname}$slash$remaining_path:$linenum"
+                rich_file = if isempty(path)
+                    styled_path
+                else
+                    link = editor_link(path, li.line)
+                    styled"{link=$link:$styled_path}"
+                end
+                strs[i] = Base.annotatedstring(stroverhead, "╎", base, strcount, " ", rich_file, "  ", fname)
+                if frame.overhead > 0
+                    strs[i] = styled"{bold:$(strs[i])}"
+                end
             end
         else
             strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]")
         end
-        strs[i] = ltruncto(strs[i], cols)
+        strs[i] = rtruncate(strs[i], cols)
     end
     return strs
 end
@@ -1103,10 +1167,10 @@ end
 # avoid stack overflows.
 function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
-    filenamemap = Dict{Symbol,String}()
-    worklist = [(bt, 0, 0, "")]
+    filenamemap = FileNameMap()
+    worklist = [(bt, 0, 0, AnnotatedString(""))]
     if !is_subsection
-        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
         Base.print(io, "=========================================================\n")
     end
     while !isempty(worklist)
@@ -1137,7 +1201,7 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             count = down.count
             count < fmt.mincount && continue
             count < noisefloor && continue
-            str = strs[i]
+            str = strs[i]::AnnotatedString
             noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
@@ -1198,24 +1262,7 @@ function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
     return [(v[i], k[i]) for i in p]
 end
 
-# Utilities
-function rtruncto(str::String, w::Int)
-    if textwidth(str) <= w
-        return str
-    else
-        return string("…", str[prevind(str, end, w-2):end])
-    end
-end
-function ltruncto(str::String, w::Int)
-    if textwidth(str) <= w
-        return str
-    else
-        return string(str[1:nextind(str, 1, w-2)], "…")
-    end
-end
-
-
-truncto(str::Symbol, w::Int) = truncto(string(str), w)
+## Utilities
 
 # Order alphabetically (file, function) and then by line number
 function liperm(lilist::Vector{StackFrame})
@@ -1252,8 +1299,10 @@ end
 
 
 """
-    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
-    Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
+    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
+                               redact_data::Bool=true, streaming::Bool=false)
+    Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true,
+                               dir::String=nothing, streaming::Bool=false)
 
 Write a snapshot of the heap, in the JSON format expected by the Chrome
 Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1264,6 +1313,8 @@ full file path, or IO stream.
 If `all_one` is true, then report the size of every object as one so they can be easily
 counted. Otherwise, report the actual size.
 
+If `redact_data` is true (default), then do not emit the contents of any object.
+
 If `streaming` is true, we will stream the snapshot data out into four files, using filepath
 as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
 used for any setting where your memory is constrained. These files can then be reassembled
@@ -1279,27 +1330,28 @@ backwards-compatibility) and your process is killed, note that this will always
 parts in the same directory as your provided filepath, so you can still reconstruct the
 snapshot after the fact, via `assemble_snapshot()`.
 """
-function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
+function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
     if streaming
-        _stream_heap_snapshot(filepath, all_one)
+        _stream_heap_snapshot(filepath, all_one, redact_data)
     else
         # Support the legacy, non-streaming mode, by first streaming the parts, then
         # reassembling it after we're done.
         prefix = filepath
-        _stream_heap_snapshot(prefix, all_one)
+        _stream_heap_snapshot(prefix, all_one, redact_data)
         Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
+        Profile.HeapSnapshot.cleanup_streamed_files(prefix)
     end
     return filepath
 end
-function take_heap_snapshot(io::IO, all_one::Bool=false)
+function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
     # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
     # then reassembling it after we're done.
     dir = tempdir()
     prefix = joinpath(dir, "snapshot")
-    _stream_heap_snapshot(prefix, all_one)
+    _stream_heap_snapshot(prefix, all_one, redact_data)
     Profile.HeapSnapshot.assemble_snapshot(prefix, io)
 end
-function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
+function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
     # Nodes and edges are binary files
     open("$prefix.nodes", "w") do nodes
         open("$prefix.edges", "w") do edges
@@ -1312,9 +1364,9 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
                     Base.@_lock_ios(json,
                         ccall(:jl_gc_take_heap_snapshot,
                             Cvoid,
-                            (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
+                            (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
                             nodes.handle, edges.handle, strings.handle, json.handle,
-                            Cchar(all_one))
+                            Cchar(all_one), Cchar(redact_data))
                     )
                     )
                     )
@@ -1324,7 +1376,7 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
         end
     end
 end
-function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
+function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
     fname = "$(getpid())_$(time_ns()).heapsnapshot"
     if isnothing(dir)
         wd = pwd()
@@ -1339,7 +1391,7 @@ function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing)
     else
         fpath = joinpath(expanduser(dir), fname)
     end
-    return take_heap_snapshot(fpath, all_one)
+    return take_heap_snapshot(fpath, all_one; kwargs...)
 end
 
 """
diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl
index b39f53a8bda03..2413ae538b8ac 100644
--- a/stdlib/Profile/src/heapsnapshot_reassemble.jl
+++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl
@@ -92,52 +92,55 @@ function assemble_snapshot(in_prefix, io::IO)
     node_count = parse(Int, String(@view preamble[pos:endpos]))
 
     pos = last(findnext("edge_count\":", preamble, endpos)) + 1
-    endpos = findnext(==('}'), preamble, pos) - 1
+    endpos = findnext(==(','), preamble, pos) - 1
     edge_count = parse(Int, String(@view preamble[pos:endpos]))
 
     nodes = Nodes(node_count, edge_count)
 
     orphans = Set{UInt}() # nodes that have no incoming edges
     # Parse nodes with empty edge counts that we need to fill later
-    nodes_file = open(string(in_prefix, ".nodes"), "r")
-    for i in 1:length(nodes)
-        node_type = read(nodes_file, Int8)
-        node_name_idx = read(nodes_file, UInt)
-        id = read(nodes_file, UInt)
-        self_size = read(nodes_file, Int)
-        @assert read(nodes_file, Int) == 0 # trace_node_id
-        @assert read(nodes_file, Int8) == 0 # detachedness
-
-        nodes.type[i] = node_type
-        nodes.name_idx[i] = node_name_idx
-        nodes.id[i] = id
-        nodes.self_size[i] = self_size
-        nodes.edge_count[i] = 0 # edge_count
-        # populate the orphans set with node index
-        push!(orphans, i-1)
+    open(string(in_prefix, ".nodes"), "r") do nodes_file
+        for i in 1:length(nodes)
+            node_type = read(nodes_file, Int8)
+            node_name_idx = read(nodes_file, UInt)
+            id = read(nodes_file, UInt)
+            self_size = read(nodes_file, Int)
+            @assert read(nodes_file, Int) == 0 # trace_node_id
+            @assert read(nodes_file, Int8) == 0 # detachedness
+
+            nodes.type[i] = node_type
+            nodes.name_idx[i] = node_name_idx
+            nodes.id[i] = id
+            nodes.self_size[i] = self_size
+            nodes.edge_count[i] = 0 # edge_count
+            # populate the orphans set with node index
+            push!(orphans, i-1)
+        end
     end
 
     # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
-    edges_file = open(string(in_prefix, ".edges"), "r")
-    for i in 1:length(nodes.edges)
-        edge_type = read(edges_file, Int8)
-        edge_name_or_index = read(edges_file, UInt)
-        from_node = read(edges_file, UInt)
-        to_node = read(edges_file, UInt)
-
-        nodes.edges.type[i] = edge_type
-        nodes.edges.name_or_index[i] = edge_name_or_index
-        nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
-        nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing
-        push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
-        # remove the node from the orphans if it has at least one incoming edge
-        if to_node in orphans
-            delete!(orphans, to_node)
+    open(string(in_prefix, ".edges"), "r") do edges_file
+        for i in 1:length(nodes.edges)
+            edge_type = read(edges_file, Int8)
+            edge_name_or_index = read(edges_file, UInt)
+            from_node = read(edges_file, UInt)
+            to_node = read(edges_file, UInt)
+
+            nodes.edges.type[i] = edge_type
+            nodes.edges.name_or_index[i] = edge_name_or_index
+            nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
+            nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing
+            push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
+            # remove the node from the orphans if it has at least one incoming edge
+            if to_node in orphans
+                delete!(orphans, to_node)
+            end
         end
     end
 
     _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
-    println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here
+    println(io, @view(preamble[1:end-1]), ",") # remove trailing "}" to reopen the object
+
     println(io, "\"nodes\":[")
     for i in 1:length(nodes)
         i > 1 && println(io, ",")
@@ -182,12 +185,11 @@ function assemble_snapshot(in_prefix, io::IO)
             str_bytes = read(strings_io, str_size)
             str = String(str_bytes)
             if first
-                print_str_escape_json(io, str)
                 first = false
             else
                 print(io, ",\n")
-                print_str_escape_json(io, str)
             end
+            print_str_escape_json(io, str)
         end
     end
     print(io, "]}")
@@ -202,6 +204,19 @@ function assemble_snapshot(in_prefix, io::IO)
     return nothing
 end
 
+"""
+    cleanup_streamed_files(prefix::AbstractString)
+
+Remove files streamed during `take_heap_snapshot` in streaming mode.
+"""
+function cleanup_streamed_files(prefix::AbstractString)
+    rm(string(prefix, ".metadata.json"))
+    rm(string(prefix, ".nodes"))
+    rm(string(prefix, ".edges"))
+    rm(string(prefix, ".strings"))
+    return nothing
+end
+
 function print_str_escape_json(stream::IO, s::AbstractString)
     print(stream, '"')
     for c in s
@@ -221,6 +236,9 @@ function print_str_escape_json(stream::IO, s::AbstractString)
             print(stream, "\\t")
         elseif '\x00' <= c <= '\x1f'
             print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
+        elseif !isvalid(c)
+            # we have to do this because vscode's viewer doesn't like the replace character
+            print(stream, "[invalid unicode character]")
         else
             print(stream, c)
         end
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index cbfdde61d7054..1769cbd12da3e 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -168,12 +168,15 @@ let cmd = Base.julia_cmd()
         println("done")
         print(Profile.len_data())
         """
-    p = open(`$cmd -e $script`)
+    # use multiple threads here to ensure that profiling works with threading
+    p = open(`$cmd -t2 -e $script`)
     t = Timer(120) do t
         # should be under 10 seconds, so give it 2 minutes then report failure
         println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n")
-        kill(p, Base.SIGTERM)
-        sleep(10)
+        kill(p, Base.SIGQUIT)
+        sleep(30)
+        kill(p, Base.SIGQUIT)
+        sleep(30)
         kill(p, Base.SIGKILL)
     end
     s = read(p, String)
@@ -202,8 +205,10 @@ if Sys.isbsd() || Sys.islinux()
             t = Timer(120) do t
                 # should be under 10 seconds, so give it 2 minutes then report failure
                 println("KILLING siginfo/sigusr1 test BY PROFILE TEST WATCHDOG\n")
-                kill(p, Base.SIGTERM)
-                sleep(10)
+                kill(p, Base.SIGQUIT)
+                sleep(30)
+                kill(p, Base.SIGQUIT)
+                sleep(30)
                 kill(p, Base.SIGKILL)
                 close(notify_exit)
             end
@@ -275,16 +280,31 @@ end
 
 @testset "HeapSnapshot" begin
     tmpdir = mktempdir()
+
+    # ensure that we can prevent redacting data
     fname = cd(tmpdir) do
-        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
     end
 
     @test isfile(fname)
 
-    open(fname) do fs
-        @test readline(fs) != ""
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test contains(sshot, "redact_this")
+
+    rm(fname)
+
+    # ensure that string data is redacted by default
+    fname = cd(tmpdir) do
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
     end
 
+    @test isfile(fname)
+
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test !contains(sshot, "redact_this")
+
     rm(fname)
     rm(tmpdir, force = true, recursive = true)
 end
diff --git a/stdlib/Project.toml b/stdlib/Project.toml
new file mode 100644
index 0000000000000..cc7ba99dd4e4f
--- /dev/null
+++ b/stdlib/Project.toml
@@ -0,0 +1,61 @@
+[deps]
+ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109"
+LLVMLibUnwind_jll = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
+LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+LibCURL = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+LibCURL_jll = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+LibUV_jll = "183b4373-6708-53ba-ad28-60e28bb38547"
+LibUnwind_jll = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+MPFR_jll = "3a97d323-0669-5f0c-9066-3539efd106a3"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
+MozillaCACerts_jll = "14a3606d-f60d-562e-9121-12d972cd8159"
+NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
+OpenLibm_jll = "05823500-19ac-5b8b-9628-191a04bc5112"
+PCRE2_jll = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+SuiteSparse_jll = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
+dSFMT_jll = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
+libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
+libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
+nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/stdlib/REPL/Project.toml b/stdlib/REPL/Project.toml
index 6318bd0258ab3..f60a6a4766093 100644
--- a/stdlib/REPL/Project.toml
+++ b/stdlib/REPL/Project.toml
@@ -4,14 +4,16 @@ version = "1.11.0"
 
 [deps]
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
 [extras]
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Random"]
+test = ["Logging", "Test", "Random"]
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index d2a17e3a6b4a3..6250fc84dc6b2 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -50,14 +50,16 @@ julia> ans
 
 In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text
 that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as
-well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are
+well as the other REPL mode prompts: `shell> `, `help?> `, `pkg> ` ) are parsed, but others are
 removed. This makes it possible to paste a chunk of text that has been copied from a REPL session
 without having to scrub away prompts and outputs. This feature is enabled by default but can be
 disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it
 out by pasting the code block above this paragraph straight into the REPL. This feature does not
 work on the standard Windows command prompt due to its limitation at detecting when a paste occurs.
 
-Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref).
+A non-[`nothing`](@ref) result of executing an expression is displayed by the REPL using the [`show`](@ref) function
+with a specific [`IOContext`](@ref) (via [`display`](@ref), which defaults to calling
+`show(io, MIME("text/plain"), ans)`, which in turn defaults to `show(io, ans)`).
 In particular, the `:limit` attribute is set to `true`.
 Other attributes can receive in certain `show` methods a default value if it's not already set,
 like `:compact`.
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 3b63a256dafe2..5af03e0df9b6d 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -3,7 +3,7 @@
 module LineEdit
 
 import ..REPL
-using REPL: AbstractREPL, Options
+using ..REPL: AbstractREPL, Options
 
 using ..Terminals
 import ..Terminals: raw!, width, height, clear_line, beep
@@ -66,6 +66,7 @@ show(io::IO, x::Prompt) = show(io, string("Prompt(\"", prompt_string(x.prompt),
 mutable struct MIState
     interface::ModalInterface
     active_module::Module
+    previous_active_module::Module
     current_mode::TextInterface
     aborted::Bool
     mode_state::IdDict{TextInterface,ModeState}
@@ -75,9 +76,10 @@ mutable struct MIState
     key_repeats::Int
     last_action::Symbol
     current_action::Symbol
+    async_channel::Channel{Function}
 end
 
-MIState(i, mod, c, a, m) = MIState(i, mod, c, a, m, String[], 0, Char[], 0, :none, :none)
+MIState(i, mod, c, a, m) = MIState(i, mod, mod, c, a, m, String[], 0, Char[], 0, :none, :none, Channel{Function}())
 
 const BufferLike = Union{MIState,ModeState,IOBuffer}
 const State = Union{MIState,ModeState}
@@ -179,11 +181,11 @@ struct EmptyHistoryProvider <: HistoryProvider end
 
 reset_state(::EmptyHistoryProvider) = nothing
 
-complete_line(c::EmptyCompletionProvider, s) = String[], "", true
+complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = String[], "", true
 
 # complete_line can be specialized for only two arguments, when the active module
 # doesn't matter (e.g. Pkg does this)
-complete_line(c::CompletionProvider, s, ::Module) = complete_line(c, s)
+complete_line(c::CompletionProvider, s, ::Module; hint::Bool=false) = complete_line(c, s; hint)
 
 terminal(s::IO) = s
 terminal(s::PromptState) = s.terminal
@@ -380,7 +382,7 @@ function check_for_hint(s::MIState)
         # Requires making space for them earlier in refresh_multi_line
         return clear_hint(st)
     end
-    completions, partial, should_complete = complete_line(st.p.complete, st, s.active_module)::Tuple{Vector{String},String,Bool}
+    completions, partial, should_complete = complete_line(st.p.complete, st, s.active_module; hint = true)::Tuple{Vector{String},String,Bool}
     isempty(completions) && return clear_hint(st)
     # Don't complete for single chars, given e.g. `x` completes to `xor`
     if length(partial) > 1 && should_complete
@@ -416,8 +418,8 @@ function clear_hint(s::ModeState)
     end
 end
 
-function complete_line(s::PromptState, repeats::Int, mod::Module)
-    completions, partial, should_complete = complete_line(s.p.complete, s, mod)::Tuple{Vector{String},String,Bool}
+function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=false)
+    completions, partial, should_complete = complete_line(s.p.complete, s, mod; hint)::Tuple{Vector{String},String,Bool}
     isempty(completions) && return false
     if !should_complete
         # should_complete is false for cases where we only want to show
@@ -479,14 +481,12 @@ prompt_string(f::Function) = Base.invokelatest(f)
 function maybe_show_hint(s::PromptState)
     isa(s.hint, String) || return nothing
     # The hint being "" then nothing is used to first clear a previous hint, then skip printing the hint
-    # the clear line cannot be printed each time because it breaks column movement
     if isempty(s.hint)
-        print(terminal(s), "\e[0K") # clear remainder of line which had a hint
         s.hint = nothing
     else
         Base.printstyled(terminal(s), s.hint, color=:light_black)
         cmove_left(terminal(s), textwidth(s.hint))
-        s.hint = "" # being "" signals to do one clear line remainder to clear the hint next time if still empty
+        s.hint = "" # being "" signals to do one clear line remainder to clear the hint next time the screen is refreshed
     end
     return nothing
 end
@@ -496,8 +496,13 @@ function refresh_multi_line(s::PromptState; kw...)
         close(s.refresh_wait)
         s.refresh_wait = nothing
     end
+    if s.hint isa String
+        # clear remainder of line which is unknown here if it had a hint before unbeknownst to refresh_multi_line
+        # the clear line cannot be printed each time because it would break column movement
+        print(terminal(s), "\e[0K")
+    end
     r = refresh_multi_line(terminal(s), s; kw...)
-    maybe_show_hint(s)
+    maybe_show_hint(s) # now maybe write the hint back to the screen
     return r
 end
 refresh_multi_line(s::ModeState; kw...) = refresh_multi_line(terminal(s), s; kw...)
@@ -737,7 +742,26 @@ function edit_move_right(buf::IOBuffer)
     end
     return false
 end
-edit_move_right(s::PromptState) = edit_move_right(s.input_buffer) ? refresh_line(s) : false
+function edit_move_right(m::MIState)
+    s = state(m)
+    buf = s.input_buffer
+    if edit_move_right(s.input_buffer)
+        refresh_line(s)
+        return true
+    else
+        completions, partial, should_complete = complete_line(s.p.complete, s, m.active_module)
+        if should_complete && eof(buf) && length(completions) == 1 && length(partial) > 1
+            # Replace word by completion
+            prev_pos = position(s)
+            push_undo(s)
+            edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1])
+            refresh_line(state(s))
+            return true
+        else
+            return false
+        end
+    end
+end
 
 function edit_move_word_right(s::PromptState)
     if !eof(s.input_buffer)
@@ -1487,13 +1511,11 @@ end
 
 current_word_with_dots(s::MIState) = current_word_with_dots(buffer(s))
 
-previous_active_module::Module = Main
-
 function activate_module(s::MIState)
     word = current_word_with_dots(s);
     empty = isempty(word)
     mod = if empty
-        previous_active_module
+        s.previous_active_module
     else
         try
             Base.Core.eval(Base.active_module(), Base.Meta.parse(word))
@@ -1509,7 +1531,7 @@ function activate_module(s::MIState)
     if Base.active_module() == Main || mod == Main
         # At least one needs to be Main. Disallows toggling between two non-Main modules because it's
         # otherwise hard to get back to Main
-        global previous_active_module = Base.active_module()
+        s.previous_active_module = Base.active_module()
     end
     REPL.activate(mod)
     edit_clear(s)
@@ -2146,8 +2168,8 @@ setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m)
 setmodifiers!(c) = nothing
 
 # Search Mode completions
-function complete_line(s::SearchState, repeats, mod::Module)
-    completions, partial, should_complete = complete_line(s.histprompt.complete, s, mod)
+function complete_line(s::SearchState, repeats, mod::Module; hint::Bool=false)
+    completions, partial, should_complete = complete_line(s.histprompt.complete, s, mod; hint)
     # For now only allow exact completions in search mode
     if length(completions) == 1
         prev_pos = position(s)
@@ -2306,7 +2328,7 @@ keymap_data(state, ::Union{HistoryPrompt, PrefixHistoryPrompt}) = state
 
 Base.isempty(s::PromptState) = s.input_buffer.size == 0
 
-on_enter(s::PromptState) = s.p.on_enter(s)
+on_enter(s::MIState) = state(s).p.on_enter(s)
 
 move_input_start(s::BufferLike) = (seek(buffer(s), 0); nothing)
 move_input_end(buf::IOBuffer) = (seekend(buf); nothing)
@@ -2819,44 +2841,61 @@ keymap_data(ms::MIState, m::ModalInterface) = keymap_data(state(ms), mode(ms))
 
 function prompt!(term::TextTerminal, prompt::ModalInterface, s::MIState = init_state(term, prompt))
     Base.reseteof(term)
+    l = Base.ReentrantLock()
+    t1 = Threads.@spawn :interactive while true
+        wait(s.async_channel)
+        status = @lock l begin
+            fcn = take!(s.async_channel)
+            fcn(s)
+        end
+        status ∈ (:ok, :ignore) || break
+    end
     raw!(term, true)
     enable_bracketed_paste(term)
     try
         activate(prompt, s, term, term)
         old_state = mode(s)
-        while true
-            kmap = keymap(s, prompt)
-            fcn = match_input(kmap, s)
-            kdata = keymap_data(s, prompt)
-            s.current_action = :unknown # if the to-be-run action doesn't update this field,
-                                        # :unknown will be recorded in the last_action field
-            local status
-            # errors in keymaps shouldn't cause the REPL to fail, so wrap in a
-            # try/catch block
-            try
-                status = fcn(s, kdata)
-            catch e
-                @error "Error in the keymap" exception=e,catch_backtrace()
-                # try to cleanup and get `s` back to its original state before returning
-                transition(s, :reset)
-                transition(s, old_state)
-                status = :done
-            end
-            status !== :ignore && (s.last_action = s.current_action)
-            if status === :abort
-                s.aborted = true
-                return buffer(s), false, false
-            elseif status === :done
-                return buffer(s), true, false
-            elseif status === :suspend
-                if Sys.isunix()
-                    return buffer(s), true, true
+        # spawn this because the main repl task is sticky (due to use of @async and _wait2)
+        # and we want to not block typing when the repl task thread is busy
+        t2 = Threads.@spawn :interactive while true
+            eof(term) || peek(term) # wait before locking but don't consume
+            @lock l begin
+                kmap = keymap(s, prompt)
+                fcn = match_input(kmap, s)
+                kdata = keymap_data(s, prompt)
+                s.current_action = :unknown # if the to-be-run action doesn't update this field,
+                                            # :unknown will be recorded in the last_action field
+                local status
+                # errors in keymaps shouldn't cause the REPL to fail, so wrap in a
+                # try/catch block
+                try
+                    status = fcn(s, kdata)
+                catch e
+                    @error "Error in the keymap" exception=e,catch_backtrace()
+                    # try to cleanup and get `s` back to its original state before returning
+                    transition(s, :reset)
+                    transition(s, old_state)
+                    status = :done
+                end
+                status !== :ignore && (s.last_action = s.current_action)
+                if status === :abort
+                    s.aborted = true
+                    return buffer(s), false, false
+                elseif status === :done
+                    return buffer(s), true, false
+                elseif status === :suspend
+                    if Sys.isunix()
+                        return buffer(s), true, true
+                    end
+                else
+                    @assert status ∈ (:ok, :ignore)
                 end
-            else
-                @assert status ∈ (:ok, :ignore)
             end
         end
+        return fetch(t2)
     finally
+        put!(s.async_channel, Returns(:done))
+        wait(t1)
         raw!(term, false) && disable_bracketed_paste(term)
     end
     # unreachable
diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl
new file mode 100644
index 0000000000000..ebd0cd255ce19
--- /dev/null
+++ b/stdlib/REPL/src/Pkg_beforeload.jl
@@ -0,0 +1,131 @@
+## Pkg stuff needed before Pkg has loaded
+
+const Pkg_pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg")
+const Pkg_REPLExt_pkgid = Base.PkgId(Base.UUID("ceef7b17-42e7-5b1c-81d4-4cc4a2494ccf"), "REPLExt")
+
+function load_pkg()
+    @lock Base.require_lock begin
+        REPLExt = Base.require_stdlib(Pkg_pkgid, "REPLExt")
+        # require_stdlib does not guarantee that the `__init__` of the package is done when loading is done async
+        # but we need to wait for the repl mode to be set up
+        lock = get(Base.package_locks, Pkg_REPLExt_pkgid.uuid, nothing)
+        lock !== nothing && wait(lock[2])
+        return REPLExt
+    end
+end
+
+## Below here copied/tweaked from Pkg Types.jl so that the dummy Pkg prompt
+# can populate the env correctly before Pkg loads
+
+function safe_realpath(path)
+    isempty(path) && return path
+    if ispath(path)
+        try
+            return realpath(path)
+        catch
+            return path
+        end
+    end
+    a, b = splitdir(path)
+    return joinpath(safe_realpath(a), b)
+end
+
+function find_project_file(env::Union{Nothing,String}=nothing)
+    project_file = nothing
+    if env isa Nothing
+        project_file = Base.active_project()
+        project_file === nothing && return nothing # in the Pkg version these are pkgerrors
+    elseif startswith(env, '@')
+        project_file = Base.load_path_expand(env)
+        project_file === nothing && return nothing
+    elseif env isa String
+        if isdir(env)
+            isempty(readdir(env)) || return nothing
+            project_file = joinpath(env, Base.project_names[end])
+        else
+            project_file = endswith(env, ".toml") ? abspath(env) :
+                abspath(env, Base.project_names[end])
+        end
+    end
+    @assert project_file isa String &&
+        (isfile(project_file) || !ispath(project_file) ||
+         isdir(project_file) && isempty(readdir(project_file)))
+    return safe_realpath(project_file)
+end
+
+function find_root_base_project(start_project::String)
+    project_file = start_project
+    while true
+        base_project_file = Base.base_project(project_file)
+        base_project_file === nothing && return project_file
+        project_file = base_project_file
+    end
+end
+
+function relative_project_path(project_file::String, path::String)
+    # compute path relative the project
+    # realpath needed to expand symlinks before taking the relative path
+    return relpath(safe_realpath(abspath(path)), safe_realpath(dirname(project_file)))
+end
+
+function projname(project_file::String)
+    if isfile(project_file)
+        name = try
+            # The `nothing` here means that this TOML parser does not return proper Dates.jl
+            # objects - but that's OK since we're just checking the name here.
+            p = Base.TOML.Parser{nothing}()
+            Base.TOML.reinit!(p, read(project_file, String); filepath=project_file)
+            proj = Base.TOML.parse(p)
+            get(proj, "name", nothing)
+        catch
+            nothing
+        end
+    else
+        name = nothing
+    end
+    if name === nothing
+        name = basename(dirname(project_file))
+    end
+    for depot in Base.DEPOT_PATH
+        envdir = joinpath(depot, "environments")
+        if startswith(abspath(project_file), abspath(envdir))
+            return "@" * name
+        end
+    end
+    return name
+end
+
+prev_project_file = nothing
+prev_project_timestamp = nothing
+prev_prefix = ""
+
+function Pkg_promptf()
+    global prev_project_timestamp, prev_prefix, prev_project_file
+    project_file = find_project_file()
+    prefix = ""
+    if project_file !== nothing
+        if prev_project_file == project_file && prev_project_timestamp == mtime(project_file)
+            prefix = prev_prefix
+        else
+            project_name = projname(project_file)
+            if project_name !== nothing
+                root = find_root_base_project(project_file)
+                rootname = projname(root)
+                if root !== project_file
+                    path_prefix = "/" * dirname(relative_project_path(root, project_file))
+                else
+                    path_prefix = ""
+                end
+                if textwidth(rootname) > 30
+                    rootname = first(rootname, 27) * "..."
+                end
+                prefix = "($(rootname)$(path_prefix)) "
+                prev_prefix = prefix
+                prev_project_timestamp = mtime(project_file)
+                prev_project_file = project_file
+            end
+        end
+    end
+    # Note no handling of Pkg.offline, as the Pkg version does here
+    return "$(prefix)$(PKG_PROMPT)"
+end
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index bb5915c192b6e..ddf2f55d0b9f7 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -33,35 +33,19 @@ function UndefVarError_hint(io::IO, ex::UndefVarError)
     if isdefined(ex, :scope)
         scope = ex.scope
         if scope isa Module
-            bnd = ccall(:jl_get_module_binding, Any, (Any, Any, Cint), scope, var, true)::Core.Binding
-            if isdefined(bnd, :owner)
-                owner = bnd.owner
-                if owner === bnd
-                    print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.")
-                end
+            bpart = Base.lookup_binding_partition(Base.get_world_counter(), GlobalRef(scope, var))
+            kind = Base.binding_kind(bpart)
+            if kind === Base.BINDING_KIND_GLOBAL || kind === Base.BINDING_KIND_CONST || kind == Base.BINDING_KIND_DECLARED
+                print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.")
+            elseif kind === Base.BINDING_KIND_FAILED
+                print(io, "\nHint: It looks like two or more modules export different ",
+                "bindings with this name, resulting in ambiguity. Try explicitly ",
+                "importing it from a particular module, or qualifying the name ",
+                "with the module it should come from.")
+            elseif kind === Base.BINDING_KIND_GUARD
+                print(io, "\nSuggestion: check for spelling errors or missing imports.")
             else
-                owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), scope, var)
-                if C_NULL == owner
-                    # No global of this name exists in this module.
-                    # This is the common case, so do not print that information.
-                    # It could be the binding was exported by two modules, which we can detect
-                    # by the `usingfailed` flag in the binding:
-                    if isdefined(bnd, :flags) && Bool(bnd.flags >> 4 & 1) # magic location of the `usingfailed` flag
-                        print(io, "\nHint: It looks like two or more modules export different ",
-                              "bindings with this name, resulting in ambiguity. Try explicitly ",
-                              "importing it from a particular module, or qualifying the name ",
-                              "with the module it should come from.")
-                    else
-                        print(io, "\nSuggestion: check for spelling errors or missing imports.")
-                    end
-                    owner = bnd
-                else
-                    owner = unsafe_pointer_to_objref(owner)::Core.Binding
-                end
-            end
-            if owner !== bnd
-                # this could use jl_binding_dbgmodule for the exported location in the message too
-                print(io, "\nSuggestion: this global was defined as `$(owner.globalref)` but not assigned a value.")
+                print(io, "\nSuggestion: this global was defined as `$(bpart.restriction.globalref)` but not assigned a value.")
             end
         elseif scope === :static_parameter
             print(io, "\nSuggestion: run Test.detect_unbound_args to detect method arguments that do not fully constrain a type parameter.")
@@ -101,6 +85,7 @@ function __init__()
 end
 
 using Base.Meta, Sockets, StyledStrings
+using JuliaSyntaxHighlighting
 import InteractiveUtils
 
 export
@@ -109,6 +94,8 @@ export
     LineEditREPL,
     StreamREPL
 
+public TerminalMenus
+
 import Base:
     AbstractDisplay,
     display,
@@ -127,7 +114,7 @@ include("options.jl")
 
 include("LineEdit.jl")
 using .LineEdit
-import ..LineEdit:
+import .LineEdit:
     CompletionProvider,
     HistoryProvider,
     add_history,
@@ -151,6 +138,8 @@ using .REPLCompletions
 include("TerminalMenus/TerminalMenus.jl")
 include("docview.jl")
 
+include("Pkg_beforeload.jl")
+
 @nospecialize # use only declared type signatures
 
 answer_color(::AbstractREPL) = ""
@@ -204,7 +193,129 @@ end
 # Temporary alias until Documenter updates
 const softscope! = softscope
 
-const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
+function print_qualified_access_warning(mod::Module, owner::Module, name::Symbol)
+    @warn string(name, " is defined in ", owner, " and is not public in ", mod) maxlog = 1 _id = string("repl-warning-", mod, "-", owner, "-", name) _line = nothing _file = nothing _module = nothing
+end
+
+function has_ancestor(query::Module, target::Module)
+    query == target && return true
+    while true
+        next = parentmodule(query)
+        next == target && return true
+        next == query && return false
+        query = next
+    end
+end
+
+retrieve_modules(::Module, ::Any) = (nothing,)
+function retrieve_modules(current_module::Module, mod_name::Symbol)
+    mod = try
+        getproperty(current_module, mod_name)
+    catch
+        return (nothing,)
+    end
+    return (mod isa Module ? mod : nothing,)
+end
+retrieve_modules(current_module::Module, mod_name::QuoteNode) = retrieve_modules(current_module, mod_name.value)
+function retrieve_modules(current_module::Module, mod_expr::Expr)
+    if Meta.isexpr(mod_expr, :., 2)
+        current_module = retrieve_modules(current_module, mod_expr.args[1])[1]
+        current_module === nothing && return (nothing,)
+        return (current_module, retrieve_modules(current_module, mod_expr.args[2])...)
+    else
+        return (nothing,)
+    end
+end
+
+add_locals!(locals, ast::Any) = nothing
+function add_locals!(locals, ast::Expr)
+    for arg in ast.args
+        add_locals!(locals, arg)
+    end
+    return nothing
+end
+function add_locals!(locals, ast::Symbol)
+    push!(locals, ast)
+    return nothing
+end
+
+function collect_names_to_warn!(warnings, locals, current_module::Module, ast)
+    ast isa Expr || return
+
+    # don't recurse through module definitions
+    ast.head === :module && return
+
+    if Meta.isexpr(ast, :., 2)
+        mod_name, name_being_accessed = ast.args
+        # retrieve the (possibly-nested) module being named here
+        mods = retrieve_modules(current_module, mod_name)
+        all(x -> x isa Module, mods) || return
+        outer_mod = first(mods)
+        mod = last(mods)
+        if name_being_accessed isa QuoteNode
+            name_being_accessed = name_being_accessed.value
+        end
+        name_being_accessed isa Symbol || return
+        owner = try
+            which(mod, name_being_accessed)
+        catch
+            return
+        end
+        # if `owner` is a submodule of `mod`, then don't warn. E.g. the name `parse` is present in the module `JSON`
+        # but is owned by `JSON.Parser`; we don't warn if it is accessed as `JSON.parse`.
+        has_ancestor(owner, mod) && return
+        # Don't warn if the name is public in the module we are accessing it
+        Base.ispublic(mod, name_being_accessed) && return
+        # Don't warn if accessing names defined in Core from Base if they are present in Base (e.g. `Base.throw`).
+        mod === Base && Base.ispublic(Core, name_being_accessed) && return
+        push!(warnings, (; outer_mod, mod, owner, name_being_accessed))
+        # no recursion
+        return
+    elseif Meta.isexpr(ast, :(=), 2)
+        lhs, rhs = ast.args
+        # any symbols we find on the LHS we will count as local. This can potentially be overzealous,
+        # but we want to avoid false positives (unnecessary warnings) more than false negatives.
+        add_locals!(locals, lhs)
+        # we'll recurse into the RHS only
+        return collect_names_to_warn!(warnings, locals, current_module, rhs)
+    elseif Meta.isexpr(ast, :function) && length(ast.args) >= 1
+
+        if Meta.isexpr(ast.args[1], :call, 2)
+            func_name, func_args = ast.args[1].args
+            # here we have a function definition and are inspecting it's arguments for local variables.
+            # we will error on the conservative side by adding all symbols we find (regardless if they are local variables or possibly-global default values)
+            add_locals!(locals, func_args)
+        end
+        # fall through to general recursion
+    end
+
+    for arg in ast.args
+        collect_names_to_warn!(warnings, locals, current_module, arg)
+    end
+
+    return nothing
+end
+
+function collect_qualified_access_warnings(current_mod, ast)
+    warnings = Set()
+    locals = Set{Symbol}()
+    collect_names_to_warn!(warnings, locals, current_mod, ast)
+    filter!(warnings) do (; outer_mod)
+        nameof(outer_mod) ∉ locals
+    end
+    return warnings
+end
+
+function warn_on_non_owning_accesses(current_mod, ast)
+    warnings = collect_qualified_access_warnings(current_mod, ast)
+    for (; outer_mod, mod, owner, name_being_accessed) in warnings
+        print_qualified_access_warning(mod, owner, name_being_accessed)
+    end
+    return ast
+end
+warn_on_non_owning_accesses(ast) = warn_on_non_owning_accesses(Base.active_module(), ast)
+
+const repl_ast_transforms = Any[softscope, warn_on_non_owning_accesses] # defaults for new REPL backends
 
 # Allows an external package to add hooks into the code loading.
 # The hook should take a Vector{Symbol} of package names and
@@ -212,6 +323,27 @@ const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
 # to e.g. install packages on demand
 const install_packages_hooks = Any[]
 
+# N.B.: Any functions starting with __repl_entry cut off backtraces when printing in the REPL.
+# We need to do this for both the actual eval and macroexpand, since the latter can cause custom macro
+# code to run (and error).
+__repl_entry_lower_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Cint}) =
+    ccall(:jl_expand_with_loc, Any, (Any, Any, Ptr{UInt8}, Cint), ast, mod, toplevel_file[], toplevel_line[])
+__repl_entry_eval_expanded_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Cint}) =
+    ccall(:jl_toplevel_eval_flex, Any, (Any, Any, Cint, Cint, Ptr{Ptr{UInt8}}, Ptr{Cint}), mod, ast, 1, 1, toplevel_file, toplevel_line)
+
+function toplevel_eval_with_hooks(mod::Module, @nospecialize(ast), toplevel_file=Ref{Ptr{UInt8}}(Base.unsafe_convert(Ptr{UInt8}, :REPL)), toplevel_line=Ref{Cint}(1))
+    if !isexpr(ast, :toplevel)
+        ast = __repl_entry_lower_with_loc(mod, ast, toplevel_file, toplevel_line)
+        check_for_missing_packages_and_run_hooks(ast)
+        return __repl_entry_eval_expanded_with_loc(mod, ast, toplevel_file, toplevel_line)
+    end
+    local value=nothing
+    for i = 1:length(ast.args)
+        value = toplevel_eval_with_hooks(mod, ast.args[i], toplevel_file, toplevel_line)
+    end
+    return value
+end
+
 function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module)
     lasterr = nothing
     Base.sigatomic_begin()
@@ -222,13 +354,10 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module)
                 put!(backend.response_channel, Pair{Any, Bool}(lasterr, true))
             else
                 backend.in_eval = true
-                if !isempty(install_packages_hooks)
-                    check_for_missing_packages_and_run_hooks(ast)
-                end
                 for xf in backend.ast_transforms
                     ast = Base.invokelatest(xf, ast)
                 end
-                value = Core.eval(mod, ast)
+                value = toplevel_eval_with_hooks(mod, ast)
                 backend.in_eval = false
                 setglobal!(Base.MainInclude, :ans, value)
                 put!(backend.response_channel, Pair{Any, Bool}(value, false))
@@ -251,33 +380,49 @@ function check_for_missing_packages_and_run_hooks(ast)
     mods = modules_to_be_loaded(ast)
     filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules
     if !isempty(mods)
+        isempty(install_packages_hooks) && load_pkg()
         for f in install_packages_hooks
             Base.invokelatest(f, mods) && return
         end
     end
 end
 
-function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+function _modules_to_be_loaded!(ast::Expr, mods::Vector{Symbol})
     ast.head === :quote && return mods # don't search if it's not going to be run during this eval
     if ast.head === :using || ast.head === :import
         for arg in ast.args
             arg = arg::Expr
             arg1 = first(arg.args)
             if arg1 isa Symbol # i.e. `Foo`
-                if arg1 != :. # don't include local imports
+                if arg1 != :. # don't include local import `import .Foo`
                     push!(mods, arg1)
                 end
             else # i.e. `Foo: bar`
-                push!(mods, first((arg1::Expr).args))
+                sym = first((arg1::Expr).args)::Symbol
+                if sym != :. # don't include local import `import .Foo: a`
+                    push!(mods, sym)
+                end
             end
         end
     end
-    for arg in ast.args
-        if isexpr(arg, (:block, :if, :using, :import))
-            modules_to_be_loaded(arg, mods)
+    if ast.head !== :thunk
+        for arg in ast.args
+            if isexpr(arg, (:block, :if, :using, :import))
+                _modules_to_be_loaded!(arg, mods)
+            end
+        end
+    else
+        code = ast.args[1]
+        for arg in code.code
+            isa(arg, Expr) || continue
+            _modules_to_be_loaded!(arg, mods)
         end
     end
-    filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
+end
+
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+    _modules_to_be_loaded!(ast, mods)
+    filter!(mod::Symbol -> !in(mod, (:Base, :Main, :Core)), mods) # Exclude special non-package modules
     return unique(mods)
 end
 
@@ -336,7 +481,7 @@ end
 function display(d::REPLDisplay, mime::MIME"text/plain", x)
     x = Ref{Any}(x)
     with_repl_linfo(d.repl) do io
-        io = IOContext(io, :limit => true, :module => active_module(d)::Module)
+        io = IOContext(io, :limit => true, :module => Base.active_module(d)::Module)
         if d.repl isa LineEditREPL
             mistate = d.repl.mistate
             mode = LineEdit.mode(mistate)
@@ -349,17 +494,24 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x)
             # this can override the :limit property set initially
             io = foldl(IOContext, d.repl.options.iocontext, init=io)
         end
-        show(io, mime, x[])
+        show_repl(io, mime, x[])
         println(io)
     end
     return nothing
 end
+
 display(d::REPLDisplay, x) = display(d, MIME("text/plain"), x)
 
+show_repl(io::IO, mime::MIME"text/plain", x) = show(io, mime, x)
+
+show_repl(io::IO, ::MIME"text/plain", ex::Expr) =
+    print(io, JuliaSyntaxHighlighting.highlight(
+        sprint(show, ex, context=IOContext(io, :color => false))))
+
 function print_response(repl::AbstractREPL, response, show_value::Bool, have_color::Bool)
     repl.waserror = response[2]
     with_repl_linfo(repl) do io
-        io = IOContext(io, :module => active_module(repl)::Module)
+        io = IOContext(io, :module => Base.active_module(repl)::Module)
         print_response(io, response, show_value, have_color, specialdisplay(repl))
     end
     return nothing
@@ -460,7 +612,7 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba
             Core.println(Core.stderr, e)
             Core.println(Core.stderr, catch_backtrace())
         end
-    get_module = () -> active_module(repl)
+    get_module = () -> Base.active_module(repl)
     if backend_on_current_task
         t = @async run_frontend(repl, backend_ref)
         errormonitor(t)
@@ -544,6 +696,7 @@ mutable struct LineEditREPL <: AbstractREPL
     answer_color::String
     shell_color::String
     help_color::String
+    pkg_color::String
     history_file::Bool
     in_shell::Bool
     in_help::Bool
@@ -556,13 +709,13 @@ mutable struct LineEditREPL <: AbstractREPL
     interface::ModalInterface
     backendref::REPLBackendRef
     frontend_task::Task
-    function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,in_help,envcolors)
+    function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell,in_help,envcolors)
         opts = Options()
         opts.hascolor = hascolor
         if !hascolor
             opts.beep_colors = [""]
         end
-        new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,
+        new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell,
             in_help,envcolors,false,nothing, opts, nothing, Tuple{String,Int}[])
     end
 end
@@ -579,6 +732,7 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) =
         hascolor ? Base.answer_color() : "",
         hascolor ? Base.text_colors[:red] : "",
         hascolor ? Base.text_colors[:yellow] : "",
+        hascolor ? Base.text_colors[:blue] : "",
         false, false, false, envcolors
     )
 
@@ -590,13 +744,9 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
-function active_module() # this method is also called from Base
-    isdefined(Base, :active_repl) || return Main
-    return active_module(Base.active_repl::AbstractREPL)
-end
-active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
-active_module(::AbstractREPL) = Main
-active_module(d::REPLDisplay) = active_module(d.repl)
+Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
+Base.active_module(::AbstractREPL) = Main
+Base.active_module(d::REPLDisplay) = Base.active_module(d.repl)
 
 setmodifiers!(c::CompletionProvider, m::LineEdit.Modifiers) = nothing
 
@@ -618,27 +768,27 @@ end
 
 beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1])
 
-function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module)
+function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module; hint::Bool=false)
     partial = beforecursor(s.input_buffer)
     full = LineEdit.input_string(s)
-    ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift)
+    ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift, hint)
     c.modifiers = LineEdit.Modifiers()
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete
 end
 
-function complete_line(c::ShellCompletionProvider, s::PromptState)
+function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=false)
     # First parse everything up to the current position
     partial = beforecursor(s.input_buffer)
     full = LineEdit.input_string(s)
-    ret, range, should_complete = shell_completions(full, lastindex(partial))
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    ret, range, should_complete = shell_completions(full, lastindex(partial), hint)
+    return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete
 end
 
-function complete_line(c::LatexCompletions, s)
+function complete_line(c::LatexCompletions, s; hint::Bool=false)
     partial = beforecursor(LineEdit.buffer(s))
     full = LineEdit.input_string(s)::String
-    ret, range, should_complete = bslash_completions(full, lastindex(partial))[2]
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    ret, range, should_complete = bslash_completions(full, lastindex(partial), hint)[2]
+    return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete
 end
 
 with_repl_linfo(f, repl) = f(outstream(repl))
@@ -962,7 +1112,7 @@ end
 
 find_hist_file() = get(ENV, "JULIA_HISTORY",
                        !isempty(DEPOT_PATH) ? joinpath(DEPOT_PATH[1], "logs", "repl_history.jl") :
-                       error("DEPOT_PATH is empty and and ENV[\"JULIA_HISTORY\"] not set."))
+                       error("DEPOT_PATH is empty and ENV[\"JULIA_HISTORY\"] not set."))
 
 backend(r::AbstractREPL) = r.backendref
 
@@ -1035,7 +1185,7 @@ enable_promptpaste(v::Bool) = JL_PROMPT_PASTE[] = v
 
 function contextual_prompt(repl::LineEditREPL, prompt::Union{String,Function})
     function ()
-        mod = active_module(repl)
+        mod = Base.active_module(repl)
         prefix = mod == Main ? "" : string('(', mod, ") ")
         pr = prompt isa String ? prompt : prompt()
         prefix * pr
@@ -1049,6 +1199,7 @@ setup_interface(
     extra_repl_keymap::Any = repl.options.extra_keymap
 ) = setup_interface(repl, hascolor, extra_repl_keymap)
 
+
 # This non keyword method can be precompiled which is important
 function setup_interface(
     repl::LineEditREPL,
@@ -1096,7 +1247,7 @@ function setup_interface(
         on_enter = return_callback)
 
     # Setup help mode
-    help_mode = Prompt(contextual_prompt(repl, "help?> "),
+    help_mode = Prompt(contextual_prompt(repl, HELP_PROMPT),
         prompt_prefix = hascolor ? repl.help_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
@@ -1124,6 +1275,34 @@ function setup_interface(
         end,
         sticky = true)
 
+    # Set up dummy Pkg mode that will be replaced once Pkg is loaded
+    # use 6 dots to occupy the same space as the most likely "@v1.xx" env name
+    dummy_pkg_mode = Prompt(Pkg_promptf,
+        prompt_prefix = hascolor ? repl.pkg_color : "",
+        prompt_suffix = hascolor ?
+        (repl.envcolors ? Base.input_color : repl.input_color) : "",
+        repl = repl,
+        complete = LineEdit.EmptyCompletionProvider(),
+        on_done = respond(line->nothing, repl, julia_prompt),
+        on_enter = function (s::MIState)
+                # This is hit when the user tries to execute a command before the real Pkg mode has been
+                # switched to. Ok to do this even if Pkg is loading on the other task because of the loading lock.
+                REPLExt = load_pkg()
+                if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider)
+                    for mode in repl.interface.modes
+                        if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider
+                            # pkg mode
+                            buf = copy(LineEdit.buffer(s))
+                            transition(s, mode) do
+                                LineEdit.state(s, mode).input_buffer = buf
+                            end
+                        end
+                    end
+                end
+                return true
+            end,
+        sticky = true)
+
 
     ################################# Stage II #############################
 
@@ -1131,7 +1310,8 @@ function setup_interface(
     # We will have a unified history for all REPL modes
     hp = REPLHistoryProvider(Dict{Symbol,Prompt}(:julia => julia_prompt,
                                                  :shell => shell_mode,
-                                                 :help  => help_mode))
+                                                 :help  => help_mode,
+                                                 :pkg  => dummy_pkg_mode))
     if repl.history_file
         try
             hist_path = find_hist_file()
@@ -1154,6 +1334,7 @@ function setup_interface(
     julia_prompt.hist = hp
     shell_mode.hist = hp
     help_mode.hist = hp
+    dummy_pkg_mode.hist = hp
 
     julia_prompt.on_done = respond(x->Base.parse_input_line(x,filename=repl_filename(repl,hp)), repl, julia_prompt)
 
@@ -1163,8 +1344,8 @@ function setup_interface(
 
     shell_prompt_len = length(SHELL_PROMPT)
     help_prompt_len = length(HELP_PROMPT)
-    jl_prompt_regex = r"^In \[[0-9]+\]: |^(?:\(.+\) )?julia> "
-    pkg_prompt_regex = r"^(?:\(.+\) )?pkg> "
+    jl_prompt_regex = Regex("^In \\[[0-9]+\\]: |^(?:\\(.+\\) )?$JULIA_PROMPT")
+    pkg_prompt_regex = Regex("^(?:\\(.+\\) )?$PKG_PROMPT")
 
     # Canonicalize user keymap input
     if isa(extra_repl_keymap, Dict)
@@ -1194,27 +1375,39 @@ function setup_interface(
         end,
         ']' => function (s::MIState,o...)
             if isempty(s) || position(LineEdit.buffer(s)) == 0
-                pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg")
-                REPLExt = Base.require_stdlib(pkgid, "REPLExt")
-                pkg_mode = nothing
-                if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider)
-                    for mode in repl.interface.modes
-                        if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider
-                            pkg_mode = mode
-                            break
-                        end
-                    end
+                buf = copy(LineEdit.buffer(s))
+                transition(s, dummy_pkg_mode) do
+                    LineEdit.state(s, dummy_pkg_mode).input_buffer = buf
                 end
-                # TODO: Cache the `pkg_mode`?
-                if pkg_mode !== nothing
-                    buf = copy(LineEdit.buffer(s))
-                    transition(s, pkg_mode) do
-                        LineEdit.state(s, pkg_mode).input_buffer = buf
+                # load Pkg on another thread if available so that typing in the dummy Pkg prompt
+                # isn't blocked, but instruct the main REPL task to do the transition via s.async_channel
+                t_replswitch = Threads.@spawn begin
+                    REPLExt = load_pkg()
+                    if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider)
+                        put!(s.async_channel,
+                            function (s::MIState)
+                                LineEdit.mode(s) === dummy_pkg_mode || return :ok
+                                for mode in repl.interface.modes
+                                    if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider
+                                        buf = copy(LineEdit.buffer(s))
+                                        transition(s, mode) do
+                                            LineEdit.state(s, mode).input_buffer = buf
+                                        end
+                                        if !isempty(s) && @invokelatest(LineEdit.check_for_hint(s))
+                                            @invokelatest(LineEdit.refresh_line(s))
+                                        end
+                                        break
+                                    end
+                                end
+                                return :ok
+                            end
+                        )
                     end
-                    return
                 end
+                Base.errormonitor(t_replswitch)
+            else
+                edit_insert(s, ']')
             end
-            edit_insert(s, ']')
         end,
 
         # Bracketed Paste Mode
@@ -1348,7 +1541,7 @@ function setup_interface(
                     # execute the statement
                     terminal = LineEdit.terminal(s) # This is slightly ugly but ok for now
                     raw!(terminal, false) && disable_bracketed_paste(terminal)
-                    LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true)
+                    @invokelatest LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true)
                     raw!(terminal, true) && enable_bracketed_paste(terminal)
                     LineEdit.push_undo(s) # when the last line is incomplete
                 end
@@ -1394,9 +1587,9 @@ function setup_interface(
     b = Dict{Any,Any}[skeymap, mk, prefix_keymap, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
     prepend!(b, extra_repl_keymap)
 
-    shell_mode.keymap_dict = help_mode.keymap_dict = LineEdit.keymap(b)
+    shell_mode.keymap_dict = help_mode.keymap_dict = dummy_pkg_mode.keymap_dict = LineEdit.keymap(b)
 
-    allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, search_prompt, prefix_prompt]
+    allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, dummy_pkg_mode, search_prompt, prefix_prompt]
     return ModalInterface(allprompts)
 end
 
@@ -1566,7 +1759,7 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
         if have_color
             print(repl.stream,repl.prompt_color)
         end
-        print(repl.stream, "julia> ")
+        print(repl.stream, JULIA_PROMPT)
         if have_color
             print(repl.stream, input_color(repl))
         end
@@ -1590,7 +1783,7 @@ module Numbered
 
 using ..REPL
 
-__current_ast_transforms() = isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+__current_ast_transforms() = Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
 
 function repl_eval_counter(hp)
     return length(hp.history) - hp.start_idx
@@ -1652,14 +1845,13 @@ end
 
 function __current_ast_transforms(backend)
     if backend === nothing
-        isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+        Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
     else
         backend.ast_transforms
     end
 end
 
-
-function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing)
+function numbered_prompt!(repl::LineEditREPL=Base.active_repl::LineEditREPL, backend=nothing)
     n = Ref{Int}(0)
     set_prompt(repl, n)
     set_output_prefix(repl, n)
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index 5ffa718f38604..77f7fdf15cc9c 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -135,62 +135,67 @@ function appendmacro!(syms, macros, needle, endchar)
     end
 end
 
-function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool = false, imported::Bool = false)
-    ssyms = names(mod, all = all, imported = imported)
-    all || filter!(Base.Fix1(Base.isexported, mod), ssyms)
+function append_filtered_mod_names!(ffunc::Function, suggestions::Vector{Completion},
+                                    mod::Module, name::String, complete_internal_only::Bool)
+    imported = usings = !complete_internal_only
+    ssyms = names(mod; all=true, imported, usings)
     filter!(ffunc, ssyms)
     macros = filter(x -> startswith(String(x), "@" * name), ssyms)
     syms = String[sprint((io,s)->Base.show_sym(io, s; allow_macroname=true), s) for s in ssyms if completes_global(String(s), name)]
     appendmacro!(syms, macros, "_str", "\"")
     appendmacro!(syms, macros, "_cmd", "`")
-    return [ModuleCompletion(mod, sym) for sym in syms]
+    for sym in syms
+        push!(suggestions, ModuleCompletion(mod, sym))
+    end
+    return suggestions
 end
 
 # REPL Symbol Completions
-function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc), context_module::Module=Main)
-    mod = context_module
-
-    lookup_module = true
-    t = Union{}
-    val = nothing
-    if ex !== nothing
-        res = repl_eval_ex(ex, context_module)
+function complete_symbol!(suggestions::Vector{Completion},
+                          @nospecialize(prefix), name::String, context_module::Module;
+                          complete_modules_only::Bool=false,
+                          shift::Bool=false)
+    local mod, t, val
+    complete_internal_only = false
+    if prefix !== nothing
+        res = repl_eval_ex(prefix, context_module)
         res === nothing && return Completion[]
         if res isa Const
             val = res.val
             if isa(val, Module)
                 mod = val
-                lookup_module = true
+                if !shift
+                    # when module is explicitly accessed, show internal bindings that are
+                    # defined by the module, unless shift key is pressed
+                    complete_internal_only = true
+                end
             else
-                lookup_module = false
                 t = typeof(val)
             end
         else
-            lookup_module = false
             t = CC.widenconst(res)
         end
+    else
+        mod = context_module
     end
 
-    suggestions = Completion[]
-    if lookup_module
-        # We will exclude the results that the user does not want, as well
-        # as excluding Main.Main.Main, etc., because that's most likely not what
-        # the user wants
-        p = let mod=mod, modname=nameof(mod)
-            (s::Symbol) -> !Base.isdeprecated(mod, s) && s != modname && ffunc(mod, s)::Bool && !(mod === Main && s === :MainInclude)
-        end
-        # Looking for a binding in a module
-        if mod == context_module
-            # Also look in modules we got through `using`
-            mods = ccall(:jl_module_usings, Any, (Any,), context_module)::Vector
-            for m in mods
-                append!(suggestions, filtered_mod_names(p, m::Module, name))
+    if @isdefined(mod) # lookup names available within the module
+        let modname = nameof(mod),
+            is_main = mod===Main
+            append_filtered_mod_names!(suggestions, mod, name, complete_internal_only) do s::Symbol
+                if Base.isdeprecated(mod, s)
+                    return false
+                elseif s === modname
+                    return false # exclude `Main.Main.Main`, etc.
+                elseif complete_modules_only && !completes_module(mod, s)
+                    return false
+                elseif is_main && s === :MainInclude
+                    return false
+                end
+                return true
             end
-            append!(suggestions, filtered_mod_names(p, mod, name, true, true))
-        else
-            append!(suggestions, filtered_mod_names(p, mod, name, true, false))
         end
-    elseif val !== nothing # looking for a property of an instance
+    elseif @isdefined(val) # looking for a property of an instance
         try
             for property in propertynames(val, false)
                 # TODO: support integer arguments (#36872)
@@ -200,13 +205,16 @@ function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc),
             end
         catch
         end
-    elseif field_completion_eligible(t)
+    elseif @isdefined(t) && field_completion_eligible(t)
         # Looking for a member of a type
         add_field_completions!(suggestions, name, t)
     end
     return suggestions
 end
 
+completes_module(mod::Module, x::Symbol) =
+    Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getglobal(mod, x), Module)
+
 function add_field_completions!(suggestions::Vector{Completion}, name::String, @nospecialize(t))
     if isa(t, Union)
         add_field_completions!(suggestions, name, t.a)
@@ -237,7 +245,7 @@ function field_completion_eligible(@nospecialize t)
     return match.method === GENERIC_PROPERTYNAMES_METHOD
 end
 
-function complete_from_list(T::Type, list::Vector{String}, s::Union{String,SubString{String}})
+function complete_from_list!(suggestions::Vector{Completion}, T::Type, list::Vector{String}, s::String)
     r = searchsorted(list, s)
     i = first(r)
     n = length(list)
@@ -245,7 +253,10 @@ function complete_from_list(T::Type, list::Vector{String}, s::Union{String,SubSt
         r = first(r):i
         i += 1
     end
-    Completion[T(kw) for kw in list[r]]
+    for kw in list[r]
+        push!(suggestions, T(kw))
+    end
+    return suggestions
 end
 
 const sorted_keywords = [
@@ -256,11 +267,13 @@ const sorted_keywords = [
     "primitive type", "quote", "return", "struct",
     "try", "using", "while"]
 
-complete_keyword(s::Union{String,SubString{String}}) = complete_from_list(KeywordCompletion, sorted_keywords, s)
+complete_keyword!(suggestions::Vector{Completion}, s::String) =
+    complete_from_list!(suggestions, KeywordCompletion, sorted_keywords, s)
 
 const sorted_keyvals = ["false", "true"]
 
-complete_keyval(s::Union{String,SubString{String}}) = complete_from_list(KeyvalCompletion, sorted_keyvals, s)
+complete_keyval!(suggestions::Vector{Completion}, s::String) =
+    complete_from_list!(suggestions, KeyvalCompletion, sorted_keyvals, s)
 
 function do_raw_escape(s)
     # escape_raw_string with delim='`' and ignoring the rule for the ending \
@@ -282,7 +295,10 @@ function maybe_spawn_cache_PATH()
     @lock PATH_cache_lock begin
         PATH_cache_task isa Task && !istaskdone(PATH_cache_task) && return
         time() < next_cache_update && return
-        PATH_cache_task = Threads.@spawn REPLCompletions.cache_PATH()
+        PATH_cache_task = Threads.@spawn begin
+            REPLCompletions.cache_PATH()
+            @lock PATH_cache_lock PATH_cache_task = nothing # release memory when done
+        end
         Base.errormonitor(PATH_cache_task)
     end
 end
@@ -367,7 +383,8 @@ function complete_path(path::AbstractString;
                        use_envpath=false,
                        shell_escape=false,
                        raw_escape=false,
-                       string_escape=false)
+                       string_escape=false,
+                       contract_user=false)
     @assert !(shell_escape && string_escape)
     if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path)
         # if the path is just "~", don't consider the expanded username as a prefix
@@ -413,7 +430,7 @@ function complete_path(path::AbstractString;
 
     matches = ((shell_escape ? do_shell_escape(s) : string_escape ? do_string_escape(s) : s) for s in matches)
     matches = ((raw_escape ? do_raw_escape(s) : s) for s in matches)
-    matches = Completion[PathCompletion(s) for s in matches]
+    matches = Completion[PathCompletion(contract_user ? contractuser(s) : s) for s in matches]
     return matches, dir, !isempty(matches)
 end
 
@@ -421,7 +438,8 @@ function complete_path(path::AbstractString,
                        pos::Int;
                        use_envpath=false,
                        shell_escape=false,
-                       string_escape=false)
+                       string_escape=false,
+                       contract_user=false)
     ## TODO: enable this depwarn once Pkg is fixed
     #Base.depwarn("complete_path with pos argument is deprecated because the return value [2] is incorrect to use", :complete_path)
     paths, dir, success = complete_path(path; use_envpath, shell_escape, string_escape)
@@ -538,8 +556,7 @@ struct REPLInterpreter <: CC.AbstractInterpreter
     function REPLInterpreter(limit_aggressive_inference::Bool=false;
                              world::UInt = Base.get_world_counter(),
                              inf_params::CC.InferenceParams = CC.InferenceParams(;
-                                 aggressive_constant_propagation=true,
-                                 unoptimize_throw_blocks=false),
+                                 aggressive_constant_propagation=true),
                              opt_params::CC.OptimizationParams = CC.OptimizationParams(),
                              inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[])
         return new(limit_aggressive_inference, world, inf_params, opt_params, inf_cache)
@@ -554,6 +571,9 @@ CC.cache_owner(::REPLInterpreter) = REPLCacheToken()
 # REPLInterpreter is only used for type analysis, so it should disable optimization entirely
 CC.may_optimize(::REPLInterpreter) = false
 
+# REPLInterpreter doesn't need any sources to be cached, so discard them aggressively
+CC.transform_result_for_cache(::REPLInterpreter, ::Core.MethodInstance, ::CC.WorldRange, ::CC.InferenceResult) = nothing
+
 # REPLInterpreter analyzes a top-level frame, so better to not bail out from it
 CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.InferenceState) = false
 
@@ -583,7 +603,7 @@ is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode
 
 function is_call_graph_uncached(sv::CC.InferenceState)
     CC.is_cached(sv) && return false
-    parent = sv.parent
+    parent = CC.frame_parent(sv)
     parent === nothing && return true
     return is_call_graph_uncached(parent::CC.InferenceState)
 end
@@ -606,7 +626,7 @@ function is_repl_frame_getproperty(sv::CC.InferenceState)
     def isa Method || return false
     def.name === :getproperty || return false
     CC.is_cached(sv) && return false
-    return is_repl_frame(sv.parent)
+    return is_repl_frame(CC.frame_parent(sv))
 end
 
 # aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame`
@@ -687,13 +707,9 @@ function repl_eval_ex(@nospecialize(ex), context_module::Module; limit_aggressiv
     isexpr(lwr, :thunk) || return nothing # lowered to `Expr(:error, ...)` or similar
     src = lwr.args[1]::Core.CodeInfo
 
-    # construct top-level `MethodInstance`
-    mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
-    mi.specTypes = Tuple{}
-
-    mi.def = context_module
     resolve_toplevel_symbols!(src, context_module)
-    @atomic mi.uninferred = src
+    # construct top-level `MethodInstance`
+    mi = ccall(:jl_method_instance_for_thunk, Ref{Core.MethodInstance}, (Any, Any), src, context_module)
 
     interp = REPLInterpreter(limit_aggressive_inference)
     result = CC.InferenceResult(mi)
@@ -881,17 +897,39 @@ const subscript_regex = Regex("^\\\\_[" * join(isdigit(k) || isletter(k) ? "$k"
 const superscripts = Dict(k[3]=>v[1] for (k,v) in latex_symbols if startswith(k, "\\^") && length(k)==3)
 const superscript_regex = Regex("^\\\\\\^[" * join(isdigit(k) || isletter(k) ? "$k" : "\\$k" for k in keys(superscripts)) * "]+\\z")
 
-# Aux function to detect whether we're right after a
-# using or import keyword
-function afterusing(string::String, startpos::Int)
-    (isempty(string) || startpos == 0) && return false
-    str = string[1:prevind(string,startpos)]
-    isempty(str) && return false
-    rstr = reverse(str)
-    r = findfirst(r"\s(gnisu|tropmi)\b", rstr)
-    r === nothing && return false
-    fr = reverseind(str, last(r))
-    return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end])
+# Aux function to detect whether we're right after a using or import keyword
+function get_import_mode(s::String)
+    # allow all of these to start with leading whitespace and macros like @eval and @eval(
+    # ^\s*(?:@\w+\s*(?:\(\s*)?)?
+
+    # match simple cases like `using |` and `import  |`
+    mod_import_match_simple = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s*$", s)
+    if mod_import_match_simple !== nothing
+        if mod_import_match_simple[1] == "using"
+            return :using_module
+        else
+            return :import_module
+        end
+    end
+    # match module import statements like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |`
+    mod_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+(?:\s*,\s*[\w\.]+)*),?\s*$", s)
+    if mod_import_match !== nothing
+        if mod_import_match.captures[1] == "using"
+            return :using_module
+        else
+            return :import_module
+        end
+    end
+    # now match explicit name import statements like `using Foo: |` and `import Foo: bar, baz|`
+    name_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+)\s*:\s*([\w@!\s,]+)$", s)
+    if name_import_match !== nothing
+        if name_import_match[1] == "using"
+            return :using_name
+        else
+            return :import_name
+        end
+    end
+    return nothing
 end
 
 function close_path_completion(dir, paths, str, pos)
@@ -909,7 +947,7 @@ function close_path_completion(dir, paths, str, pos)
     return lastindex(str) <= pos || str[nextind(str, pos)] != '"'
 end
 
-function bslash_completions(string::String, pos::Int)
+function bslash_completions(string::String, pos::Int, hint::Bool=false)
     slashpos = something(findprev(isequal('\\'), string, pos), 0)
     if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos &&
         !(1 < slashpos && (string[prevind(string, slashpos)]=='\\')))
@@ -1004,7 +1042,8 @@ function identify_possible_method_completion(partial, last_idx)
 end
 
 # Provide completion for keyword arguments in function calls
-function complete_keyword_argument(partial, last_idx, context_module)
+function complete_keyword_argument(partial::String, last_idx::Int, context_module::Module;
+                                   shift::Bool=false)
     frange, ex, wordrange, = identify_possible_method_completion(partial, last_idx)
     fail = Completion[], 0:-1, frange
     ex.head === :call || is_broadcasting_expr(ex) || return fail
@@ -1041,14 +1080,14 @@ function complete_keyword_argument(partial, last_idx, context_module)
 
     # Only add these if not in kwarg space. i.e. not in `foo(; `
     if kwargs_flag == 0
-        append!(suggestions, complete_symbol(nothing, last_word, Returns(true), context_module))
-        append!(suggestions, complete_keyval(last_word))
+        complete_symbol!(suggestions, #=prefix=#nothing, last_word, context_module; shift)
+        complete_keyval!(suggestions, last_word)
     end
 
     return sort!(suggestions, by=completion_text), wordrange
 end
 
-function project_deps_get_completion_candidates(pkgstarts::String, project_file::String)
+function get_loading_candidates(pkgstarts::String, project_file::String)
     loading_candidates = String[]
     d = Base.parsed_toml(project_file)
     pkg = get(d, "name", nothing)::Union{String, Nothing}
@@ -1061,23 +1100,31 @@ function project_deps_get_completion_candidates(pkgstarts::String, project_file:
             startswith(pkg, pkgstarts) && push!(loading_candidates, pkg)
         end
     end
-    return Completion[PackageCompletion(name) for name in loading_candidates]
+    return loading_candidates
+end
+
+function complete_loading_candidates!(suggestions::Vector{Completion}, pkgstarts::String, project_file::String)
+    for name in get_loading_candidates(pkgstarts, project_file)
+        push!(suggestions, PackageCompletion(name))
+    end
+    return suggestions
 end
 
-function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ffunc),
+function complete_identifiers!(suggestions::Vector{Completion},
                                context_module::Module, string::String, name::String,
-                               pos::Int, dotpos::Int, startpos::Int;
-                               comp_keywords=false)
-    ex = nothing
+                               pos::Int, separatorpos::Int, startpos::Int;
+                               comp_keywords::Bool=false,
+                               complete_modules_only::Bool=false,
+                               shift::Bool=false)
     if comp_keywords
-        append!(suggestions, complete_keyword(name))
-        append!(suggestions, complete_keyval(name))
+        complete_keyword!(suggestions, name)
+        complete_keyval!(suggestions, name)
     end
-    if dotpos > 1 && string[dotpos] == '.'
-        s = string[1:prevind(string, dotpos)]
+    if separatorpos > 1 && (string[separatorpos] == '.' || string[separatorpos] == ':')
+        s = string[1:prevind(string, separatorpos)]
         # First see if the whole string up to `pos` is a valid expression. If so, use it.
-        ex = Meta.parse(s, raise=false, depwarn=false)
-        if isexpr(ex, :incomplete)
+        prefix = Meta.parse(s, raise=false, depwarn=false)
+        if isexpr(prefix, :incomplete)
             s = string[startpos:pos]
             # Heuristic to find the start of the expression. TODO: This would be better
             # done with a proper error-recovering parser.
@@ -1109,22 +1156,22 @@ function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ff
             if something(findlast(in(non_identifier_chars), s), 0) < something(findlast(isequal('.'), s), 0)
                 lookup_name, name = rsplit(s, ".", limit=2)
                 name = String(name)
-                ex = Meta.parse(lookup_name, raise=false, depwarn=false)
+                prefix = Meta.parse(lookup_name, raise=false, depwarn=false)
             end
-            isexpr(ex, :incomplete) && (ex = nothing)
-        elseif isexpr(ex, (:using, :import))
-            arg1 = ex.args[1]
-            if isexpr(arg1, :.)
+            isexpr(prefix, :incomplete) && (prefix = nothing)
+        elseif isexpr(prefix, (:using, :import))
+            arglast = prefix.args[end] # focus on completion to the last argument
+            if isexpr(arglast, :.)
                 # We come here for cases like:
                 # - `string`: "using Mod1.Mod2.M"
                 # - `ex`: :(using Mod1.Mod2)
                 # - `name`: "M"
-                # Now we transform `ex` to `:(Mod1.Mod2)` to allow `complete_symbol` to
+                # Now we transform `ex` to `:(Mod1.Mod2)` to allow `complete_symbol!` to
                 # complete for inner modules whose name starts with `M`.
-                # Note that `ffunc` is set to `module_filter` within `completions`
-                ex = nothing
+                # Note that `complete_modules_only=true` is set within `completions`
+                prefix = nothing
                 firstdot = true
-                for arg = arg1.args
+                for arg = arglast.args
                     if arg === :.
                         # override `context_module` if multiple `.` accessors are used
                         if firstdot
@@ -1133,40 +1180,42 @@ function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ff
                             context_module = parentmodule(context_module)
                         end
                     elseif arg isa Symbol
-                        if ex === nothing
-                            ex = arg
+                        if prefix === nothing
+                            prefix = arg
                         else
-                            ex = Expr(:., ex, QuoteNode(arg))
+                            prefix = Expr(:., prefix, QuoteNode(arg))
                         end
                     else # invalid expression
-                        ex = nothing
+                        prefix = nothing
                         break
                     end
                 end
             end
-        elseif isexpr(ex, :call) && length(ex.args) > 1
+        elseif isexpr(prefix, :call) && length(prefix.args) > 1
             isinfix = s[end] != ')'
             # A complete call expression that does not finish with ')' is an infix call.
             if !isinfix
                 # Handle infix call argument completion of the form bar + foo(qux).
                 frange, end_of_identifier = find_start_brace(@view s[1:prevind(s, end)])
-                isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == ex.args[end]
+                isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end]
             end
             if isinfix
-                ex = ex.args[end]
+                prefix = prefix.args[end]
             end
-        elseif isexpr(ex, :macrocall) && length(ex.args) > 1
+        elseif isexpr(prefix, :macrocall) && length(prefix.args) > 1
             # allow symbol completions within potentially incomplete macrocalls
             if s[end] ≠ '`' && s[end] ≠ ')'
-                ex = ex.args[end]
+                prefix = prefix.args[end]
             end
         end
+    else
+        prefix = nothing
     end
-    append!(suggestions, complete_symbol(ex, name, ffunc, context_module))
-    return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true
+    complete_symbol!(suggestions, prefix, name, context_module; complete_modules_only, shift)
+    return suggestions
 end
 
-function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true)
+function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true, hint::Bool=false)
     # First parse everything up to the current position
     partial = string[1:pos]
     inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false))
@@ -1209,7 +1258,6 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
         length(matches)>0 && return Completion[DictCompletion(identifier, match) for match in sort!(matches)], loc::Int:pos, true
     end
 
-    ffunc = Returns(true)
     suggestions = Completion[]
 
     # Check if this is a var"" string macro that should be completed like
@@ -1219,14 +1267,19 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
     # its invocation.
     varrange = findprev("var\"", string, pos)
 
+    expanded = nothing
+    was_expanded = false
+
     if varrange !== nothing
         ok, ret = bslash_completions(string, pos)
         ok && return ret
         startpos = first(varrange) + 4
-        dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0)
+        separatorpos = something(findprev(isequal('.'), string, first(varrange)-1), 0)
         name = string[startpos:pos]
-        return complete_identifiers!(Completion[], ffunc, context_module, string, name, pos,
-                                     dotpos, startpos)
+        complete_identifiers!(suggestions, context_module, string, name,
+                              pos, separatorpos, startpos;
+                              shift)
+        return sort!(unique!(completion_text, suggestions), by=completion_text), (separatorpos+1):pos, true
     elseif inc_tag === :cmd
         # TODO: should this call shell_completions instead of partially reimplementing it?
         let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse
@@ -1235,7 +1288,13 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
             scs::String = string[r]
 
             expanded = complete_expanduser(scs, r)
-            expanded[3] && return expanded  # If user expansion available, return it
+            was_expanded = expanded[3]
+            if was_expanded
+                scs = (only(expanded[1])::PathCompletion).path
+                # If tab press, ispath and user expansion available, return it now
+                # otherwise see if we can complete the path further before returning with expanded ~
+                !hint && ispath(scs) && return expanded::Completions
+            end
 
             path::String = replace(scs, r"(\\+)\g1(\\?)`" => "\1\2`") # fuzzy unescape_raw_string: match an even number of \ before ` and replace with half as many
             # This expansion with "\\ "=>' ' replacement and shell_escape=true
@@ -1253,12 +1312,19 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
                         r = nextind(string, startpos + sizeof(dir)):pos
                     else
                         map!(paths, paths) do c::PathCompletion
-                            return PathCompletion(dir * "/" * c.path)
+                            p = dir * "/" * c.path
+                            was_expanded && (p = contractuser(p))
+                            return PathCompletion(p)
                         end
                     end
                 end
             end
-            return sort!(paths, by=p->p.path), r, success
+            if isempty(paths) && !hint && was_expanded
+                # if not able to provide completions, not hinting, and ~ expansion was possible, return ~ expansion
+                return expanded::Completions
+            else
+                return sort!(paths, by=p->p.path), r::UnitRange{Int}, success
+            end
         end
     elseif inc_tag === :string
         # Find first non-escaped quote
@@ -1268,7 +1334,13 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
             scs::String = string[r]
 
             expanded = complete_expanduser(scs, r)
-            expanded[3] && return expanded  # If user expansion available, return it
+            was_expanded = expanded[3]
+            if was_expanded
+                scs = (only(expanded[1])::PathCompletion).path
+                # If tab press, ispath and user expansion available, return it now
+                # otherwise see if we can complete the path further before returning with expanded ~
+                !hint && ispath(scs) && return expanded::Completions
+            end
 
             path = try
                 unescape_string(replace(scs, "\\\$"=>"\$"))
@@ -1280,7 +1352,9 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
                 paths, dir, success = complete_path(path::String, string_escape=true)
 
                 if close_path_completion(dir, paths, path, pos)
-                    paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"")
+                    p = (paths[1]::PathCompletion).path * "\""
+                    hint && was_expanded && (p = contractuser(p))
+                    paths[1] = PathCompletion(p)
                 end
 
                 if success && !isempty(dir)
@@ -1289,21 +1363,31 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
                         # otherwise make it the whole completion
                         if endswith(dir, "/") && startswith(scs, dir)
                             r = (startpos + sizeof(dir)):pos
-                        elseif startswith(scs, dir * "/")
+                        elseif startswith(scs, dir * "/") && dir != dirname(homedir())
+                            was_expanded && (dir = contractuser(dir))
                             r = nextind(string, startpos + sizeof(dir)):pos
                         else
                             map!(paths, paths) do c::PathCompletion
-                                return PathCompletion(dir * "/" * c.path)
+                                p = dir * "/" * c.path
+                                hint && was_expanded && (p = contractuser(p))
+                                return PathCompletion(p)
                             end
                         end
                     end
                 end
 
                 # Fallthrough allowed so that Latex symbols can be completed in strings
-                success && return sort!(paths, by=p->p.path), r, success
+                if success
+                    return sort!(paths, by=p->p.path), r::UnitRange{Int}, success
+                elseif !hint && was_expanded
+                    # if not able to provide completions, not hinting, and ~ expansion was possible, return ~ expansion
+                    return expanded::Completions
+                end
             end
         end
     end
+    # if path has ~ and we didn't find any paths to complete just return the expanded path
+    was_expanded && return expanded::Completions
 
     ok, ret = bslash_completions(string, pos)
     ok && return ret
@@ -1324,29 +1408,30 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
     end
 
     # Check whether we can complete a keyword argument in a function call
-    kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module)
+    kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module; shift)
     isempty(wordrange) || return kwarg_completion, wordrange, !isempty(kwarg_completion)
 
-    dotpos = something(findprev(isequal('.'), string, pos), 0)
     startpos = nextind(string, something(findprev(in(non_identifier_chars), string, pos), 0))
     # strip preceding ! operator
     if (m = match(r"\G\!+", partial, startpos)) isa RegexMatch
         startpos += length(m.match)
     end
 
-    name = string[max(startpos, dotpos+1):pos]
-    comp_keywords = !isempty(name) && startpos > dotpos
-    if afterusing(string, startpos)
-        # We're right after using or import. Let's look only for packages
-        # and modules we can reach from here
+    separatorpos = something(findprev(isequal('.'), string, pos), 0)
+    namepos = max(startpos, separatorpos+1)
+    name = string[namepos:pos]
+    import_mode = get_import_mode(string)
+    if import_mode === :using_module || import_mode === :import_module
+        # Given input lines like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |`:
+        # Let's look only for packages and modules we can reach from here
 
         # If there's no dot, we're in toplevel, so we should
         # also search for packages
         s = string[startpos:pos]
-        if dotpos <= startpos
+        if separatorpos <= startpos
             for dir in Base.load_path()
                 if basename(dir) in Base.project_names && isfile(dir)
-                    append!(suggestions, project_deps_get_completion_candidates(s, dir))
+                    complete_loading_candidates!(suggestions, s, dir)
                 end
                 isdir(dir) || continue
                 for entry in _readdirx(dir)
@@ -1375,21 +1460,25 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
                 end
             end
         end
-        ffunc = module_filter
         comp_keywords = false
+        complete_modules_only = import_mode === :using_module # allow completion for `import Mod.name` (where `name` is not a module)
+    elseif import_mode === :using_name || import_mode === :import_name
+        # `using Foo: |` and `import Foo: bar, baz|`
+        separatorpos = findprev(isequal(':'), string, pos)::Int
+        comp_keywords = false
+        complete_modules_only = false
+    else
+        comp_keywords = !isempty(name) && startpos > separatorpos
+        complete_modules_only = false
     end
 
-    startpos == 0 && (pos = -1)
-    dotpos < startpos && (dotpos = startpos - 1)
-    return complete_identifiers!(suggestions, ffunc, context_module, string, name, pos,
-                                 dotpos, startpos;
-                                 comp_keywords)
+    complete_identifiers!(suggestions, context_module, string, name,
+                          pos, separatorpos, startpos;
+                          comp_keywords, complete_modules_only, shift)
+    return sort!(unique!(completion_text, suggestions), by=completion_text), namepos:pos, true
 end
 
-module_filter(mod::Module, x::Symbol) =
-    Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getglobal(mod, x), Module)
-
-function shell_completions(string, pos)
+function shell_completions(string, pos, hint::Bool=false)
     # First parse everything up to the current position
     scs = string[1:pos]
     args, last_arg_start = try
@@ -1407,7 +1496,7 @@ function shell_completions(string, pos)
     # If the last char was a space, but shell_parse ignored it search on "".
     if isexpr(lastarg, :incomplete) || isexpr(lastarg, :error)
         partial = string[last_arg_start:pos]
-        ret, range = completions(partial, lastindex(partial))
+        ret, range = completions(partial, lastindex(partial), Main, true, hint)
         range = range .+ (last_arg_start - 1)
         return ret, range, true
     elseif endswith(scs, ' ') && !endswith(scs, "\\ ")
@@ -1422,9 +1511,16 @@ function shell_completions(string, pos)
         # Also try looking into the env path if the user wants to complete the first argument
         use_envpath = length(args.args) < 2
 
-        # TODO: call complete_expanduser here?
+        expanded = complete_expanduser(path, r)
+        was_expanded = expanded[3]
+        if was_expanded
+            path = (only(expanded[1])::PathCompletion).path
+            # If tab press, ispath and user expansion available, return it now
+            # otherwise see if we can complete the path further before returning with expanded ~
+            !hint && ispath(path) && return expanded::Completions
+        end
 
-        paths, dir, success = complete_path(path, use_envpath=use_envpath, shell_escape=true)
+        paths, dir, success = complete_path(path, use_envpath=use_envpath, shell_escape=true, contract_user=was_expanded)
 
         if success && !isempty(dir)
             let dir = do_shell_escape(dir)
@@ -1442,7 +1538,14 @@ function shell_completions(string, pos)
                 end
             end
         end
-
+        # if ~ was expanded earlier and the incomplete string isn't a path
+        # return the path with contracted user to match what the hint shows. Otherwise expand ~
+        # i.e. require two tab presses to expand user
+        if was_expanded && !ispath(path)
+            map!(paths, paths) do c::PathCompletion
+                PathCompletion(contractuser(c.path))
+            end
+        end
         return paths, r, success
     end
     return Completion[], 0:-1, false
diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
index a1f94852b38ec..ddcfc111cf962 100644
--- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
@@ -176,7 +176,7 @@ Returns `selected(m)`.
 !!! compat "Julia 1.6"
     The `cursor` argument requires Julia 1.6 or later.
 """
-request(m::AbstractMenu; kwargs...) = request(terminal, m; kwargs...)
+request(m::AbstractMenu; kwargs...) = request(default_terminal(), m; kwargs...)
 
 function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Union{Int, Base.RefValue{Int}}=1, suppress_output=false)
     if cursor isa Int
@@ -252,7 +252,7 @@ end
 
 Shorthand for `println(msg); request(m)`.
 """
-request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(terminal, msg, m; kwargs...)
+request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(default_terminal(), msg, m; kwargs...)
 
 function request(term::REPL.Terminals.TTYTerminal, msg::AbstractString, m::AbstractMenu; kwargs...)
     println(term.out_stream, msg)
diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl
index c823a5dedd1ba..091f87801e7a4 100644
--- a/stdlib/REPL/src/TerminalMenus/Pager.jl
+++ b/stdlib/REPL/src/TerminalMenus/Pager.jl
@@ -39,4 +39,4 @@ function pager(terminal, object)
     pager = Pager(String(take!(buffer)); pagesize = div(lines, 2))
     return request(terminal, pager)
 end
-pager(object) = pager(terminal, object)
+pager(object) = pager(default_terminal(), object)
diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
index 87869e84d9838..f970cd9a289c2 100644
--- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
+++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
@@ -1,14 +1,19 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-module TerminalMenus
+"""
+    REPL.TerminalMenus
 
-terminal = nothing  # The user terminal
+A module that contains code for displaying text mode interactive menus.
+Key exported symbols include [`REPL.TerminalMenus.RadioMenu`](@ref) and
+[`REPL.TerminalMenus.MultiSelectMenu`](@ref).
+"""
+module TerminalMenus
 
-import REPL
+using ..REPL: REPL
 
-function __init__()
-    global terminal
-    terminal = REPL.Terminals.TTYTerminal(get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), stdin, stdout, stderr)
+function default_terminal(; in::IO=stdin, out::IO=stdout, err::IO=stderr)
+    return REPL.Terminals.TTYTerminal(
+        get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), in, out, err)
 end
 
 include("util.jl")
@@ -25,6 +30,9 @@ export
     Pager,
     request
 
+public Config, config, MultiSelectConfig
+public pick, cancel, writeline, options, numoptions, selected, header, keypress
+
 # TODO: remove in Julia 2.0
 # While not exported, AbstractMenu documented these as an extension interface
 @deprecate printMenu printmenu
diff --git a/stdlib/REPL/src/Terminals.jl b/stdlib/REPL/src/Terminals.jl
index 821ed224f1829..0cf6888d248e8 100644
--- a/stdlib/REPL/src/Terminals.jl
+++ b/stdlib/REPL/src/Terminals.jl
@@ -97,6 +97,7 @@ abstract type UnixTerminal <: TextTerminal end
 pipe_reader(t::UnixTerminal) = t.in_stream::IO
 pipe_writer(t::UnixTerminal) = t.out_stream::IO
 
+@nospecialize
 mutable struct TerminalBuffer <: UnixTerminal
     out_stream::IO
 end
@@ -107,6 +108,7 @@ mutable struct TTYTerminal <: UnixTerminal
     out_stream::IO
     err_stream::IO
 end
+@specialize
 
 const CSI = "\x1b["
 
@@ -118,10 +120,8 @@ cmove_line_up(t::UnixTerminal, n) = (cmove_up(t, n); cmove_col(t, 1))
 cmove_line_down(t::UnixTerminal, n) = (cmove_down(t, n); cmove_col(t, 1))
 cmove_col(t::UnixTerminal, n) = (write(t.out_stream, '\r'); n > 1 && cmove_right(t, n-1))
 
-const is_precompiling = Ref(false)
 if Sys.iswindows()
     function raw!(t::TTYTerminal,raw::Bool)
-        is_precompiling[] && return true
         check_open(t.in_stream)
         if Base.ispty(t.in_stream)
             run((raw ? `stty raw -echo onlcr -ocrnl opost` : `stty sane`),
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index feeeecbd97165..5086aa0c9485c 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -13,8 +13,6 @@ using Base: with_output_color, mapany, isdeprecated, isexported
 
 using Base.Filesystem: _readdirx
 
-import REPL
-
 using InteractiveUtils: subtypes
 
 using Unicode: normalize
@@ -25,7 +23,7 @@ using Unicode: normalize
 function helpmode(io::IO, line::AbstractString, mod::Module=Main)
     internal_accesses = Set{Pair{Module,Symbol}}()
     quote
-        docs = $REPL.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
+        docs = $Markdown.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
         $REPL.insert_internal_warning(docs, $internal_accesses)
     end
 end
@@ -78,20 +76,6 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main, internal_acce
 end
 _helpmode(line::AbstractString, mod::Module=Main) = _helpmode(stdout, line, mod)
 
-# Print horizontal lines between each docstring if there are multiple docs
-function insert_hlines(docs)
-    if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
-        return docs
-    end
-    docs = docs::Markdown.MD
-    v = Any[]
-    for (n, doc) in enumerate(docs.content)
-        push!(v, doc)
-        n == length(docs.content) || push!(v, Markdown.HorizontalRule())
-    end
-    return Markdown.MD(v)
-end
-
 function formatdoc(d::DocStr)
     buffer = IOBuffer()
     for part in d.text
@@ -475,7 +459,7 @@ repl_corrections(s) = repl_corrections(stdout, s)
 # inverse of latex_symbols Dict, lazily created as needed
 const symbols_latex = Dict{String,String}()
 function symbol_latex(s::String)
-    if isempty(symbols_latex) && isassigned(Base.REPL_MODULE_REF)
+    if isempty(symbols_latex)
         for (k,v) in Iterators.flatten((REPLCompletions.latex_symbols,
                                         REPLCompletions.emoji_symbols))
             symbols_latex[v] = k
diff --git a/stdlib/REPL/src/emoji_symbols.jl b/stdlib/REPL/src/emoji_symbols.jl
index 49a55c97f6564..d6d4a03321d0a 100644
--- a/stdlib/REPL/src/emoji_symbols.jl
+++ b/stdlib/REPL/src/emoji_symbols.jl
@@ -27,6 +27,7 @@ result = mapfoldr(emoji_data, merge, [
     # overwrite the old with names that changed but still keep old ones that were removed
     "https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json",
     "https://raw.githubusercontent.com/iamcal/emoji-data/e512953312c012f6bd00e3f2ef6bf152ca3710f8/emoji_pretty.json",
+    "https://raw.githubusercontent.com/iamcal/emoji-data/a8174c74675355c8c6a9564516b2e961fe7257ef/emoji_pretty.json",
     ];
     init=Dict()
 )
@@ -132,6 +133,7 @@ const emoji_symbols = Dict(
     "\\:bath:" => "🛀",
     "\\:bathtub:" => "🛁",
     "\\:battery:" => "🔋",
+    "\\:beans:" => "🫘",
     "\\:bear:" => "🐻",
     "\\:bearded_person:" => "🧔",
     "\\:beaver:" => "🦫",
@@ -151,6 +153,7 @@ const emoji_symbols = Dict(
     "\\:bird:" => "🐦",
     "\\:birthday:" => "🎂",
     "\\:bison:" => "🦬",
+    "\\:biting_lip:" => "🫦",
     "\\:black_circle:" => "⚫",
     "\\:black_heart:" => "🖤",
     "\\:black_joker:" => "🃏",
@@ -198,6 +201,7 @@ const emoji_symbols = Dict(
     "\\:broom:" => "🧹",
     "\\:brown_heart:" => "🤎",
     "\\:bubble_tea:" => "🧋",
+    "\\:bubbles:" => "🫧",
     "\\:bucket:" => "🪣",
     "\\:bug:" => "🐛",
     "\\:bulb:" => "💡",
@@ -309,6 +313,7 @@ const emoji_symbols = Dict(
     "\\:cool:" => "🆒",
     "\\:cop:" => "👮",
     "\\:copyright:" => "©",
+    "\\:coral:" => "🪸",
     "\\:corn:" => "🌽",
     "\\:couple:" => "👫",
     "\\:couple_with_heart:" => "💑",
@@ -325,6 +330,7 @@ const emoji_symbols = Dict(
     "\\:crossed_fingers:" => "🤞",
     "\\:crossed_flags:" => "🎌",
     "\\:crown:" => "👑",
+    "\\:crutch:" => "🩼",
     "\\:cry:" => "😢",
     "\\:crying_cat_face:" => "😿",
     "\\:crystal_ball:" => "🔮",
@@ -367,7 +373,9 @@ const emoji_symbols = Dict(
     "\\:dollar:" => "💵",
     "\\:dolls:" => "🎎",
     "\\:dolphin:" => "🐬",
+    "\\:donkey:" => "🫏",
     "\\:door:" => "🚪",
+    "\\:dotted_line_face:" => "🫥",
     "\\:doughnut:" => "🍩",
     "\\:dragon:" => "🐉",
     "\\:dragon_face:" => "🐲",
@@ -397,6 +405,7 @@ const emoji_symbols = Dict(
     "\\:elevator:" => "🛗",
     "\\:elf:" => "🧝",
     "\\:email:" => "✉",
+    "\\:empty_nest:" => "🪹",
     "\\:end:" => "🔚",
     "\\:envelope_with_arrow:" => "📩",
     "\\:euro:" => "💶",
@@ -408,12 +417,16 @@ const emoji_symbols = Dict(
     "\\:expressionless:" => "😑",
     "\\:eyeglasses:" => "👓",
     "\\:eyes:" => "👀",
+    "\\:face_holding_back_tears:" => "🥹",
     "\\:face_palm:" => "🤦",
     "\\:face_vomiting:" => "🤮",
     "\\:face_with_cowboy_hat:" => "🤠",
+    "\\:face_with_diagonal_mouth:" => "🫤",
     "\\:face_with_hand_over_mouth:" => "🤭",
     "\\:face_with_head_bandage:" => "🤕",
     "\\:face_with_monocle:" => "🧐",
+    "\\:face_with_open_eyes_and_hand_over_mouth:" => "🫢",
+    "\\:face_with_peeking_eye:" => "🫣",
     "\\:face_with_raised_eyebrow:" => "🤨",
     "\\:face_with_rolling_eyes:" => "🙄",
     "\\:face_with_symbols_on_mouth:" => "🤬",
@@ -452,10 +465,12 @@ const emoji_symbols = Dict(
     "\\:floppy_disk:" => "💾",
     "\\:flower_playing_cards:" => "🎴",
     "\\:flushed:" => "😳",
+    "\\:flute:" => "🪈",
     "\\:fly:" => "🪰",
     "\\:flying_disc:" => "🥏",
     "\\:flying_saucer:" => "🛸",
     "\\:foggy:" => "🌁",
+    "\\:folding_hand_fan:" => "🪭",
     "\\:fondue:" => "🫕",
     "\\:foot:" => "🦶",
     "\\:football:" => "🏈",
@@ -482,6 +497,7 @@ const emoji_symbols = Dict(
     "\\:ghost:" => "👻",
     "\\:gift:" => "🎁",
     "\\:gift_heart:" => "💝",
+    "\\:ginger_root:" => "🫚",
     "\\:giraffe_face:" => "🦒",
     "\\:girl:" => "👧",
     "\\:glass_of_milk:" => "🥛",
@@ -491,6 +507,7 @@ const emoji_symbols = Dict(
     "\\:goat:" => "🐐",
     "\\:goggles:" => "🥽",
     "\\:golf:" => "⛳",
+    "\\:goose:" => "🪿",
     "\\:gorilla:" => "🦍",
     "\\:grapes:" => "🍇",
     "\\:green_apple:" => "🍏",
@@ -498,6 +515,7 @@ const emoji_symbols = Dict(
     "\\:green_heart:" => "💚",
     "\\:green_salad:" => "🥗",
     "\\:grey_exclamation:" => "❕",
+    "\\:grey_heart:" => "🩶",
     "\\:grey_question:" => "❔",
     "\\:grimacing:" => "😬",
     "\\:grin:" => "😁",
@@ -506,11 +524,14 @@ const emoji_symbols = Dict(
     "\\:guide_dog:" => "🦮",
     "\\:guitar:" => "🎸",
     "\\:gun:" => "🔫",
+    "\\:hair_pick:" => "🪮",
     "\\:haircut:" => "💇",
     "\\:hamburger:" => "🍔",
     "\\:hammer:" => "🔨",
+    "\\:hamsa:" => "🪬",
     "\\:hamster:" => "🐹",
     "\\:hand:" => "✋",
+    "\\:hand_with_index_finger_and_thumb_crossed:" => "🫰",
     "\\:handbag:" => "👜",
     "\\:handball:" => "🤾",
     "\\:handshake:" => "🤝",
@@ -524,12 +545,14 @@ const emoji_symbols = Dict(
     "\\:heart_decoration:" => "💟",
     "\\:heart_eyes:" => "😍",
     "\\:heart_eyes_cat:" => "😻",
+    "\\:heart_hands:" => "🫶",
     "\\:heartbeat:" => "💓",
     "\\:heartpulse:" => "💗",
     "\\:hearts:" => "♥",
     "\\:heavy_check_mark:" => "✔",
     "\\:heavy_division_sign:" => "➗",
     "\\:heavy_dollar_sign:" => "💲",
+    "\\:heavy_equals_sign:" => "🟰",
     "\\:heavy_minus_sign:" => "➖",
     "\\:heavy_multiplication_x:" => "✖",
     "\\:heavy_plus_sign:" => "➕",
@@ -559,16 +582,19 @@ const emoji_symbols = Dict(
     "\\:hugging_face:" => "🤗",
     "\\:hushed:" => "😯",
     "\\:hut:" => "🛖",
+    "\\:hyacinth:" => "🪻",
     "\\:i_love_you_hand_sign:" => "🤟",
     "\\:ice_cream:" => "🍨",
     "\\:ice_cube:" => "🧊",
     "\\:ice_hockey_stick_and_puck:" => "🏒",
     "\\:icecream:" => "🍦",
     "\\:id:" => "🆔",
+    "\\:identification_card:" => "🪪",
     "\\:ideograph_advantage:" => "🉐",
     "\\:imp:" => "👿",
     "\\:inbox_tray:" => "📥",
     "\\:incoming_envelope:" => "📨",
+    "\\:index_pointing_at_the_viewer:" => "🫵",
     "\\:information_desk_person:" => "💁",
     "\\:information_source:" => "ℹ",
     "\\:innocent:" => "😇",
@@ -580,7 +606,9 @@ const emoji_symbols = Dict(
     "\\:japanese_castle:" => "🏯",
     "\\:japanese_goblin:" => "👺",
     "\\:japanese_ogre:" => "👹",
+    "\\:jar:" => "🫙",
     "\\:jeans:" => "👖",
+    "\\:jellyfish:" => "🪼",
     "\\:jigsaw:" => "🧩",
     "\\:joy:" => "😂",
     "\\:joy_cat:" => "😹",
@@ -589,6 +617,7 @@ const emoji_symbols = Dict(
     "\\:kangaroo:" => "🦘",
     "\\:key:" => "🔑",
     "\\:keycap_ten:" => "🔟",
+    "\\:khanda:" => "🪯",
     "\\:kimono:" => "👘",
     "\\:kiss:" => "💋",
     "\\:kissing:" => "😗",
@@ -631,11 +660,14 @@ const emoji_symbols = Dict(
     "\\:left_luggage:" => "🛅",
     "\\:left_right_arrow:" => "↔",
     "\\:leftwards_arrow_with_hook:" => "↩",
+    "\\:leftwards_hand:" => "🫲",
+    "\\:leftwards_pushing_hand:" => "🫷",
     "\\:leg:" => "🦵",
     "\\:lemon:" => "🍋",
     "\\:leo:" => "♌",
     "\\:leopard:" => "🐆",
     "\\:libra:" => "♎",
+    "\\:light_blue_heart:" => "🩵",
     "\\:light_rail:" => "🚈",
     "\\:link:" => "🔗",
     "\\:lion_face:" => "🦁",
@@ -650,10 +682,12 @@ const emoji_symbols = Dict(
     "\\:long_drum:" => "🪘",
     "\\:loop:" => "➿",
     "\\:lotion_bottle:" => "🧴",
+    "\\:lotus:" => "🪷",
     "\\:loud_sound:" => "🔊",
     "\\:loudspeaker:" => "📢",
     "\\:love_hotel:" => "🏩",
     "\\:love_letter:" => "💌",
+    "\\:low_battery:" => "🪫",
     "\\:low_brightness:" => "🔅",
     "\\:luggage:" => "🧳",
     "\\:lungs:" => "🫁",
@@ -679,6 +713,7 @@ const emoji_symbols = Dict(
     "\\:mans_shoe:" => "👞",
     "\\:manual_wheelchair:" => "🦽",
     "\\:maple_leaf:" => "🍁",
+    "\\:maracas:" => "🪇",
     "\\:martial_arts_uniform:" => "🥋",
     "\\:mask:" => "😷",
     "\\:massage:" => "💆",
@@ -688,6 +723,7 @@ const emoji_symbols = Dict(
     "\\:mechanical_leg:" => "🦿",
     "\\:mega:" => "📣",
     "\\:melon:" => "🍈",
+    "\\:melting_face:" => "🫠",
     "\\:memo:" => "📝",
     "\\:menorah_with_nine_branches:" => "🕎",
     "\\:mens:" => "🚹",
@@ -702,6 +738,7 @@ const emoji_symbols = Dict(
     "\\:minibus:" => "🚐",
     "\\:minidisc:" => "💽",
     "\\:mirror:" => "🪞",
+    "\\:mirror_ball:" => "🪩",
     "\\:mobile_phone_off:" => "📴",
     "\\:money_mouth_face:" => "🤑",
     "\\:money_with_wings:" => "💸",
@@ -711,6 +748,7 @@ const emoji_symbols = Dict(
     "\\:monorail:" => "🚝",
     "\\:moon:" => "🌔",
     "\\:moon_cake:" => "🥮",
+    "\\:moose:" => "🫎",
     "\\:mortar_board:" => "🎓",
     "\\:mosque:" => "🕌",
     "\\:mosquito:" => "🦟",
@@ -739,6 +777,7 @@ const emoji_symbols = Dict(
     "\\:necktie:" => "👔",
     "\\:negative_squared_cross_mark:" => "❎",
     "\\:nerd_face:" => "🤓",
+    "\\:nest_with_eggs:" => "🪺",
     "\\:nesting_dolls:" => "🪆",
     "\\:neutral_face:" => "😐",
     "\\:new:" => "🆕",
@@ -800,7 +839,9 @@ const emoji_symbols = Dict(
     "\\:page_facing_up:" => "📄",
     "\\:page_with_curl:" => "📃",
     "\\:pager:" => "📟",
+    "\\:palm_down_hand:" => "🫳",
     "\\:palm_tree:" => "🌴",
+    "\\:palm_up_hand:" => "🫴",
     "\\:palms_up_together:" => "🤲",
     "\\:pancakes:" => "🥞",
     "\\:panda_face:" => "🐼",
@@ -812,6 +853,7 @@ const emoji_symbols = Dict(
     "\\:partly_sunny:" => "⛅",
     "\\:partying_face:" => "🥳",
     "\\:passport_control:" => "🛂",
+    "\\:pea_pod:" => "🫛",
     "\\:peach:" => "🍑",
     "\\:peacock:" => "🦚",
     "\\:peanuts:" => "🥜",
@@ -829,6 +871,7 @@ const emoji_symbols = Dict(
     "\\:person_in_steamy_room:" => "🧖",
     "\\:person_in_tuxedo:" => "🤵",
     "\\:person_with_blond_hair:" => "👱",
+    "\\:person_with_crown:" => "🫅",
     "\\:person_with_headscarf:" => "🧕",
     "\\:person_with_pouting_face:" => "🙎",
     "\\:petri_dish:" => "🧫",
@@ -843,10 +886,12 @@ const emoji_symbols = Dict(
     "\\:pinched_fingers:" => "🤌",
     "\\:pinching_hand:" => "🤏",
     "\\:pineapple:" => "🍍",
+    "\\:pink_heart:" => "🩷",
     "\\:pisces:" => "♓",
     "\\:pizza:" => "🍕",
     "\\:placard:" => "🪧",
     "\\:place_of_worship:" => "🛐",
+    "\\:playground_slide:" => "🛝",
     "\\:pleading_face:" => "🥺",
     "\\:plunger:" => "🪠",
     "\\:point_down:" => "👇",
@@ -866,9 +911,12 @@ const emoji_symbols = Dict(
     "\\:pouch:" => "👝",
     "\\:poultry_leg:" => "🍗",
     "\\:pound:" => "💷",
+    "\\:pouring_liquid:" => "🫗",
     "\\:pouting_cat:" => "😾",
     "\\:pray:" => "🙏",
     "\\:prayer_beads:" => "📿",
+    "\\:pregnant_man:" => "🫃",
+    "\\:pregnant_person:" => "🫄",
     "\\:pregnant_woman:" => "🤰",
     "\\:pretzel:" => "🥨",
     "\\:prince:" => "🤴",
@@ -914,7 +962,10 @@ const emoji_symbols = Dict(
     "\\:rice_cracker:" => "🍘",
     "\\:rice_scene:" => "🎑",
     "\\:right-facing_fist:" => "🤜",
+    "\\:rightwards_hand:" => "🫱",
+    "\\:rightwards_pushing_hand:" => "🫸",
     "\\:ring:" => "💍",
+    "\\:ring_buoy:" => "🛟",
     "\\:ringed_planet:" => "🪐",
     "\\:robot_face:" => "🤖",
     "\\:rock:" => "🪨",
@@ -937,6 +988,7 @@ const emoji_symbols = Dict(
     "\\:sagittarius:" => "♐",
     "\\:sake:" => "🍶",
     "\\:salt:" => "🧂",
+    "\\:saluting_face:" => "🫡",
     "\\:sandal:" => "👡",
     "\\:sandwich:" => "🥪",
     "\\:santa:" => "🎅",
@@ -964,6 +1016,7 @@ const emoji_symbols = Dict(
     "\\:seedling:" => "🌱",
     "\\:selfie:" => "🤳",
     "\\:sewing_needle:" => "🪡",
+    "\\:shaking_face:" => "🫨",
     "\\:shallow_pan_of_food:" => "🥘",
     "\\:shark:" => "🦈",
     "\\:shaved_ice:" => "🍧",
@@ -1124,6 +1177,7 @@ const emoji_symbols = Dict(
     "\\:triangular_ruler:" => "📐",
     "\\:trident:" => "🔱",
     "\\:triumph:" => "😤",
+    "\\:troll:" => "🧌",
     "\\:trolleybus:" => "🚎",
     "\\:trophy:" => "🏆",
     "\\:tropical_drink:" => "🍹",
@@ -1188,6 +1242,7 @@ const emoji_symbols = Dict(
     "\\:wedding:" => "💒",
     "\\:whale2:" => "🐋",
     "\\:whale:" => "🐳",
+    "\\:wheel:" => "🛞",
     "\\:wheelchair:" => "♿",
     "\\:white_check_mark:" => "✅",
     "\\:white_circle:" => "⚪",
@@ -1202,7 +1257,9 @@ const emoji_symbols = Dict(
     "\\:wind_chime:" => "🎐",
     "\\:window:" => "🪟",
     "\\:wine_glass:" => "🍷",
+    "\\:wing:" => "🪽",
     "\\:wink:" => "😉",
+    "\\:wireless:" => "🛜",
     "\\:wolf:" => "🐺",
     "\\:woman:" => "👩",
     "\\:womans_clothes:" => "👚",
@@ -1215,6 +1272,7 @@ const emoji_symbols = Dict(
     "\\:worried:" => "😟",
     "\\:wrench:" => "🔧",
     "\\:wrestlers:" => "🤼",
+    "\\:x-ray:" => "🩻",
     "\\:x:" => "❌",
     "\\:yarn:" => "🧶",
     "\\:yawning_face:" => "🥱",
diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl
index 00be62dbb170a..9f5b7e3e864ed 100644
--- a/stdlib/REPL/src/latex_symbols.jl
+++ b/stdlib/REPL/src/latex_symbols.jl
@@ -119,6 +119,46 @@ const latex_symbols = Dict(
     "\\euler" => "ℯ",
     "\\ohm" => "Ω",
 
+    # Music Symbols
+    # Music Symbols - Accidentals
+    "\\flatflat" => "𝄫",
+    "\\sharpsharp" => "𝄪",
+    # Music Symbols - Codas
+    "\\leftrepeatsign" => "𝄆",
+    "\\rightrepeatsign" => "𝄇",
+    "\\dalsegno" => "𝄉",
+    "\\dacapo" => "𝄊",
+    "\\segno" => "𝄋",
+    "\\coda" => "𝄌",
+    # Music Symbols - Clefs
+    "\\clefg" => "𝄞",
+    "\\clefg8va" => "𝄟",
+    "\\clefg8vb" => "𝄠",
+    "\\clefc" => "𝄡",
+    "\\cleff" => "𝄢",
+    "\\cleff8va" => "𝄣",
+    "\\cleff8vb" => "𝄤",
+     # Music Symbols - Rests
+    "\\restmulti" => "𝄺",
+    "\\restwhole" => "𝄻",
+    "\\resthalf" => "𝄼",
+    "\\restquarter" => "𝄽",
+    "\\rest8th" => "𝄾",
+    "\\rest16th" => "𝄿",
+    "\\rest32th" => "𝅀",
+    "\\rest64th" => "𝅁",
+    "\\rest128th" => "𝅂",
+    # Music Symbols - Notes
+    "\\notedoublewhole" => "𝅜",
+    "\\notewhole" => "𝅝",
+    "\\notehalf" => "𝅗𝅥",
+    "\\notequarter" => "𝅘𝅥",
+    "\\note8th" => "𝅘𝅥𝅮",
+    "\\note16th" => "𝅘𝅥𝅯",
+    "\\note32th" => "𝅘𝅥𝅰",
+    "\\note64th" => "𝅘𝅥𝅱",
+    "\\note128th" => "𝅘𝅥𝅲",
+
     # Superscripts
     "\\^0" => "⁰",
     "\\^1" => "¹",
@@ -207,6 +247,8 @@ const latex_symbols = Dict(
     "\\_+" => "₊",
     "\\_-" => "₋",
     "\\_=" => "₌",
+    "\\_<" => "˱",
+    "\\_>" => "˲",
     "\\_(" => "₍",
     "\\_)" => "₎",
     "\\_a" => "ₐ",
diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl
index 7299742eaef1c..82a1a0bb78ee8 100644
--- a/stdlib/REPL/src/precompile.jl
+++ b/stdlib/REPL/src/precompile.jl
@@ -1,15 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 module Precompile
-# Can't use this during incremental: `@eval Module() begin``
 
 import ..REPL
-# Prepare this staging area with all the loaded packages available
-for (_pkgid, _mod) in Base.loaded_modules
-    if !(_pkgid.name in ("Main", "Core", "Base", "REPL"))
-        eval(:(const $(Symbol(_mod)) = $_mod))
-    end
-end
 
 # Ugly hack for our cache file to not have a dependency edge on the FakePTYs file.
 Base._track_dependencies[] = false
@@ -19,92 +12,72 @@ try
 finally
     Base._track_dependencies[] = true
 end
-using Base.Meta
-
-import Markdown
-import StyledStrings
-
-## Debugging options
-# Disable parallel precompiles generation by setting `false`
-const PARALLEL_PRECOMPILATION = true
-
-# View the code sent to the repl by setting this to `stdout`
-const debug_output = devnull # or stdout
-
-CTRL_C = '\x03'
-CTRL_D = '\x04'
-CTRL_R = '\x12'
-UP_ARROW = "\e[A"
-DOWN_ARROW = "\e[B"
-
-repl_script = """
-2+2
-print("")
-printstyled("a", "b")
-display([1])
-display([1 2; 3 4])
-foo(x) = 1
-@time @eval foo(1)
-; pwd
-$CTRL_C
-$CTRL_R$CTRL_C#
-? reinterpret
-using Ra\t$CTRL_C
-\\alpha\t$CTRL_C
-\e[200~paste here ;)\e[201~"$CTRL_C
-$UP_ARROW$DOWN_ARROW$CTRL_C
-123\b\b\b$CTRL_C
-\b\b$CTRL_C
-f(x) = x03
-f(1,2)
-[][1]
-cd("complete_path\t\t$CTRL_C
-"""
-
-julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename())
-
-const JULIA_PROMPT = "julia> "
-const PKG_PROMPT = "pkg> "
-const SHELL_PROMPT = "shell> "
-const HELP_PROMPT = "help?> "
 
-blackhole = Sys.isunix() ? "/dev/null" : "nul"
-procenv = Dict{String,Any}(
-        "JULIA_HISTORY" => blackhole,
-        "JULIA_PROJECT" => nothing, # remove from environment
-        "JULIA_LOAD_PATH" => "@stdlib",
-        "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
-        "TERM" => "",
-        "JULIA_FALLBACK_REPL" => "0") # Turn REPL.jl on in subprocess
-
-generate_precompile_statements() = try
-    # Extract the precompile statements from the precompile file
-    statements_step = Channel{String}(Inf)
-
-    step = @async mktemp() do precompile_file, precompile_file_h
-        # Collect statements from running a REPL process and replaying our REPL script
-        touch(precompile_file)
-        pts, ptm = open_fake_pty()
-        # we don't want existing REPL caches to be used so ignore them
-        setup_cmd = """
-        push!(Base.ignore_compiled_cache, Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL"))
-        import REPL
-        REPL.Terminals.is_precompiling[] = true
-        """
-        p = run(
-                addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file
-                    --cpu-target=native --startup-file=no --compiled-modules=existing
-                    --color=yes -i -e "$setup_cmd"```, procenv),
-                pts, pts, pts; wait=false
-            )
-        Base.close_stdio(pts)
-        # Prepare a background process to copy output from process until `pts` is closed
+let
+    ## Debugging options
+    # View the code sent to the repl by setting this to `stdout`
+    debug_output = devnull # or stdout
+
+    CTRL_C = '\x03'
+    CTRL_D = '\x04'
+    CTRL_R = '\x12'
+    UP_ARROW = "\e[A"
+    DOWN_ARROW = "\e[B"
+
+    repl_script = """
+    2+2
+    print("")
+    printstyled("a", "b")
+    display([1])
+    display([1 2; 3 4])
+    foo(x) = 1
+    @time @eval foo(1)
+    ; pwd
+    $CTRL_C
+    $CTRL_R$CTRL_C#
+    ? reinterpret
+    using Ra\t$CTRL_C
+    \\alpha\t$CTRL_C
+    \e[200~paste here ;)\e[201~"$CTRL_C
+    $UP_ARROW$DOWN_ARROW$CTRL_C
+    123\b\b\b$CTRL_C
+    \b\b$CTRL_C
+    f(x) = x03
+    f(1,2)
+    [][1]
+    Base.Iterators.minimum
+    cd("complete_path\t\t$CTRL_C
+    println("done")
+    """
+
+    JULIA_PROMPT = "julia> "
+    PKG_PROMPT = "pkg> "
+    SHELL_PROMPT = "shell> "
+    HELP_PROMPT = "help?> "
+
+    blackhole = Sys.isunix() ? "/dev/null" : "nul"
+
+    withenv("JULIA_HISTORY" => blackhole,
+            "JULIA_PROJECT" => nothing, # remove from environment
+            "JULIA_LOAD_PATH" => "@stdlib",
+            "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
+            "TERM" => "",
+            "JULIA_FALLBACK_REPL" => "0" # Make sure REPL.jl is turned on
+            ) do
+        rawpts, ptm = open_fake_pty()
+        pts = open(rawpts)::Base.TTY
+        if Sys.iswindows()
+            pts.ispty = false
+        else
+            # workaround libuv bug where it leaks pts
+            Base._fd(pts) == rawpts || Base.close_stdio(rawpts)
+        end
+        # Prepare a background process to copy output from `ptm` until `pts` is closed
         output_copy = Base.BufferStream()
         tee = @async try
             while !eof(ptm)
                 l = readavailable(ptm)
                 write(debug_output, l)
-                Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue?
                 write(output_copy, l)
             end
             write(debug_output, "\n#### EOF ####\n")
@@ -117,9 +90,31 @@ generate_precompile_statements() = try
             close(ptm)
         end
         Base.errormonitor(tee)
-        repl_inputter = @async begin
+        orig_stdin = stdin
+        orig_stdout = stdout
+        orig_stderr = stderr
+        repltask = @task try
+            Base.run_std_repl(REPL, false, :yes, true)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
+        end
+        Base.errormonitor(repltask)
+        try
+            Base.REPL_MODULE_REF[] = REPL
+            redirect_stdin(pts)
+            redirect_stdout(pts)
+            redirect_stderr(pts)
+            try
+                REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed
+            finally
+                redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull)
+            end
+            schedule(repltask)
             # wait for the definitive prompt before start writing to the TTY
             readuntil(output_copy, JULIA_PROMPT)
+            write(debug_output, "\n#### REPL STARTED ####\n")
             sleep(0.1)
             readavailable(output_copy)
             # Input our script
@@ -151,73 +146,21 @@ generate_precompile_statements() = try
             end
             write(debug_output, "\n#### COMPLETED - Closing REPL ####\n")
             write(ptm, "$CTRL_D")
-            wait(tee)
-            success(p) || Base.pipeline_error(p)
-            close(ptm)
-            write(debug_output, "\n#### FINISHED ####\n")
-        end
-        Base.errormonitor(repl_inputter)
-
-        n_step = 0
-        precompile_copy = Base.BufferStream()
-        buffer_reader = @async for statement in eachline(precompile_copy)
-            push!(statements_step, statement)
-            n_step += 1
-        end
-
-        open(precompile_file, "r") do io
-            while true
-                # We need to always call eof(io) for bytesavailable(io) to work
-                eof(io) && istaskdone(repl_inputter) && eof(io) && break
-                if bytesavailable(io) == 0
-                    sleep(0.1)
-                    continue
-                end
-                write(precompile_copy, readavailable(io))
-            end
-        end
-        close(precompile_copy)
-        wait(buffer_reader)
-        return :ok
-    end
-    !PARALLEL_PRECOMPILATION && wait(step)
-    bind(statements_step, step)
-
-    # Make statements unique
-    statements = Set{String}()
-    # Execute the precompile statements
-    for statement in statements_step
-        # Main should be completely clean
-        occursin("Main.", statement) && continue
-        Base.in!(statement, statements) && continue
-        try
-            ps = Meta.parse(statement)
-            if !isexpr(ps, :call)
-                # these are typically comments
-                @debug "skipping statement because it does not parse as an expression" statement
-                delete!(statements, statement)
-                continue
-            end
-            popfirst!(ps.args) # precompile(...)
-            ps.head = :tuple
-            # println(ps)
-            ps = eval(ps)
-            if !precompile(ps...)
-                @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0
-            end
-        catch ex
-            # See #28808
-            @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0
+            wait(repltask)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
         end
+        wait(tee)
     end
-
-    fetch(step) == :ok || throw("Collecting precompiles failed: $(c.excp)")
-    return nothing
-finally
-    GC.gc(true); GC.gc(false); # reduce memory footprint
+    write(debug_output, "\n#### FINISHED ####\n")
+    nothing
 end
 
-generate_precompile_statements()
+precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int})
+precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String})
+precompile(Tuple{typeof(Base.:(==)), Char, String})
+precompile(Tuple{typeof(Base.reseteof), Base.TTY})
 
-precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol})
 end # Precompile
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index c594958a36670..9455632d9f418 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -17,9 +17,9 @@ function simulate_input(menu::TerminalMenus.AbstractMenu, keys...; kwargs...)
             write(new_stdin, "$key")
         end
     end
-    TerminalMenus.terminal.in_stream = new_stdin
+    terminal = TerminalMenus.default_terminal(; in=new_stdin, out=devnull)
 
-    return request(menu; suppress_output=true, kwargs...)
+    return request(terminal, menu; suppress_output=true, kwargs...)
 end
 
 include("radio_menu.jl")
diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl
index 123ff820bc939..6b374ed7f0149 100644
--- a/stdlib/REPL/test/docview.jl
+++ b/stdlib/REPL/test/docview.jl
@@ -28,13 +28,6 @@ end
     @test occursin("Couldn't find 'mutable s'", str)
 end
 
-@testset "Non-Markdown" begin
-    # https://github.com/JuliaLang/julia/issues/37765
-    @test isa(REPL.insert_hlines(Markdown.Text("foo")), Markdown.Text)
-    # https://github.com/JuliaLang/julia/issues/37757
-    @test REPL.insert_hlines(nothing) === nothing
-end
-
 @testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin
     checks = ["var", "raw", "r"]
     symbols = "@" .* checks .* "_str"
diff --git a/stdlib/REPL/test/precompilation.jl b/stdlib/REPL/test/precompilation.jl
index 2dcf78c114d9a..7efcf0b5e8282 100644
--- a/stdlib/REPL/test/precompilation.jl
+++ b/stdlib/REPL/test/precompilation.jl
@@ -15,8 +15,11 @@ if !Sys.iswindows()
     @testset "No interactive startup compilation" begin
         f, _ = mktemp()
 
-        # start an interactive session
-        cmd = `$(Base.julia_cmd()[1]) --trace-compile=$f -q --startup-file=no -i`
+        # start an interactive session, ensuring `TERM` is unset since it can trigger
+        # different amounts of precompilation stemming from `base/terminfo.jl` depending
+        # on the value, making the test here unreliable
+        cmd = addenv(`$(Base.julia_cmd()[1]) --trace-compile=$f -q --startup-file=no -i`,
+                     Dict("TERM" => ""))
         pts, ptm = open_fake_pty()
         p = run(cmd, pts, pts, pts; wait=false)
         Base.close_stdio(pts)
@@ -27,7 +30,10 @@ if !Sys.iswindows()
         tracecompile_out = read(f, String)
         close(ptm) # close after reading so we don't get precompiles from error shutdown
 
-        expected_precompiles = 1
+        # given this test checks that startup is snappy, it's best to add workloads to
+        # contrib/generate_precompile.jl rather than increase this number. But if that's not
+        # possible, it'd be helpful to add a comment with the statement and a reason below
+        expected_precompiles = 0
 
         n_precompiles = count(r"precompile\(", tracecompile_out)
 
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 12f3f8956122e..f4d594b2a02e1 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -3,12 +3,13 @@
 using Test
 using REPL
 using Random
+using Logging
 import REPL.LineEdit
 using Markdown
 
 empty!(Base.Experimental._hint_handlers) # unregister error hints so they can be tested separately
 
-@test isassigned(Base.REPL_MODULE_REF)
+@test Base.REPL_MODULE_REF[] === REPL
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
@@ -114,7 +115,7 @@ fake_repl() do stdin_write, stdout_read, repl
     Base.wait(repltask)
 end
 
-# These are integration tests. If you want to unit test test e.g. completion, or
+# These are integration tests. If you want to unit test e.g. completion, or
 # exact LineEdit behavior, put them in the appropriate test files.
 # Furthermore since we are emulating an entire terminal, there may be control characters
 # in the mix. If verification needs to be done, keep it to the bare minimum. Basically
@@ -243,8 +244,9 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         @test occursin("shell> ", s) # check for the echo of the prompt
         @test occursin("'", s) # check for the echo of the input
         s = readuntil(stdout_read, "\n\n")
-        @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
-              startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] ")
+        @test(startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
+            startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] "),
+            skip = Sys.iswindows() && Sys.WORD_SIZE == 32)
         write(stdin_write, "\b")
         wait(t)
     end
@@ -500,8 +502,9 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         repl_mode = repl.interface.modes[1]
         shell_mode = repl.interface.modes[2]
         help_mode = repl.interface.modes[3]
-        histp = repl.interface.modes[4]
-        prefix_mode = repl.interface.modes[5]
+        pkg_mode = repl.interface.modes[4]
+        histp = repl.interface.modes[5]
+        prefix_mode = repl.interface.modes[6]
 
         hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode,
                                                        :shell => shell_mode,
@@ -1214,9 +1217,9 @@ global some_undef_global
 @test occursin("does not exist", sprint(show, help_result("..")))
 # test that helpmode is sensitive to contextual module
 @test occursin("No documentation found", sprint(show, help_result("Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Base.Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Fix2", Base)))
 
 
@@ -1399,6 +1402,126 @@ end
     Base.wait(backend.backend_task)
 end
 
+# Mimic of JSON.jl's structure
+module JSON54872
+
+module Parser
+export parse
+function parse end
+end # Parser
+
+using .Parser: parse
+end # JSON54872
+
+# Test the public mechanism
+module JSON54872_public
+public tryparse
+end # JSON54872_public
+
+@testset "warn_on_non_owning_accesses AST transform" begin
+    @test REPL.has_ancestor(JSON54872.Parser, JSON54872)
+    @test !REPL.has_ancestor(JSON54872, JSON54872.Parser)
+
+    # JSON54872.Parser owns `parse`
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.Parser.parse
+    end)
+    @test isempty(warnings)
+
+    # A submodule of `JSON54872` owns `parse`
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.parse
+    end)
+    @test isempty(warnings)
+
+    # `JSON54872` does not own `tryparse` (nor is it public)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.tryparse
+    end)
+    @test length(warnings) == 1
+    @test only(warnings).owner == Base
+    @test only(warnings).name_being_accessed == :tryparse
+
+    # Same for nested access
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.Parser.tryparse
+    end)
+    @test length(warnings) == 1
+    @test only(warnings).owner == Base
+    @test only(warnings).name_being_accessed == :tryparse
+
+    test_logger = TestLogger()
+    with_logger(test_logger) do
+        REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse))
+        REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse))
+    end
+    # only 1 logging statement emitted thanks to `maxlog` mechanism
+    @test length(test_logger.logs) == 1
+    record = only(test_logger.logs)
+    @test record.level == Warn
+    @test record.message == "tryparse is defined in Base and is not public in $JSON54872"
+
+    # However JSON54872_public has `tryparse` declared public
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872_public.tryparse
+    end)
+    @test isempty(warnings)
+
+    # Now let us test some tricky cases
+    # No warning since `JSON54872` is local (LHS of `=`)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = (; tryparse=1)
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning for nested local access either
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = (; Parser = (; tryparse=1))
+            JSON54872.Parser.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (long-form function arg)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        function f(JSON54872=(; tryparse))
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (short-form function arg)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        f(JSON54872=(; tryparse)) = JSON54872.tryparse
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (long-form anonymous function)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        function (JSON54872=(; tryparse))
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (short-form anonymous function)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        (JSON54872 = (; tryparse)) -> begin
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # false-negative: missing warning
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = JSON54872
+            JSON54872.tryparse
+        end
+    end)
+    @test_broken !isempty(warnings)
+end
 
 backend = REPL.REPLBackend()
 frontend_task = @async begin
@@ -1477,9 +1600,43 @@ end
         @test isempty(mods)
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; Core.eval(Main,\"using Foo\") end"))
         @test mods == [:Foo]
+
+        mods = REPL.modules_to_be_loaded(:(import .Foo: a))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(:(using .Foo: a))
+        @test isempty(mods)
     end
 end
 
+# Test that the REPL can find `using` statements inside macro expansions
+global packages_requested = Any[]
+old_hooks = copy(REPL.install_packages_hooks)
+empty!(REPL.install_packages_hooks)
+push!(REPL.install_packages_hooks, function(pkgs)
+    append!(packages_requested, pkgs)
+end)
+
+fake_repl() do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+
+    # Just consume all the output - we only test that the callback ran
+    read_resp_task = @async while !eof(stdout_read)
+        readavailable(stdout_read)
+    end
+
+    write(stdin_write, "macro usingfoo(); :(using FooNotFound); end\n")
+    write(stdin_write, "@usingfoo\n")
+    write(stdin_write, "\x4")
+    Base.wait(repltask)
+    close(stdin_write)
+    close(stdout_read)
+    Base.wait(read_resp_task)
+end
+@test packages_requested == Any[:FooNotFound]
+empty!(REPL.install_packages_hooks); append!(REPL.install_packages_hooks, old_hooks)
+
 # err should reprint error if deeper than top-level
 fake_repl() do stdin_write, stdout_read, repl
     repltask = @async begin
@@ -1494,12 +1651,12 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "foobar\n")
     readline(stdout_read)
     @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`"
-    @test readline(stdout_read) == ""
+    @test readline(stdout_read) == "" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # check that top-level error did not change `err`
     write(stdin_write, "err\n")
     readline(stdout_read)
-    @test readline(stdout_read) == "\e[0m"
+    @test readline(stdout_read) == "\e[0m" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # generate deeper error
     write(stdin_write, "foo() = foobar\n")
@@ -1559,8 +1716,9 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         repl_mode = repl.interface.modes[1]
         shell_mode = repl.interface.modes[2]
         help_mode = repl.interface.modes[3]
-        histp = repl.interface.modes[4]
-        prefix_mode = repl.interface.modes[5]
+        pkg_mode = repl.interface.modes[4]
+        histp = repl.interface.modes[5]
+        prefix_mode = repl.interface.modes[6]
 
         hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode,
                                                        :shell => shell_mode,
@@ -1805,3 +1963,25 @@ end
     @test_broken isempty(undoc)
     @test undoc == [:AbstractREPL, :BasicREPL, :LineEditREPL, :StreamREPL]
 end
+
+@testset "Dummy Pkg prompt" begin
+    # do this in an empty depot to test default for new users
+    withenv("JULIA_DEPOT_PATH" => mktempdir(), "JULIA_LOAD_PATH" => nothing) do
+        prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.Pkg_promptf())"`)
+        @test prompt == "(@v$(VERSION.major).$(VERSION.minor)) pkg> "
+    end
+
+    get_prompt(proj::String) = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no $(proj) -e "using REPL; print(REPL.Pkg_promptf())"`)
+
+    @test get_prompt("--project=$(pkgdir(REPL))") == "(REPL) pkg> "
+
+    tdir = mkpath(joinpath(mktempdir(), "foo"))
+    @test get_prompt("--project=$tdir") == "(foo) pkg> "
+
+    proj_file = joinpath(tdir, "Project.toml")
+    touch(proj_file) # make a bad Project.toml
+    @test get_prompt("--project=$proj_file") == "(foo) pkg> "
+
+    write(proj_file, "name = \"Bar\"\n")
+    @test get_prompt("--project=$proj_file") == "(Bar) pkg> "
+end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index 0a73a944ec8ea..3f8addcace73b 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -120,6 +120,7 @@ let ex = quote
         kwtest5(a::Char, b; xyz) = pass
 
         const named = (; len2=3)
+        const fmsoebelkv = (; len2=3)
 
         array = [1, 1]
         varfloat = 0.1
@@ -163,7 +164,8 @@ end
 test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s))))
 test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s, lastindex(s))))
 test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2])
-test_complete_context(s, m=@__MODULE__) =  map_completion_text(@inferred(completions(s,lastindex(s), m)))
+test_complete_context(s, m=@__MODULE__; shift::Bool=true) =
+    map_completion_text(@inferred(completions(s,lastindex(s), m, shift)))
 test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo)
 test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false)))
 
@@ -2129,7 +2131,7 @@ end
 end
 
 # issue #51194
-for (s, compl) in (("2*CompletionFoo.nam", "named"),
+for (s, compl) in (("2*CompletionFoo.fmsoe", "fmsoebelkv"),
                    (":a isa CompletionFoo.test!1", "test!12"),
                    ("-CompletionFoo.Test_y(3).", "yy"),
                    ("99 ⨷⁻ᵨ⁷ CompletionFoo.type_test.", "xx"),
@@ -2138,8 +2140,11 @@ for (s, compl) in (("2*CompletionFoo.nam", "named"),
                    ("CompletionFoo.type_test + CompletionFoo.unicode_αβγ.", "yy"),
                    ("(CompletionFoo.type_test + CompletionFoo.unicode_αβγ).", "xx"),
                    ("foo'CompletionFoo.test!1", "test!12"))
-    c, r = test_complete(s)
-    @test only(c) == compl
+    @testset let s=s, compl=compl
+        c, r = test_complete_noshift(s)
+        @test length(c) == 1
+        @test only(c) == compl
+    end
 end
 
 # allows symbol completion within incomplete :macrocall
@@ -2215,6 +2220,12 @@ let s = "using Base."
     @test res
     @test "BinaryPlatforms" in c
 end
+# JuliaLang/julia#53999
+let s = "using Base.Sort, Base.Th"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Threads" in c
+end
 # test cases with the `.` accessor
 module Issue52922
 module Inner1
@@ -2227,6 +2238,26 @@ let s = "using .Iss"
     @test res
     @test "Issue52922" in c
 end
+let s = " using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = " @time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time(using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
 let s = "using .Issue52922.Inn"
     c, r, res = test_complete_context(s)
     @test res
@@ -2260,3 +2291,128 @@ let s = "Issue53126()."
     @test res
     @test isempty(c)
 end
+
+# complete explicitly `using`ed names
+baremodule TestExplicitUsing
+using Base: @assume_effects
+end # baremodule TestExplicitUsing
+let s = "@assu"
+    c, r, res = test_complete_context(s, TestExplicitUsing)
+    @test res
+    @test "@assume_effects" in c
+end
+let s = "TestExplicitUsing.@assu"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" in c
+end
+baremodule TestExplicitUsingNegative end
+let s = "@assu"
+    c, r, res = test_complete_context(s, TestExplicitUsingNegative)
+    @test res
+    @test "@assume_effects" ∉ c
+end
+let s = "TestExplicitUsingNegative.@assu"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" ∉ c
+end
+# should complete implicitly `using`ed names
+module TestImplicitUsing end
+let s = "@asse"
+    c, r, res = test_complete_context(s, TestImplicitUsing)
+    @test res
+    @test "@assert" in c
+end
+let s = "TestImplicitUsing.@asse"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assert" in c
+end
+
+# JuliaLang/julia#23374: completion for `import Mod.name`
+module Issue23374
+global v23374 = nothing
+global w23374 = missing
+end
+let s = "import .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "import Base.sin, .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "using .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test isempty(c)
+end
+# JuliaLang/julia#23374: completion for `using Mod: name`
+let s = "using Base: @ass"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" in c
+end
+let s = "using .Issue23374: v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "using .Issue23374: v23374, w"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "w23374" in c
+end
+# completes `using ` to `using [list of available modules]`
+let s = "using "
+    c, r, res = test_complete_context(s)
+    @test res
+    @test !isempty(c)
+end
+
+baremodule _TestInternalBindingOnly
+export binding1, binding2
+global binding1 = global binding2 = nothing
+end
+baremodule TestInternalBindingOnly
+using .._TestInternalBindingOnly
+global binding = nothing
+export binding
+end
+for s = ("TestInternalBindingOnly.bind", "using .TestInternalBindingOnly: bind")
+    # when module is explicitly accessed, completion should show internal names only
+    let (c, r, res) = test_complete_context(s; shift=false)
+        @test res
+        @test "binding" ∈ c
+        @test "binding1" ∉ c && "binding2" ∉ c
+    end
+    # unless completion is forced via shift key
+    let (c, r, res) = test_complete_context(s, TestInternalBindingOnly)
+        @test res
+        @test "binding" ∈ c
+        @test "binding1" ∈ c && "binding2" ∈ c
+    end
+end
+# without explicit module access, completion should show all available names
+let (c, r, res) = test_complete_context("bind", TestInternalBindingOnly; shift=false)
+    @test res
+    @test "binding" ∈ c
+    @test "binding1" ∈ c && "binding2" ∈ c
+end
+let (c, r, res) = test_complete_context("si", Main; shift=false)
+    @test res
+    @test "sin" ∈ c
+end
+
+let (c, r, res) = test_complete_context("const xxx = Base.si", Main)
+    @test res
+    @test "sin" ∈ c
+end
+
+let (c, r, res) = test_complete_context("global xxx::Number = Base.", Main)
+    @test res
+    @test "pi" ∈ c
+end
diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md
index 96320ee216198..9ef86bb0d94f8 100644
--- a/stdlib/Random/docs/src/index.md
+++ b/stdlib/Random/docs/src/index.md
@@ -350,8 +350,8 @@ DocTestSetup = nothing
 
 By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom
 number sequence when running your program multiple times. However, a minor release of Julia (e.g.
-1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in
-particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like
+1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed.
+(Even if the sequence produced by a low-level function like
 [`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may
 change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change
 prohibits many algorithmic improvements.
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 432e32a4de691..26116d3bf4c81 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -4,7 +4,7 @@
     Random
 
 Support for generating random numbers. Provides [`rand`](@ref), [`randn`](@ref),
-[`AbstractRNG`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref).
+[`AbstractRNG`](@ref), [`Xoshiro`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref).
 """
 module Random
 
@@ -29,6 +29,8 @@ export rand!, randn!,
        randcycle, randcycle!,
        AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro
 
+public seed!, default_rng, Sampler, SamplerType, SamplerTrivial, SamplerSimple
+
 ## general definitions
 
 """
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index 29010a52eed91..bc476181e5b0d 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -80,7 +80,7 @@ const TAGS = Any[
 const NTAGS = length(TAGS)
 @assert NTAGS == 255
 
-const ser_version = 27 # do not make changes without bumping the version #!
+const ser_version = 29 # do not make changes without bumping the version #!
 
 format_version(::AbstractSerializer) = ser_version
 format_version(s::Serializer) = s.version
@@ -470,11 +470,6 @@ end
 function serialize(s::AbstractSerializer, linfo::Core.MethodInstance)
     serialize_cycle(s, linfo) && return
     writetag(s.io, METHODINSTANCE_TAG)
-    if isdefined(linfo, :uninferred)
-        serialize(s, linfo.uninferred)
-    else
-        writetag(s.io, UNDEFREF_TAG)
-    end
     serialize(s, nothing)  # for backwards compat
     serialize(s, linfo.sparam_vals)
     serialize(s, Any)  # for backwards compat
@@ -1085,6 +1080,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     end
     if makenew
         meth.module = mod
+        meth.debuginfo = NullDebugInfo
         meth.name = name
         meth.file = file
         meth.line = line
@@ -1097,7 +1093,13 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         meth.purity = purity
         if template !== nothing
             # TODO: compress template
-            meth.source = template::CodeInfo
+            template = template::CodeInfo
+            if format_version(s) < 29
+                template.nargs = nargs
+                template.isva = isva
+            end
+            meth.source = template
+            meth.debuginfo = template.debuginfo
             if !@isdefined(slot_syms)
                 slot_syms = ccall(:jl_compress_argnames, Ref{String}, (Any,), meth.source.slotnames)
             end
@@ -1123,9 +1125,13 @@ end
 function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Cvoid},), C_NULL)
     deserialize_cycle(s, linfo)
-    tag = Int32(read(s.io, UInt8)::UInt8)
-    if tag != UNDEFREF_TAG
-        setfield!(linfo, :uninferred, handle_deserialize(s, tag)::CodeInfo, :monotonic)
+    if format_version(s) < 28
+        tag = Int32(read(s.io, UInt8)::UInt8)
+        if tag != UNDEFREF_TAG
+            code = handle_deserialize(s, tag)::CodeInfo
+            ci = ccall(:jl_new_codeinst_for_uninferred, Ref{CodeInstance}, (Any, Any), linfo, code)
+            @atomic linfo.cache = ci
+        end
     end
     tag = Int32(read(s.io, UInt8)::UInt8)
     if tag != UNDEFREF_TAG
@@ -1151,6 +1157,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.LineInfoNode})
     return Core.LineInfoNode(mod, method, deserialize(s)::Symbol, Int32(deserialize(s)::Union{Int32, Int}), Int32(deserialize(s)::Union{Int32, Int}))
 end
 
+
 function deserialize(s::AbstractSerializer, ::Type{PhiNode})
     edges = deserialize(s)
     if edges isa Vector{Any}
@@ -1165,6 +1172,7 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     deserialize_cycle(s, ci)
     code = deserialize(s)::Vector{Any}
     ci.code = code
+    ci.debuginfo = NullDebugInfo
     # allow older-style IR with return and gotoifnot Exprs
     for i in 1:length(code)
         stmt = code[i]
@@ -1177,17 +1185,27 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
             end
         end
     end
-    ci.codelocs = deserialize(s)::Vector{Int32}
+    _x = deserialize(s)
+    have_debuginfo = _x isa Core.DebugInfo
+    if have_debuginfo
+        ci.debuginfo = _x
+    else
+        codelocs = _x::Vector{Int32}
+        # TODO: convert codelocs to debuginfo format?
+    end
     _x = deserialize(s)
     if _x isa Array || _x isa Int
         pre_12 = false
-        ci.ssavaluetypes = _x
     else
         pre_12 = true
         # < v1.2
         ci.method_for_inference_limit_heuristics = _x
-        ci.ssavaluetypes = deserialize(s)
-        ci.linetable = deserialize(s)
+        _x = deserialize(s)
+    end
+    ci.ssavaluetypes = _x
+    if pre_12
+        linetable = deserialize(s)
+        # TODO: convert linetable to debuginfo format?
     end
     ssaflags = deserialize(s)
     if length(ssaflags) ≠ length(code)
@@ -1204,32 +1222,30 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
         if format_version(s) <= 26
             ci.method_for_inference_limit_heuristics = deserialize(s)
         end
-        ci.linetable = deserialize(s)
+        if !have_debuginfo # pre v1.11 format
+            linetable = deserialize(s)
+            # TODO: convert linetable to debuginfo format?
+        end
     end
     ci.slotnames = deserialize(s)
     if !pre_12
         ci.slotflags = deserialize(s)
         ci.slottypes = deserialize(s)
-        if format_version(s) <= 26
-            deserialize(s) # rettype
-            ci.parent = deserialize(s)
-            world_or_edges = deserialize(s)
-            pre_13 = isa(world_or_edges, Union{UInt, Int})
-            if pre_13
-                ci.min_world = reinterpret(UInt, world_or_edges)
-                ci.max_world = reinterpret(UInt, deserialize(s))
-            else
-                ci.edges = world_or_edges
-                ci.min_world = deserialize(s)::UInt
-                ci.max_world = deserialize(s)::UInt
-            end
+        ci.rettype = deserialize(s)
+        ci.parent = deserialize(s)
+        world_or_edges = deserialize(s)
+        pre_13 = isa(world_or_edges, Union{UInt, Int})
+        if pre_13
+            ci.min_world = reinterpret(UInt, world_or_edges)
+            ci.max_world = reinterpret(UInt, deserialize(s))
         else
-            ci.parent = deserialize(s)
-            ci.method_for_inference_limit_heuristics = deserialize(s)
-            ci.edges = deserialize(s)
+            ci.edges = world_or_edges
             ci.min_world = deserialize(s)::UInt
             ci.max_world = deserialize(s)::UInt
         end
+        if format_version(s) >= 26
+            ci.method_for_inference_limit_heuristics = deserialize(s)
+        end
     end
     if format_version(s) <= 26
         deserialize(s)::Bool # inferred
@@ -1242,6 +1258,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
             ci.inlining_cost = inlining_cost
         end
     end
+    if format_version(s) >= 29
+        ci.nargs = deserialize(s)
+    end
     ci.propagate_inbounds = deserialize(s)
     if format_version(s) < 23
         deserialize(s) # `pure` field has been removed
@@ -1252,6 +1271,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     if format_version(s) >= 24
         ci.nospecializeinfer = deserialize(s)::Bool
     end
+    if format_version(s) >= 29
+        ci.isva = deserialize(s)::Bool
+    end
     if format_version(s) >= 21
         ci.inlining = deserialize(s)::UInt8
     end
@@ -1266,9 +1288,12 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     if format_version(s) >= 22
         ci.inlining_cost = deserialize(s)::UInt16
     end
+    ci.debuginfo = NullDebugInfo
     return ci
 end
 
+import Core: NullDebugInfo
+
 if Int === Int64
 const OtherInt = Int32
 else
@@ -1545,11 +1570,11 @@ function deserialize(s::AbstractSerializer, ::Type{Task})
     t.storage = deserialize(s)
     state = deserialize(s)
     if state === :runnable
-        t._state = Base.task_state_runnable
+        @atomic :release t._state = Base.task_state_runnable
     elseif state === :done
-        t._state = Base.task_state_done
+        @atomic :release t._state = Base.task_state_done
     elseif state === :failed
-        t._state = Base.task_state_failed
+        @atomic :release t._state = Base.task_state_failed
     else
         @assert false
     end
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 5baf8826cc883..3c30b214305fb 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -450,7 +450,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl
index 4ee9e07a58430..866a1684c85a1 100644
--- a/stdlib/Sockets/src/addrinfo.jl
+++ b/stdlib/Sockets/src/addrinfo.jl
@@ -90,7 +90,7 @@ function getalladdrinfo(host::String)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
@@ -223,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6})
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index 2a812388d8373..2c50b4a0f8b4a 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -223,7 +223,8 @@ end
     end
     @test getnameinfo(ip"192.0.2.1") == "192.0.2.1"
     @test getnameinfo(ip"198.51.100.1") == "198.51.100.1"
-    @test getnameinfo(ip"203.0.113.1") == "203.0.113.1"
+    # Temporarily broken due to a DNS issue. See https://github.com/JuliaLang/julia/issues/55008
+    @test_skip getnameinfo(ip"203.0.113.1") == "203.0.113.1"
     @test getnameinfo(ip"0.1.1.1") == "0.1.1.1"
     @test getnameinfo(ip"::ffff:0.1.1.1") == "::ffff:0.1.1.1"
     @test getnameinfo(ip"::ffff:192.0.2.1") == "::ffff:192.0.2.1"
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
index 5eca7be5ec9b7..019306a3e9f65 100644
--- a/stdlib/SparseArrays.version
+++ b/stdlib/SparseArrays.version
@@ -1,4 +1,4 @@
 SPARSEARRAYS_BRANCH = main
-SPARSEARRAYS_SHA1 = cb602d7b7cf46057ddc87d23cda2bdd168a548ac
+SPARSEARRAYS_SHA1 = 0dd8d45d55b305458d0d3d3451057589b684f72f
 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
 SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version
index 19a5a24514f2f..83fbece4c8bc0 100644
--- a/stdlib/StyledStrings.version
+++ b/stdlib/StyledStrings.version
@@ -1,4 +1,4 @@
 STYLEDSTRINGS_BRANCH = main
-STYLEDSTRINGS_SHA1 = e0ca0f85412ea5cafabfeaaec4d62ca26c3959d2
+STYLEDSTRINGS_SHA1 = f6035eb97b516862b16e36cab2ecc6ea8adc3d7c
 STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git
 STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1
diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml
index 24d2e42dbaea9..39b8447138a2d 100644
--- a/stdlib/SuiteSparse_jll/Project.toml
+++ b/stdlib/SuiteSparse_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "SuiteSparse_jll"
 uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "7.6.0+0"
+version = "7.8.0+0"
 
 [deps]
 libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
@@ -8,7 +8,7 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.11"
+julia = "1.12"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
index 08614880a934c..9e03033c4e3fa 100644
--- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
+++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/SuiteSparse_jll.jl
 baremodule SuiteSparse_jll
 using Base, Libdl, libblastrampoline_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
@@ -82,29 +81,34 @@ else
 end
 
 function __init__()
+    # BSD-3-Clause
+    global libamd_handle = dlopen(libamd)
+    global libamd_path = dlpath(libamd_handle)
+    global libcamd_handle = dlopen(libcamd)
+    global libcamd_path = dlpath(libcamd_handle)
+    global libccolamd_handle = dlopen(libccolamd)
+    global libccolamd_path = dlpath(libccolamd_handle)
+    global libcolamd_handle = dlopen(libcolamd)
+    global libcolamd_path = dlpath(libcolamd_handle)
+    global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig)
+    global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle)
+
+    # LGPL-2.1+
+    global libbtf_handle = dlopen(libbtf)
+    global libbtf_path = dlpath(libbtf_handle)
+    global libklu_handle = dlopen(libklu)
+    global libklu_path = dlpath(libklu_handle)
+    global libldl_handle = dlopen(libldl)
+    global libldl_path = dlpath(libldl_handle)
+
+    # GPL-2.0+
     if Base.USE_GPL_LIBS
-        global libamd_handle = dlopen(libamd)
-        global libamd_path = dlpath(libamd_handle)
-        global libbtf_handle = dlopen(libbtf)
-        global libbtf_path = dlpath(libbtf_handle)
-        global libcamd_handle = dlopen(libcamd)
-        global libcamd_path = dlpath(libcamd_handle)
-        global libccolamd_handle = dlopen(libccolamd)
-        global libccolamd_path = dlpath(libccolamd_handle)
         global libcholmod_handle = dlopen(libcholmod)
         global libcholmod_path = dlpath(libcholmod_handle)
-        global libcolamd_handle = dlopen(libcolamd)
-        global libcolamd_path = dlpath(libcolamd_handle)
-        global libklu_handle = dlopen(libklu)
-        global libklu_path = dlpath(libklu_handle)
-        global libldl_handle = dlopen(libldl)
-        global libldl_path = dlpath(libldl_handle)
         global librbio_handle = dlopen(librbio)
         global librbio_path = dlpath(librbio_handle)
         global libspqr_handle = dlopen(libspqr)
         global libspqr_path = dlpath(libspqr_handle)
-        global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig)
-        global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle)
         global libumfpack_handle = dlopen(libumfpack)
         global libumfpack_path = dlpath(libumfpack_handle)
     end
diff --git a/stdlib/TOML/Project.toml b/stdlib/TOML/Project.toml
index 17fc8be19ec8e..ceb4acf8bbc65 100644
--- a/stdlib/TOML/Project.toml
+++ b/stdlib/TOML/Project.toml
@@ -6,12 +6,13 @@ version = "1.0.3"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
 [compat]
+Dates = "1.11.0"
 julia = "1.6"
 
 [extras]
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
 
 [targets]
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index 858b75a2e0eff..94d2808c0bc24 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -7,6 +7,8 @@ and to serialize Julia data structures to TOML format.
 """
 module TOML
 
+using Dates
+
 module Internals
     # The parser is defined in Base
     using Base.TOML: Parser, parse, tryparse, ParserError, isvalid_barekey_char, reinit!
@@ -36,6 +38,11 @@ performance if a larger number of small files are parsed.
 """
 const Parser = Internals.Parser
 
+# Dates-enabled constructors
+Parser() = Parser{Dates}()
+Parser(io::IO) = Parser{Dates}(io)
+Parser(str::String; filepath=nothing) = Parser{Dates}(str; filepath)
+
 """
     parsefile(f::AbstractString)
     parsefile(p::Parser, f::AbstractString)
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 91ea4fd392e4b..63f65b017d393 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -34,7 +34,8 @@ function print_toml_escaped(io::IO, s::AbstractString)
 end
 
 const MbyFunc = Union{Function, Nothing}
-const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString}
+const TOMLValue = Union{AbstractVector, AbstractDict, Bool, Integer, AbstractFloat, AbstractString,
+                        Dates.DateTime, Dates.Time, Dates.Date, Base.TOML.DateTime, Base.TOML.Time, Base.TOML.Date}
 
 
 ########
@@ -89,6 +90,9 @@ function printvalue(f::MbyFunc, io::IO, value::AbstractVector, sorted::Bool)
 end
 
 function printvalue(f::MbyFunc, io::IO, value::TOMLValue, sorted::Bool)
+    value isa Base.TOML.DateTime && (value = Dates.DateTime(value))
+    value isa Base.TOML.Time && (value = Dates.Time(value))
+    value isa Base.TOML.Date && (value = Dates.Date(value))
     value isa Dates.DateTime ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z")) :
     value isa Dates.Time     ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) :
     value isa Dates.Date     ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) :
@@ -97,9 +101,10 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue, sorted::Bool)
     value isa AbstractFloat  ? Base.print(io, isnan(value) ? "nan" :
                                               isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
                                               Float64(value)) :  # TOML specifies IEEE 754 binary64 for float
-    value isa AbstractString ? (Base.print(io, "\"");
+    value isa AbstractString ? (qmark = Base.contains(value, "\n") ? "\"\"\"" : "\"";
+                                Base.print(io, qmark);
                                 print_toml_escaped(io, value);
-                                Base.print(io, "\"")) :
+                                Base.print(io, qmark)) :
     value isa AbstractDict ? print_inline_table(f, io, value, sorted) :
     error("internal error in TOML printing, unhandled value")
 end
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index 79a87b9f0f13f..8fba1b1c1df10 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -195,3 +195,13 @@ LocalPkg = {path = "LocalPkg"}
 """
 @test toml_str(d; sorted=true, inline_tables) == s
 @test roundtrip(s)
+
+# multiline strings (#55083)
+s = """
+a = \"\"\"lorem ipsum
+
+
+
+alpha\"\"\"
+"""
+@test roundtrip(s)
diff --git a/stdlib/TOML/test/utils/utils.jl b/stdlib/TOML/test/utils/utils.jl
index c484a61cee25a..b01acf04a72fe 100644
--- a/stdlib/TOML/test/utils/utils.jl
+++ b/stdlib/TOML/test/utils/utils.jl
@@ -33,7 +33,7 @@ end
 function get_data()
     tmp = mktempdir()
     path = joinpath(tmp, basename(url))
-    Downloads.download(url, path)
+    retry(Downloads.download, delays=fill(10,5))(url, path)
     Tar.extract(`$(exe7z()) x $path -so`, joinpath(tmp, "testfiles"))
     return joinpath(tmp, "testfiles", "toml-test-julia-$version", "testfiles")
 end
diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
index be2ed3acce5b5..53be1b04708b3 100644
--- a/stdlib/TOML/test/values.jl
+++ b/stdlib/TOML/test/values.jl
@@ -4,16 +4,31 @@ using Test
 using TOML
 using TOML: Internals
 
+# Construct an explicit Parser to test the "cached" version of parsing
+const test_parser = TOML.Parser()
+
 function testval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     parsed = TOML.parse(f)["foo"]
     return isequal(v, parsed) && typeof(v) == typeof(parsed)
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    parsed = TOML.parse(test_parser, f)["foo"]
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    return true
 end
 
 function failval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     err = TOML.tryparse(f);
     return err isa TOML.Internals.ParserError && err.type == v
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    err = TOML.tryparse(test_parser, f);
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    return true
 end
 
 @testset "Numbers" begin
@@ -157,6 +172,6 @@ end
 @testset "Array" begin
     @test testval("[1,2,3]", Int64[1,2,3])
     @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0])
-    @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)])
+    @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)])
     @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"])
 end
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
index f1c361eff972e..2403cd1c7c635 100644
--- a/stdlib/Tar.version
+++ b/stdlib/Tar.version
@@ -1,4 +1,4 @@
 TAR_BRANCH = master
-TAR_SHA1 = 81888a33704b233a2ad6f82f84456a1dd82c87f0
+TAR_SHA1 = 1114260f5c7a7b59441acadca2411fa227bb8a3b
 TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git
 TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index e68efdfb6ebf5..c1fe9e8e20c63 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -24,7 +24,7 @@ The `Test` module provides simple *unit testing* functionality. Unit testing is
 see if your code is correct by checking that the results are what you expect. It can be helpful
 to ensure your code still works after you make changes, and can be used when developing as a way
 of specifying the behaviors your code should have when complete. You may also want to look at the
-documentation for [adding tests to your Julia Package](@ref adding-tests-to-packages).
+documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#Adding-tests-to-the-package).
 
 Simple unit testing can be performed with the `@test` and `@test_throws` macros:
 
@@ -417,6 +417,8 @@ Add the following to `src/Example.jl`:
 ```julia
 module Example
 
+export greet, simple_add, type_multiply
+
 function greet()
     "Hello world!"
 end
@@ -429,8 +431,6 @@ function type_multiply(a::Float64, b::Float64)
     a * b
 end
 
-export greet, simple_add, type_multiply
-
 end
 ```
 
@@ -469,7 +469,7 @@ end
 We will need to create those two included files, `math_tests.jl` and `greeting_tests.jl`, and add some tests to them.
 
 > **Note:** Notice how we did not have to specify add `Example` into the `test` environment's `Project.toml`.
-> This is a benefit of Julia's testing system that you could [read about more here](@ref adding-tests-to-packages).
+> This is a benefit of Julia's testing system that you could [read about more here](https://pkgdocs.julialang.org/dev/creating-packages/).
 
 #### Writing Tests for `math_tests.jl`
 
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index ddd62ca9a10f8..b4ada2ce3a9cf 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -802,6 +802,9 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
                 if from_macroexpand && extype == LoadError && exc isa Exception
                     Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
                     true
+                elseif extype == ErrorException && isa(exc, FieldError)
+                    Base.depwarn(lazy"ErrorException should no longer be used to test field access; FieldError should be used instead!", :do_test_throws)
+                    true
                 else
                     isa(exc, extype)
                 end
@@ -1835,9 +1838,19 @@ function parse_testset_args(args)
         # a standalone symbol is assumed to be the test set we should use
         # the same is true for a symbol that's not exported from a module
         if isa(arg, Symbol) || Base.isexpr(arg, :.)
+            if testsettype !== nothing
+                msg = """Multiple testset types provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_testset_types; force=true)
+            end
             testsettype = esc(arg)
         # a string is the description
         elseif isa(arg, AbstractString) || (isa(arg, Expr) && arg.head === :string)
+            if desc !== nothing
+                msg = """Multiple descriptions provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_descriptions; force=true)
+            end
             desc = esc(arg)
         # an assignment is an option
         elseif isa(arg, Expr) && arg.head === :(=)
@@ -1900,7 +1913,7 @@ function get_testset_depth()
     return length(testsets)
 end
 
-_args_and_call(args...; kwargs...) = (args[1:end-1], kwargs, args[end](args[1:end-1]...; kwargs...))
+_args_and_call((args..., f)...; kwargs...) = (args, kwargs, f(args...; kwargs...))
 _materialize_broadcasted(f, args...) = Broadcast.materialize(Broadcast.broadcasted(f, args...))
 
 """
@@ -1982,25 +1995,24 @@ function _inferred(ex, mod, allow = :(Union{}))
         quote
             let allow = $(esc(allow))
                 allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
-                $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
+                $(if any(@nospecialize(a)->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
                     # Has keywords
                     args = gensym()
                     kwargs = gensym()
                     quote
                         $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
-                        inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
+                        inftype = $(gen_call_with_extracted_types(mod, Base.infer_return_type, :($(ex.args[1])($(args)...; $(kwargs)...))))
                     end
                 else
                     # No keywords
                     quote
                         args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
                         result = $(esc(ex.args[1]))(args...)
-                        inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
+                        inftype = Base.infer_return_type($(esc(ex.args[1])), Base.typesof(args...))
                     end
                 end)
-                @assert length(inftypes) == 1
                 rettype = result isa Type ? Type{result} : typeof(result)
-                rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
+                rettype <: allow || rettype == typesplit(inftype, allow) || error("return type $rettype does not match inferred return type $inftype")
                 result
             end
         end
diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl
index 42d3eaf8eaa48..b224d79e47cd9 100644
--- a/stdlib/Test/src/logging.jl
+++ b/stdlib/Test/src/logging.jl
@@ -2,6 +2,7 @@
 
 using Logging: Logging, AbstractLogger, LogLevel, Info, with_logger
 import Base: occursin
+using Base: @lock
 
 #-------------------------------------------------------------------------------
 """
@@ -35,11 +36,15 @@ struct Ignored ; end
 #-------------------------------------------------------------------------------
 # Logger with extra test-related state
 mutable struct TestLogger <: AbstractLogger
-    logs::Vector{LogRecord}
+    lock::ReentrantLock
+    logs::Vector{LogRecord}  # Guarded by lock.
     min_level::LogLevel
     catch_exceptions::Bool
-    shouldlog_args
-    message_limits::Dict{Any,Int}
+    # Note: shouldlog_args only maintains the info for the most recent log message, which
+    # may not be meaningful in a multithreaded program. See:
+    # https://github.com/JuliaLang/julia/pull/54497#discussion_r1603691606
+    shouldlog_args  # Guarded by lock.
+    message_limits::Dict{Any,Int}  # Guarded by lock.
     respect_maxlog::Bool
 end
 
@@ -80,15 +85,17 @@ Test Passed
 ```
 """
 TestLogger(; min_level=Info, catch_exceptions=false, respect_maxlog=true) =
-    TestLogger(LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog)
+    TestLogger(ReentrantLock(), LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog)
 Logging.min_enabled_level(logger::TestLogger) = logger.min_level
 
 function Logging.shouldlog(logger::TestLogger, level, _module, group, id)
-    if get(logger.message_limits, id, 1) > 0
-        logger.shouldlog_args = (level, _module, group, id)
-        true
-    else
-        false
+    @lock logger.lock begin
+        if get(logger.message_limits, id, 1) > 0
+            logger.shouldlog_args = (level, _module, group, id)
+            return true
+        else
+            return false
+        end
     end
 end
 
@@ -98,12 +105,17 @@ function Logging.handle_message(logger::TestLogger, level, msg, _module,
     if logger.respect_maxlog
         maxlog = get(kwargs, :maxlog, nothing)
         if maxlog isa Core.BuiltinInts
-            remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
-            logger.message_limits[id] = remaining - 1
-            remaining > 0 || return
+            @lock logger.lock begin
+                remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
+                logger.message_limits[id] = remaining - 1
+                remaining > 0 || return
+            end
         end
     end
-    push!(logger.logs, LogRecord(level, msg, _module, group, id, file, line, kwargs))
+    r = LogRecord(level, msg, _module, group, id, file, line, kwargs)
+    @lock logger.lock begin
+        push!(logger.logs, r)
+    end
 end
 
 # Catch exceptions for the test logger only if specified
@@ -112,7 +124,9 @@ Logging.catch_exceptions(logger::TestLogger) = logger.catch_exceptions
 function collect_test_logs(f; kwargs...)
     logger = TestLogger(; kwargs...)
     value = with_logger(f, logger)
-    logger.logs, value
+    @lock logger.lock begin
+        return copy(logger.logs), value
+    end
 end
 
 
diff --git a/stdlib/Test/src/precompile.jl b/stdlib/Test/src/precompile.jl
index 2cb2fb7f3f0c6..04907f8425440 100644
--- a/stdlib/Test/src/precompile.jl
+++ b/stdlib/Test/src/precompile.jl
@@ -1,9 +1,15 @@
-redirect_stdout(devnull) do
-    @testset "example" begin
-        @test 1 == 1
-        @test_throws ErrorException error()
-        @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2"
-        @test_broken 1 == 2
-        @test 1 ≈ 1.0000000000000001
+if Base.generating_output()
+let
+    function example_payload()
+        @testset "example" begin
+            @test 1 == 1
+            @test_throws ErrorException error()
+            @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2"
+            @test_broken 1 == 2
+            @test 1 ≈ 1.0000000000000001
+        end
     end
+
+    redirect_stdout(example_payload, devnull)
+end
 end
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 460e2eadf42b7..3ddcd7d5de0fd 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -162,7 +162,7 @@ let fails = @testset NoThrowTestSet begin
         @test_throws "A test" error("a test")
         @test_throws r"sqrt\([Cc]omplx" sqrt(-1)
         @test_throws str->occursin("a T", str) error("a test")
-        @test_throws ["BoundsError", "aquire", "1-element", "at index [2]"] [1][2]
+        @test_throws ["BoundsError", "acquire", "1-element", "at index [2]"] [1][2]
     end
     for fail in fails
         @test fail isa Test.Fail
@@ -294,7 +294,7 @@ let fails = @testset NoThrowTestSet begin
     end
 
     let str = sprint(show, fails[26])
-        @test occursin("Expected: [\"BoundsError\", \"aquire\", \"1-element\", \"at index [2]\"]", str)
+        @test occursin("Expected: [\"BoundsError\", \"acquire\", \"1-element\", \"at index [2]\"]", str)
         @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str)
     end
 
@@ -1534,6 +1534,22 @@ end
     @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_2
 end
 
+# Issue 54807
+struct FEexc
+    a::Nothing
+    b::Nothing
+end
+
+@testset "FieldError Shim tests and Softdeprecation of @test_throws ErrorException" begin
+    feexc = FEexc(nothing, nothing)
+    # This is redundant regular test for FieldError
+    @test_throws FieldError feexc.c
+    # This should raise ErrorException
+    @test_throws ErrorException feexc.a = 1
+    # This is test for FieldError shim and deprecation
+    @test_deprecated @test_throws ErrorException feexc.c
+end
+
 # Issue 25483
 mutable struct PassInformationTestSet <: Test.AbstractTestSet
     results::Vector
@@ -1709,4 +1725,14 @@ end
         result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
         @test occursin(expected, result)
     end
+
+end
+
+@testset "Deprecated multiple arguments" begin
+    msg1 = """Multiple descriptions provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg1 @macroexpand @testset "name1" "name2" begin end
+    msg2 = """Multiple testset types provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end
 end
diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl
index 2b137bbe88ffc..e3f5f812ef6e2 100644
--- a/stdlib/UUIDs/src/UUIDs.jl
+++ b/stdlib/UUIDs/src/UUIDs.jl
@@ -10,7 +10,7 @@ using Random
 
 import SHA
 
-export UUID, uuid1, uuid4, uuid5, uuid_version
+export UUID, uuid1, uuid4, uuid5, uuid7, uuid_version
 
 import Base: UUID
 
@@ -39,7 +39,7 @@ const namespace_x500 = UUID(0x6ba7b8149dad11d180b400c04fd430c8) # 6ba7b814-9dad-
     uuid1([rng::AbstractRNG]) -> UUID
 
 Generates a version 1 (time-based) universally unique identifier (UUID), as specified
-by RFC 4122. Note that the Node ID is randomly generated (does not identify the host)
+by [RFC 4122](https://www.ietf.org/rfc/rfc4122). Note that the Node ID is randomly generated (does not identify the host)
 according to section 4.5 of the RFC.
 
 The default rng used by `uuid1` is not `Random.default_rng()` and every invocation of `uuid1()` without
@@ -62,6 +62,13 @@ UUID("cfc395e8-590f-11e8-1f13-43a2532b2fa8")
 ```
 """
 function uuid1(rng::AbstractRNG=Random.RandomDevice())
+    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
+    # between the UUID epoch and Unix epoch
+    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
+    _build_uuid1(rng, timestamp)
+end
+
+function _build_uuid1(rng::AbstractRNG, timestamp::UInt64)
     u = rand(rng, UInt128)
 
     # mask off clock sequence and node
@@ -70,9 +77,6 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice())
     # set the unicast/multicast bit and version
     u |= 0x00000000000010000000010000000000
 
-    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
-    # between the UUID epoch and Unix epoch
-    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
     ts_low = timestamp & typemax(UInt32)
     ts_mid = (timestamp >> 32) & typemax(UInt16)
     ts_hi = (timestamp >> 48) & 0x0fff
@@ -81,14 +85,14 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice())
     u |= UInt128(ts_mid) << 80
     u |= UInt128(ts_hi) << 64
 
-    UUID(u)
+    return UUID(u)
 end
 
 """
     uuid4([rng::AbstractRNG]) -> UUID
 
 Generates a version 4 (random or pseudo-random) universally unique identifier (UUID),
-as specified by RFC 4122.
+as specified by [RFC 4122](https://www.ietf.org/rfc/rfc4122).
 
 The default rng used by `uuid4` is not `Random.default_rng()` and every invocation of `uuid4()` without
 an argument should be expected to return a unique identifier. Importantly, the outputs of
@@ -157,4 +161,47 @@ function uuid5(ns::UUID, name::String)
     return UUID(v)
 end
 
+"""
+    uuid7([rng::AbstractRNG]) -> UUID
+
+Generates a version 7 (random or pseudo-random) universally unique identifier (UUID),
+as specified by [RFC 9652](https://www.rfc-editor.org/rfc/rfc9562).
+
+The default rng used by `uuid7` is not `Random.default_rng()` and every invocation of `uuid7()` without
+an argument should be expected to return a unique identifier. Importantly, the outputs of
+`uuid7` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.12),
+`uuid7` uses `Random.RandomDevice` as the default rng. However, this is an implementation
+detail that may change in the future.
+
+!!! compat "Julia 1.12"
+    `uuid7()` is available as of Julia 1.12.
+
+# Examples
+```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}"
+julia> using Random
+
+julia> rng = Xoshiro(123);
+
+julia> uuid7(rng)
+UUID("019026ca-e086-772a-9638-f7b8557cd282")
+```
+"""
+function uuid7(rng::AbstractRNG=Random.RandomDevice())
+    # current time in ms, rounded to an Integer
+    timestamp = round(UInt128, time() * 1e3)
+    _build_uuid7(rng, timestamp)
+end
+
+function _build_uuid7(rng::AbstractRNG, timestamp::UInt128)
+    bytes = rand(rng, UInt128)
+    # make space for the timestamp
+    bytes &= 0x0000000000000fff3fffffffffffffff
+    # version & variant
+    bytes |= 0x00000000000070008000000000000000
+
+    bytes |= timestamp << UInt128(80)
+
+    return UUID(bytes)
+end
+
 end
diff --git a/stdlib/UUIDs/test/runtests.jl b/stdlib/UUIDs/test/runtests.jl
index d2b9ee17f2e38..c6da441076ea8 100644
--- a/stdlib/UUIDs/test/runtests.jl
+++ b/stdlib/UUIDs/test/runtests.jl
@@ -1,23 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, UUIDs, Random
-
-u1 = uuid1()
-u4 = uuid4()
-u5 = uuid5(u1, "julia")
-@test uuid_version(u1) == 1
-@test uuid_version(u4) == 4
-@test uuid_version(u5) == 5
-@test u1 == UUID(string(u1)) == UUID(GenericString(string(u1)))
-@test u4 == UUID(string(u4)) == UUID(GenericString(string(u4)))
-@test u5 == UUID(string(u5)) == UUID(GenericString(string(u5)))
-@test u1 == UUID(UInt128(u1))
-@test u4 == UUID(UInt128(u4))
-@test u5 == UUID(UInt128(u5))
-@test uuid4(MersenneTwister(0)) == uuid4(MersenneTwister(0))
-@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000")
-@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098")
-@test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000")
+using UUIDs: _build_uuid1, _build_uuid7
 
 # results similar to Python builtin uuid
 # To reproduce the sequence
@@ -37,11 +21,6 @@ const following_uuids = [
     UUID("d8cc6298-75d5-57e0-996c-279259ab365c"),
 ]
 
-for (idx, init_uuid) in enumerate(following_uuids[1:end-1])
-    next_id = uuid5(init_uuid, "julia")
-    @test next_id == following_uuids[idx+1]
-end
-
 # Python-generated UUID following each of the standard namespaces
 const standard_namespace_uuids = [
     (UUIDs.namespace_dns,  UUID("00ca23ad-40ef-500c-a910-157de3950d07")),
@@ -50,30 +29,98 @@ const standard_namespace_uuids = [
     (UUIDs.namespace_x500, UUID("993c6684-82e7-5cdb-bd46-9bff0362e6a9")),
 ]
 
-for (init_uuid, next_uuid) in standard_namespace_uuids
-    result = uuid5(init_uuid, "julia")
-    @test next_uuid == result
-end
-
-# Issue 35860
-Random.seed!(Random.default_rng(), 10)
+@testset "UUIDs" begin
 u1 = uuid1()
 u4 = uuid4()
-Random.seed!(Random.default_rng(), 10)
-@test u1 != uuid1()
-@test u4 != uuid4()
-
-@test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e")
-str = "22b4a8a1-e548-4eeb-9270-60426d66a48e"
-@test UUID(uppercase(str)) == UUID(str)
-
-for r in rand(UInt128, 10^3)
-    @test UUID(r) == UUID(string(UUID(r)))
+u5 = uuid5(u1, "julia")
+u7 = uuid7()
+
+@testset "Extraction of version numbers" begin
+    @test uuid_version(u1) == 1
+    @test uuid_version(u4) == 4
+    @test uuid_version(u5) == 5
+    @test uuid_version(u7) == 7
+end
+
+@testset "Parsing from string" begin
+    @test u1 == UUID(string(u1)) == UUID(GenericString(string(u1)))
+    @test u4 == UUID(string(u4)) == UUID(GenericString(string(u4)))
+    @test u5 == UUID(string(u5)) == UUID(GenericString(string(u5)))
+    @test u7 == UUID(string(u7)) == UUID(GenericString(string(u7)))
+end
+
+@testset "UInt128 conversion" begin
+    @test u1 == UUID(UInt128(u1))
+    @test u4 == UUID(UInt128(u4))
+    @test u5 == UUID(UInt128(u5))
+    @test u7 == UUID(UInt128(u7))
+end
+
+@testset "Passing an RNG" begin
+    rng = Xoshiro(0)
+    @test uuid1(rng) isa UUID
+    @test uuid4(rng) isa UUID
+    @test uuid7(rng) isa UUID
+end
+
+@testset "uuid1, uuid4 & uuid7 RNG stability" begin
+    @test uuid4(Xoshiro(0)) == uuid4(Xoshiro(0))
+
+    time_uuid1 = rand(UInt64)
+    time_uuid7 = rand(UInt128)
+
+    # we need to go through the internal function to test RNG stability
+    @test _build_uuid1(Xoshiro(0), time_uuid1) == _build_uuid1(Xoshiro(0), time_uuid1)
+    @test _build_uuid7(Xoshiro(0), time_uuid7) == _build_uuid7(Xoshiro(0), time_uuid7)
+end
+
+@testset "Rejection of invalid UUID strings" begin
+    @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000")
+    @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098")
+    @test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000")
+    @test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e")
+end
+
+@testset "UUID sequence" begin
+    for (idx, init_uuid) in enumerate(following_uuids[1:end-1])
+        next_id = uuid5(init_uuid, "julia")
+        @test next_id == following_uuids[idx+1]
+    end
+end
+
+@testset "Standard namespace UUIDs" begin
+    for (init_uuid, next_uuid) in standard_namespace_uuids
+        result = uuid5(init_uuid, "julia")
+        @test next_uuid == result
+    end
+end
+
+@testset "Use of Random.RandomDevice (#35860)" begin
+    Random.seed!(Random.default_rng(), 10)
+    u1 = uuid1()
+    u4 = uuid4()
+    u7 = uuid7()
+    Random.seed!(Random.default_rng(), 10)
+    @test u1 != uuid1()
+    @test u4 != uuid4()
+    @test u7 != uuid7()
+end
+
+@testset "case invariance" begin
+    str = "22b4a8a1-e548-4eeb-9270-60426d66a48e"
+    @test UUID(uppercase(str)) == UUID(str)
+end
+
+@testset "Equality of string parsing & direct UInt128 passing" begin
+    for r in rand(UInt128, 10^3)
+        @test UUID(r) == UUID(string(UUID(r)))
+    end
 end
 
 @testset "Docstrings" begin
     @test isempty(Docs.undocumented_names(UUIDs))
 end
+end
diff --git a/stdlib/Unicode/Project.toml b/stdlib/Unicode/Project.toml
index a01833870644e..781da423c63e8 100644
--- a/stdlib/Unicode/Project.toml
+++ b/stdlib/Unicode/Project.toml
@@ -4,6 +4,7 @@ version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [targets]
-test = ["Test"]
+test = ["Test", "Random"]
diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml
index b1fa9576af3ec..bb5771654430b 100644
--- a/stdlib/Zlib_jll/Project.toml
+++ b/stdlib/Zlib_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "Zlib_jll"
 uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-version = "1.2.13+1"
+version = "1.3.1+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl
index ea381b8b0683c..fb043c7143789 100644
--- a/stdlib/Zlib_jll/src/Zlib_jll.jl
+++ b/stdlib/Zlib_jll/src/Zlib_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/Zlib_jll.jl
 baremodule Zlib_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl
index f04f9c70a7054..81eb742a172fe 100644
--- a/stdlib/Zlib_jll/test/runtests.jl
+++ b/stdlib/Zlib_jll/test/runtests.jl
@@ -3,5 +3,5 @@
 using Test, Zlib_jll
 
 @testset "Zlib_jll" begin
-    @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.13"
+    @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.3.1"
 end
diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
index 35ada23778a94..b84bf0d8204ae 100644
--- a/stdlib/dSFMT_jll/src/dSFMT_jll.jl
+++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule dSFMT_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index 3decbf5f81512..a0eac13b3ab23 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "16.0.6+4"
+version = "18.1.7+2"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
index 3140dc3989a72..be2acb34faa65 100644
--- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl
+++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule libLLVM_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/libLLVM_jll/test/runtests.jl b/stdlib/libLLVM_jll/test/runtests.jl
index ea678108ae012..e04076f4145a5 100644
--- a/stdlib/libLLVM_jll/test/runtests.jl
+++ b/stdlib/libLLVM_jll/test/runtests.jl
@@ -3,5 +3,6 @@
 using Test, Libdl, libLLVM_jll
 
 @testset "libLLVM_jll" begin
-    @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMInitializeTarget; throw_error=false) !== nothing
+    # Try to find a symbol from the C API of libLLVM as a simple sanity check.
+    @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMContextCreate; throw_error=false) !== nothing
 end
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 6ee704bc7e1c4..1dd22b7fb8d40 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,13 +1,13 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.8.0+1"
+version = "5.11.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.10"
+julia = "1.12"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
index 49e7932a6b701..bbdad252be14a 100644
--- a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
+++ b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule libblastrampoline_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml
index ea7ca1615f050..88e60941f65ee 100644
--- a/stdlib/nghttp2_jll/Project.toml
+++ b/stdlib/nghttp2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "nghttp2_jll"
 uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-version = "1.59.0+0"
+version = "1.60.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
index 76e8d3582c402..5057299614aa5 100644
--- a/stdlib/nghttp2_jll/src/nghttp2_jll.jl
+++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/nghttp2_jll.jl
 baremodule nghttp2_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl
index daa848d19f9b6..b6ddefb8222cd 100644
--- a/stdlib/nghttp2_jll/test/runtests.jl
+++ b/stdlib/nghttp2_jll/test/runtests.jl
@@ -11,5 +11,5 @@ end
 
 @testset "nghttp2_jll" begin
     info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0))
-    @test VersionNumber(unsafe_string(info.version_str)) == v"1.59.0"
+    @test VersionNumber(unsafe_string(info.version_str)) == v"1.60.0"
 end
diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml
index b1bd4bc9e0a1a..6bca9d1d0545b 100644
--- a/stdlib/p7zip_jll/Project.toml
+++ b/stdlib/p7zip_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "p7zip_jll"
 uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
-version = "17.4.0+2"
+version = "17.5.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl
index 01f26de936e78..a2a90a2450ea6 100644
--- a/stdlib/p7zip_jll/src/p7zip_jll.jl
+++ b/stdlib/p7zip_jll/src/p7zip_jll.jl
@@ -3,7 +3,6 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/p7zip_jll.jl
 baremodule p7zip_jll
 using Base
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk
index b23754f3a3cac..b79059d3368b1 100644
--- a/stdlib/stdlib.mk
+++ b/stdlib/stdlib.mk
@@ -11,24 +11,19 @@ INDEPENDENT_STDLIBS := \
 	LibCURL_jll LibSSH2_jll LibGit2_jll nghttp2_jll  MozillaCACerts_jll MbedTLS_jll \
 	MPFR_jll OpenLibm_jll PCRE2_jll p7zip_jll Zlib_jll
 
-PKG_EXTS := \
-	REPLExt
-
-STDLIBS := $(STDLIBS_WITHIN_SYSIMG) $(INDEPENDENT_STDLIBS) $(PKG_EXTS)
+STDLIBS := $(STDLIBS_WITHIN_SYSIMG) $(INDEPENDENT_STDLIBS)
 VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
 
-SYSIMG_STDLIB_SRCS =
+SYSIMG_STDLIBS_SRCS =
+INDEPENDENT_STDLIBS_SRCS =
 define STDLIB_srcs
-ifneq ($(filter $(1),$(PKG_EXTS)),)
-	$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/Pkg/ext/$1 -name \*.jl) \
-	$$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/Pkg) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/Pkg/Project.toml
-else
-	$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \
-	$$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/Project.toml
-endif
+$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \
+$$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/Project.toml
 
 ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),)
-	SYSIMG_STDLIB_SRCS += $$($1_SRCS)
+	SYSIMG_STDLIBS_SRCS += $$($1_SRCS)
+else
+	INDEPENDENT_STDLIBS_SRCS += $$($1_SRCS)
 endif
 endef
 
diff --git a/sysimage.mk b/sysimage.mk
index e6fa54be5f186..d0e106d4ce3da 100644
--- a/sysimage.mk
+++ b/sysimage.mk
@@ -17,7 +17,7 @@ $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a
 	@$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ \
 		$(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) \
 		$(if $(findstring -debug,$(notdir $@)),-ljulia-internal-debug -ljulia-debug,-ljulia-internal -ljulia) \
-		$$([ $(OS) = WINNT ] && echo '' -lssp))
+		$$([ $(OS) = WINNT ] && echo '' $(LIBM) -lssp --disable-auto-import --disable-runtime-pseudo-reloc))
 	@$(INSTALL_NAME_CMD)$(notdir $@) $@
 	@$(DSYMUTIL) $@
 
@@ -57,19 +57,19 @@ COMPILER_SRCS += $(shell find $(JULIAHOME)/base/compiler -name \*.jl)
 # sort these to remove duplicates
 BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \
                     $(shell find $(BUILDROOT)/base -name \*.jl  -and -not -name sysimg.jl))
-STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIB_SRCS)
+STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIBS_SRCS)
 RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make sure this always has a trailing slash
 
 $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
 	$(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \
-		--startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl)
+		--startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O1 compiler/compiler.jl)
 	@mv $@.tmp $@
 
 $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
 	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
-			$(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
+			$(call spawn, $(JULIA_EXECUTABLE)) -g1 -O1 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
 			--startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \
 		echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \
 		false; \
diff --git a/test/.gitignore b/test/.gitignore
index fc55a0df3a173..20bf199b87c74 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -4,3 +4,4 @@
 /libccalltest.*
 /relocatedepot
 /RelocationTestPkg2/src/foo.txt
+/RelocationTestPkg*/Manifest.toml
diff --git a/test/Makefile b/test/Makefile
index e522ec811b167..1b9cb377c943d 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -43,6 +43,7 @@ relocatedepot:
 	@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
 	@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
 	@cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot
 	@cd $(SRCDIR) && \
 	$(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@)
 
@@ -55,6 +56,7 @@ revise-relocatedepot: revise-% :
 	@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
 	@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
 	@cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot
 	@cd $(SRCDIR) && \
 	$(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)
 
@@ -70,5 +72,6 @@ clangsa:
 clean:
 	@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
 	@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
+	@$(MAKE) -C llvmpasses $@
 
 .PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean
diff --git a/test/RelocationTestPkg2/src/RelocationTestPkg2.jl b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl
index dc36a06cb48d4..4b1fd2708a727 100644
--- a/test/RelocationTestPkg2/src/RelocationTestPkg2.jl
+++ b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl
@@ -1,7 +1,7 @@
 module RelocationTestPkg2
 
-include_dependency("foo.txt")
-include_dependency("foodir")
+include_dependency("foo.txt", track_content=false)
+include_dependency("foodir", track_content=false)
 greet() = print("Hello World!")
 
 end # module RelocationTestPkg2
diff --git a/test/RelocationTestPkg4/Project.toml b/test/RelocationTestPkg4/Project.toml
new file mode 100644
index 0000000000000..8334a684f064e
--- /dev/null
+++ b/test/RelocationTestPkg4/Project.toml
@@ -0,0 +1,6 @@
+name = "RelocationTestPkg4"
+uuid = "d423d817-d7e9-49ac-b245-9d9d6db0b429"
+version = "0.1.0"
+
+[deps]
+RelocationTestPkg1 = "854e1adb-5a97-46bf-a391-1cfe05ac726d"
diff --git a/test/RelocationTestPkg4/src/RelocationTestPkg4.jl b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl
new file mode 100644
index 0000000000000..d24a51d19a918
--- /dev/null
+++ b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl
@@ -0,0 +1,5 @@
+module RelocationTestPkg4
+
+greet() = print("Hello World!")
+
+end # module RelocationTestPkg4
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index 9d9c9b9c87776..f655d9abe423f 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -11,6 +11,9 @@ using .Main.StructArrays
 isdefined(Main, :FillArrays) || @eval Main include("testhelpers/FillArrays.jl")
 using .Main.FillArrays
 
+isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl")
+using .Main.SizedArrays
+
 A = rand(5,4,3)
 @testset "Bounds checking" begin
     @test checkbounds(Bool, A, 1, 1, 1) == true
@@ -332,6 +335,13 @@ end
     end
 end
 
+@testset "copy for LinearIndices/CartesianIndices" begin
+    C = CartesianIndices((1:2, 1:4))
+    @test copy(C) === C
+    L = LinearIndices((1:2, 1:4))
+    @test copy(L) === L
+end
+
 # token type on which to dispatch testing methods in order to avoid potential
 # name conflicts elsewhere in the base test suite
 mutable struct TestAbstractArray end
@@ -1212,7 +1222,7 @@ function Base.getindex(S::Strider{<:Any,N}, I::Vararg{Int,N}) where {N}
 end
 Base.strides(S::Strider) = S.strides
 Base.elsize(::Type{<:Strider{T}}) where {T} = Base.elsize(Vector{T})
-Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = MemoryRef(S.data.ref, S.offset)
+Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = memoryref(S.data.ref, S.offset)
 
 @testset "Simple 3d strided views and permutes" for sz in ((5, 3, 2), (7, 11, 13))
     A = collect(reshape(1:prod(sz), sz))
@@ -1271,6 +1281,9 @@ Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = MemoryRef(S.data.ref, S
             end
         end
     end
+    # constant propagation in the PermutedDimsArray constructor
+    X = @inferred (A -> PermutedDimsArray(A, (2,3,1)))(A)
+    @test @inferred((X -> PermutedDimsArray(X, (3,1,2)))(X)) == A
 end
 
 @testset "simple 2d strided views, permutes, transposes" for sz in ((5, 3), (7, 11))
@@ -1423,6 +1436,31 @@ using .Main.OffsetArrays
     end
 end
 
+@testset "Check push!($a, $args...)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        push!(a, args...)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check append!($a, $args)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        append!(a, args)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check sizehint!($a)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1))
+        @test sizehint!(a, 10) === a
+end
+
 @testset "splatting into hvcat" begin
     t = (1, 2)
     @test [t...; 3 4] == [1 2; 3 4]
@@ -1771,6 +1809,9 @@ end
     @test_throws ArgumentError stack([1:3, 4:6]; dims=3)
     @test_throws ArgumentError stack(abs2, 1:3; dims=2)
 
+    @test stack(["hello", "world"]) isa Matrix{Char}
+    @test_throws DimensionMismatch stack(["hello", "world!"])  # had a bug in error printing
+
     # Empty
     @test_throws ArgumentError stack(())
     @test_throws ArgumentError stack([])
@@ -1842,6 +1883,24 @@ function check_pointer_strides(A::AbstractArray)
     return true
 end
 
+@testset "colonful `reshape`, #54245" begin
+    @test reshape([], (0, :)) isa Matrix
+    @test_throws DimensionMismatch reshape([7], (0, :))
+    let b = prevpow(2, typemax(Int))
+        @test iszero(b*b)
+        @test_throws ArgumentError reshape([7], (b, :, b))
+        @test reshape([], (b, :, b)) isa Array{<:Any, 3}
+    end
+    for iterator ∈ (7:6, 7:7, 7:8)
+        for it ∈ (iterator, map(BigInt, iterator))
+            @test reshape(it, (:, Int(length(it)))) isa AbstractMatrix
+            @test reshape(it, (Int(length(it)), :)) isa AbstractMatrix
+            @test reshape(it, (1, :))               isa AbstractMatrix
+            @test reshape(it, (:, 1))               isa AbstractMatrix
+        end
+    end
+end
+
 @testset "strides for ReshapedArray" begin
     # Type-based contiguous Check
     a = vec(reinterpret(reshape, Int16, reshape(view(reinterpret(Int32, randn(10)), 2:11), 5, :)))
@@ -1972,6 +2031,16 @@ end
 
     @test zero([[2,2], [3,3,3]]) isa Vector{Vector{Int}}
     @test zero([[2,2], [3,3,3]]) == [[0,0], [0, 0, 0]]
+
+
+    @test zero(Union{Float64, Missing}[missing]) == [0.0]
+    struct CustomNumber <: Number
+        val::Float64
+    end
+    Base.zero(::Type{CustomNumber}) = CustomNumber(0.0)
+    @test zero([CustomNumber(5.0)]) == [CustomNumber(0.0)]
+    @test zero(Union{CustomNumber, Missing}[missing]) == [CustomNumber(0.0)]
+    @test zero(Vector{Union{CustomNumber, Missing}}(undef, 1)) == [CustomNumber(0.0)]
 end
 
 @testset "`_prechecked_iterate` optimization" begin
@@ -2020,33 +2089,68 @@ end
     end
 end
 
-@testset "_unsetindex!" begin
-    struct MyMatrixUnsetIndexCartInds{T,A<:AbstractMatrix{T}} <: AbstractMatrix{T}
-        data :: A
+@testset "reshape for offset arrays" begin
+    p = Base.IdentityUnitRange(3:4)
+    r = reshape(p, :, 1)
+    @test r[eachindex(r)] == UnitRange(p)
+    @test collect(r) == r
+
+    struct ZeroBasedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
+        a :: A
+        function ZeroBasedArray(a::AbstractArray)
+            Base.require_one_based_indexing(a)
+            new{eltype(a), ndims(a), typeof(a)}(a)
+        end
     end
-    Base.size(A::MyMatrixUnsetIndexCartInds) = size(A.data)
-    Base.getindex(M::MyMatrixUnsetIndexCartInds, i::Int, j::Int) = M.data[i,j]
-    Base.setindex!(M::MyMatrixUnsetIndexCartInds, v, i::Int, j::Int) = setindex!(M.data, v, i, j)
-    struct MyMatrixUnsetIndexLinInds{T,A<:AbstractMatrix{T}} <: AbstractMatrix{T}
-        data :: A
+    Base.parent(z::ZeroBasedArray) = z.a
+    Base.size(z::ZeroBasedArray) = size(parent(z))
+    Base.axes(z::ZeroBasedArray) = map(x -> Base.IdentityUnitRange(0:x - 1), size(parent(z)))
+    Base.getindex(z::ZeroBasedArray{<:Any, N}, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...]
+    Base.setindex!(z::ZeroBasedArray{<:Any, N}, val, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...] = val
+
+    z = ZeroBasedArray(collect(1:4))
+    r2 = reshape(z, :, 1)
+    @test r2[CartesianIndices(r2)] == r2[LinearIndices(r2)]
+    r2[firstindex(r2)] = 34
+    @test z[0] == 34
+    r2[eachindex(r2)] = r2 .* 2
+    for (i, j) in zip(eachindex(r2), eachindex(z))
+        @test r2[i] == z[j]
+    end
+end
+
+@testset "zero for arbitrary axes" begin
+    r = SizedArrays.SOneTo(2)
+    s = Base.OneTo(2)
+    _to_oneto(x::Integer) = Base.OneTo(2)
+    _to_oneto(x::Union{Base.OneTo, SizedArrays.SOneTo}) = x
+    for (f, v) in ((zeros, 0), (ones, 1), ((x...)->fill(3,x...),3))
+        for ax in ((r,r), (s, r), (2, r))
+            A = f(ax...)
+            @test axes(A) == map(_to_oneto, ax)
+            if all(x -> x isa SizedArrays.SOneTo, ax)
+                @test A isa SizedArrays.SizedArray && parent(A) isa Array
+            else
+                @test A isa Array
+            end
+            @test all(==(v), A)
+        end
     end
-    Base.size(A::MyMatrixUnsetIndexLinInds) = size(A.data)
-    Base.getindex(M::MyMatrixUnsetIndexLinInds, i::Int) = M.data[i]
-    Base.setindex!(M::MyMatrixUnsetIndexLinInds, v, i::Int) = setindex!(M.data, v, i)
-    Base.IndexStyle(::Type{<:MyMatrixUnsetIndexLinInds}) = IndexLinear()
+end
 
-    function test_unsetindex(MT)
-        M = MT(ones(2,2))
-        M2 = MT(Matrix{BigFloat}(undef, 2,2))
-        copyto!(M, M2)
-        @test all(==(1), M)
-        M3 = MT(Matrix{BigFloat}(undef, 2,2))
-        for i in eachindex(M3)
-            @test !isassigned(M3, i)
-        end
-        M3 .= 1
-        @test_throws MethodError copyto!(M3, M2)
+@testset "one" begin
+    @test one([1 2; 3 4]) == [1 0; 0 1]
+    @test one([1 2; 3 4]) isa Matrix{Int}
+
+    struct Mat <: AbstractMatrix{Int}
+        p::Matrix{Int}
     end
-    test_unsetindex(MyMatrixUnsetIndexCartInds)
-    test_unsetindex(MyMatrixUnsetIndexLinInds)
+    Base.size(m::Mat) = size(m.p)
+    Base.IndexStyle(::Type{<:Mat}) = IndexLinear()
+    Base.getindex(m::Mat, i::Int) = m.p[i]
+    Base.setindex!(m::Mat, v, i::Int) = m.p[i] = v
+    Base.similar(::Mat, ::Type{Int}, size::NTuple{2,Int}) = Mat(Matrix{Int}(undef, size))
+
+    @test one(Mat([1 2; 3 4])) == Mat([1 0; 0 1])
+    @test one(Mat([1 2; 3 4])) isa Mat
 end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index ac5b524931046..acdfdc70ba30c 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -97,10 +97,7 @@ ambig(x::Union{Char, Int16}) = 's'
 
 # Automatic detection of ambiguities
 
-const allowed_undefineds = Set([
-    GlobalRef(Base, :active_repl),
-    GlobalRef(Base, :active_repl_backend),
-])
+const allowed_undefineds = Set([])
 
 let Distributed = get(Base.loaded_modules,
                       Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
@@ -332,23 +329,22 @@ end
 @test length(detect_unbound_args(M25341; recursive=true)) == 1
 
 # Test that Core and Base are free of UndefVarErrors
-# not using isempty so this prints more information when it fails
 @testset "detect_unbound_args in Base and Core" begin
     # TODO: review this list and remove everything between test_broken and test
     let need_to_handle_undef_sparam =
             Set{Method}(detect_unbound_args(Core; recursive=true))
         pop!(need_to_handle_undef_sparam, which(Core.Compiler.eltype, Tuple{Type{Tuple{Any}}}))
-        @test_broken need_to_handle_undef_sparam == Set()
+        @test_broken isempty(need_to_handle_undef_sparam)
         pop!(need_to_handle_undef_sparam, which(Core.Compiler._cat, Tuple{Any, AbstractArray}))
         pop!(need_to_handle_undef_sparam, first(methods(Core.Compiler.same_names)))
-        @test need_to_handle_undef_sparam == Set()
+        @test isempty(need_to_handle_undef_sparam)
     end
     let need_to_handle_undef_sparam =
             Set{Method}(detect_unbound_args(Base; recursive=true, allowed_undefineds))
         pop!(need_to_handle_undef_sparam, which(Base._totuple, (Type{Tuple{Vararg{E}}} where E, Any, Any)))
         pop!(need_to_handle_undef_sparam, which(Base.eltype, Tuple{Type{Tuple{Any}}}))
         pop!(need_to_handle_undef_sparam, first(methods(Base.same_names)))
-        @test_broken need_to_handle_undef_sparam == Set()
+        @test_broken isempty(need_to_handle_undef_sparam)
         pop!(need_to_handle_undef_sparam, which(Base._cat, Tuple{Any, AbstractArray}))
         pop!(need_to_handle_undef_sparam, which(Base.byteenv, (Union{AbstractArray{Pair{T,V}, 1}, Tuple{Vararg{Pair{T,V}}}} where {T<:AbstractString,V},)))
         pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{AbstractArray{Union{Missing, T},N} where {T, N}}))
@@ -357,7 +353,7 @@ end
         pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T}))
         pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T}))
         pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T}))
-        @test need_to_handle_undef_sparam == Set()
+        @test isempty(need_to_handle_undef_sparam)
     end
 end
 
@@ -451,4 +447,20 @@ cc46601(::Type{T}, x::Int) where {T<:AbstractString} = 7
 @test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2
 @test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 7
 
+# Issue #55231
+struct U55231{P} end
+struct V55231{P} end
+U55231(::V55231) = nothing
+(::Type{T})(::V55231) where {T<:U55231} = nothing
+@test length(methods(U55231)) == 2
+U55231(a, b) = nothing
+@test length(methods(U55231)) == 3
+struct S55231{P} end
+struct T55231{P} end
+(::Type{T})(::T55231) where {T<:S55231} = nothing
+S55231(::T55231) = nothing
+@test length(methods(S55231)) == 2
+S55231(a, b) = nothing
+@test length(methods(S55231)) == 3
+
 nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 566dd44b8dcd9..333b68e287c4c 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -308,6 +308,35 @@ end
     @test_throws ArgumentError dropdims(a, dims=4)
     @test_throws ArgumentError dropdims(a, dims=6)
 
+
+    a = rand(8, 7)
+    @test @inferred(insertdims(a, dims=1)) == @inferred(insertdims(a, dims=(1,))) == reshape(a, (1, 8, 7))
+    @test @inferred(insertdims(a, dims=3))  == @inferred(insertdims(a, dims=(3,))) == reshape(a, (8, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3)))  == reshape(a, (1, 8, 1, 7))
+    @test @inferred(insertdims(a, dims=(1, 2, 3)))  == reshape(a, (1, 1, 1, 8, 7))
+    @test @inferred(insertdims(a, dims=(1, 4)))  == reshape(a, (1, 8, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 5)))  == reshape(a, (1, 8, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 2, 4, 6)))  == reshape(a, (1, 1, 8, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 4, 6)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 4, 6, 3)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 5, 6)))  == reshape(a, (1, 8, 1, 7, 1, 1))
+
+    @test_throws ArgumentError insertdims(a, dims=(1, 1, 2, 3))
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 2, 3))
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 3, 3))
+    @test_throws UndefKeywordError insertdims(a)
+    @test_throws ArgumentError insertdims(a, dims=0)
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 1))
+    @test_throws ArgumentError insertdims(a, dims=4)
+    @test_throws ArgumentError insertdims(a, dims=6)
+
+    # insertdims and dropdims are inverses
+    b = rand(1,1,1,5,1,1,7)
+    for dims in [1, (1,), 2, (2,), 3, (3,), (1,3), (1,2,3), (1,2), (1,3,5), (1,2,5,6), (1,3,5,6), (1,3,5,6), (1,6,5,3)]
+        @test dropdims(insertdims(a; dims); dims) == a
+        @test insertdims(dropdims(b; dims); dims) == b
+    end
+
     sz = (5,8,7)
     A = reshape(1:prod(sz),sz...)
     @test A[2:6] == [2:6;]
@@ -562,32 +591,32 @@ end
     @test findall(!, m) == [k for (k,v) in pairs(m) if !v]
     @test findfirst(!iszero, a) == 2
     @test findfirst(a.==0) == 1
-    @test findfirst(a.==5) == nothing
+    @test findfirst(a.==5) === nothing
     @test findfirst(Dict(1=>false, 2=>true)) == 2
-    @test findfirst(Dict(1=>false)) == nothing
+    @test findfirst(Dict(1=>false)) === nothing
     @test findfirst(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findfirst(!isequal(1), [1,2,4,1,2,3,4]) == 2
     @test findfirst(isodd, [2,4,6,3,9,2,0]) == 4
-    @test findfirst(isodd, [2,4,6,2,0]) == nothing
+    @test findfirst(isodd, [2,4,6,2,0]) === nothing
     @test findnext(!iszero,a,4) == 4
     @test findnext(!iszero,a,5) == 6
     @test findnext(!iszero,a,1) == 2
     @test findnext(isequal(1),a,4) == 6
-    @test findnext(isequal(5),a,4) == nothing
+    @test findnext(isequal(5),a,4) === nothing
     @test findlast(!iszero, a) == 8
     @test findlast(a.==0) == 5
-    @test findlast(a.==5) == nothing
-    @test findlast(false) == nothing # test non-AbstractArray findlast
+    @test findlast(a.==5) === nothing
+    @test findlast(false) === nothing # test non-AbstractArray findlast
     @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findlast(isodd, [2,4,6,3,9,2,0]) == 5
-    @test findlast(isodd, [2,4,6,2,0]) == nothing
+    @test findlast(isodd, [2,4,6,2,0]) === nothing
     @test findprev(!iszero,a,4) == 4
     @test findprev(!iszero,a,5) == 4
-    @test findprev(!iszero,a,1) == nothing
+    @test findprev(!iszero,a,1) === nothing
     @test findprev(isequal(1),a,4) == 2
     @test findprev(isequal(1),a,8) == 6
     @test findprev(isodd, [2,4,5,3,9,2,0], 7) == 5
-    @test findprev(isodd, [2,4,5,3,9,2,0], 2) == nothing
+    @test findprev(isodd, [2,4,5,3,9,2,0], 2) === nothing
     @test findfirst(isequal(0x00), [0x01, 0x00]) == 2
     @test findlast(isequal(0x00), [0x01, 0x00]) == 2
     @test findnext(isequal(0x00), [0x00, 0x01, 0x00], 2) == 3
@@ -1263,6 +1292,10 @@ end
     @test @inferred(mapslices(hcat, [1 2; 3 4], dims=1)) == [1 2; 3 4] # previously an error, now allowed
     @test mapslices(identity, [1 2; 3 4], dims=(2,2)) == [1 2; 3 4] # previously an error
     @test_broken @inferred(mapslices(identity, [1 2; 3 4], dims=(2,2))) == [1 2; 3 4]
+
+    # type inference in mapslices
+    a_ = @inferred (a -> mapslices(identity, reshape(a, size(a)..., 1, 1), dims=(3,4)))(a)
+    @test a_ == reshape(a, size(a)..., 1, 1)
 end
 
 @testset "single multidimensional index" begin
@@ -2072,6 +2105,7 @@ end
 
     I1 = CartesianIndex((2,3,0))
     I2 = CartesianIndex((-1,5,2))
+    @test +I1 == I1
     @test -I1 == CartesianIndex((-2,-3,0))
     @test I1 + I2 == CartesianIndex((1,8,2))
     @test I2 + I1 == CartesianIndex((1,8,2))
@@ -2859,6 +2893,33 @@ end
     @inferred accumulate(*, String[])
     @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=1) == ["a" "b"; "ac" "bd"]
     @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=2) == ["a" "ab"; "c" "cd"]
+
+    # #53438
+    v = [(1, 2), (3, 4)]
+    @test_throws MethodError accumulate(+, v)
+    @test_throws MethodError cumsum(v)
+    @test_throws MethodError cumprod(v)
+    @test_throws MethodError accumulate(+, v; init=(0, 0))
+    @test_throws MethodError accumulate(+, v; dims=1, init=(0, 0))
+
+    # Some checks to ensure we're identifying the widest needed eltype
+    # as identified in PR 53461
+    @testset "Base._accumulate_promote_op" begin
+        # A somewhat contrived example where each call to `foo`
+        # will return a different type
+        foo(x::Bool, y::Int)::Int = x + y
+        foo(x::Int, y::Int)::Float64 = x + y
+        foo(x::Float64, y::Int)::ComplexF64 = x + y * im
+        foo(x::ComplexF64, y::Int)::String = string(x, "+", y)
+
+        v = collect(1:5)
+        @test Base._accumulate_promote_op(foo, v; init=true) === Base._accumulate_promote_op(foo, v) == Union{Float64, String, ComplexF64}
+        @test Base._accumulate_promote_op(/, v) === Base._accumulate_promote_op(/, v; init=0) == Float64
+        @test Base._accumulate_promote_op(+, v) === Base._accumulate_promote_op(+, v; init=0) === Int
+        @test Base._accumulate_promote_op(+, v; init=0.0) === Float64
+        @test Base._accumulate_promote_op(+, Union{Int, Missing}[v...]) === Union{Int, Missing}
+        @test Base._accumulate_promote_op(+, Union{Int, Nothing}[v...]) === Union{Int, Nothing}
+    end
 end
 
 struct F21666{T <: Base.ArithmeticStyle}
@@ -3189,21 +3250,21 @@ end
 
 @testset "Wrapping Memory into Arrays" begin
     mem = Memory{Int}(undef, 10) .= 1
-    memref = MemoryRef(mem)
-    @test_throws DimensionMismatch wrap(Array, mem, (10, 10))
-    @test wrap(Array, mem, (5,)) == ones(Int, 5)
-    @test wrap(Array, mem, 2) == ones(Int, 2)
-    @test wrap(Array, memref, 10) == ones(Int, 10)
-    @test wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2)
-    @test wrap(Array, mem, (5, 2)) == ones(Int, 5, 2)
-
-    memref2 = MemoryRef(mem, 3)
-    @test wrap(Array, memref2, (5,)) == ones(Int, 5)
-    @test wrap(Array, memref2, 2) == ones(Int, 2)
-    @test wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2)
-    @test wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2)
-    @test_throws DimensionMismatch wrap(Array, memref2, 9)
-    @test_throws DimensionMismatch wrap(Array, memref2, 10)
+    memref = memoryref(mem)
+    @test_throws DimensionMismatch Base.wrap(Array, mem, (10, 10))
+    @test Base.wrap(Array, mem, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, mem, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref, 10) == ones(Int, 10)
+    @test Base.wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, mem, (5, 2)) == ones(Int, 5, 2)
+
+    memref2 = memoryref(mem, 3)
+    @test Base.wrap(Array, memref2, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, memref2, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 9)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 10)
 end
 
 @testset "Memory size" begin
@@ -3214,3 +3275,9 @@ end
     @test size(mem, 2) == 1
     @test size(mem, 0x2) == 1
 end
+
+@testset "MemoryRef" begin
+    mem = Memory{Float32}(undef, 3)
+    ref = memoryref(mem, 2)
+    @test parent(ref) === mem
+end
diff --git a/test/atexit.jl b/test/atexit.jl
index 4a37d465f250b..08a8e0c4b46a2 100644
--- a/test/atexit.jl
+++ b/test/atexit.jl
@@ -220,7 +220,7 @@ using Test
                     # Block until the atexit hooks have all finished. We use a manual "spin
                     # lock" because task switch is disallowed inside the finalizer, below.
                     atexit_has_finished[] = 1
-                    while atexit_has_finished[] == 1 end
+                    while atexit_has_finished[] == 1; GC.safepoint(); end
                     try
                         # By the time this runs, all the atexit hooks will be done.
                         # So this will throw.
@@ -232,7 +232,7 @@ using Test
                         exit(22)
                     end
                 end
-                while atexit_has_finished[] == 0 end
+                while atexit_has_finished[] == 0; GC.safepoint(); end
             end
             # Finalizers run after the atexit hooks, so this blocks exit until the spawned
             # task above gets a chance to run.
@@ -241,7 +241,7 @@ using Test
                 # Allow the spawned task to finish
                 atexit_has_finished[] = 2
                 # Then spin forever to prevent exit.
-                while atexit_has_finished[] == 2 end
+                while atexit_has_finished[] == 2; GC.safepoint(); end
             end
             exit(0)
             """ => 22,
diff --git a/test/atomics.jl b/test/atomics.jl
index 0d6c841073ad5..adfe4c87138cd 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -91,6 +91,7 @@ primitive type Int24 <: Signed 24 end # integral padding
 Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
 Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16)
 Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b)
+Base.:(==)(x::PadIntA, b::Int) = x == PadIntA(b)
 Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b)
 Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b))
 Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")")
@@ -128,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20))
 test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
 test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
 test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
 #FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
@@ -316,6 +318,8 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
 test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
 test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
 test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_field_orderings(Complex{Rational{Integer}}(10, 30), Complex{Rational{Integer}}(20, 40))
 test_field_orderings(10.0, 20.0)
 test_field_orderings(NaN, Inf)
 
@@ -423,6 +427,92 @@ let a = ARefxy(1, -1)
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
 end
 
+function _test_atomic_get_set_swap_modify(T, x, y, z)
+    @testset "atomic get,set,swap,modify" begin
+        mem = AtomicMemory{T}(undef, 2)
+        @test_throws CanonicalIndexError mem[1] = 3
+
+        @test Base.setindex_atomic!(mem, Base.default_access_order(mem), x, 1) == x
+        @test mem[1] == x
+        @test Base.setindex_atomic!(mem, Base.default_access_order(mem), y, 2) == y
+        @test mem[2] == y
+
+        idx = UInt32(2)
+
+        @test (@atomic mem[1]) == x
+        @test (@atomic mem[idx]) == y
+
+        (old, new) = (mem[idx], z)
+        # old and new are intentionally of different types to test inner conversion
+        @test (@atomic mem[idx] = new) == new
+        @test mem[idx] == new
+        @atomic mem[idx] = old
+
+        @test (@atomicswap mem[idx] = new) == old
+        @test mem[idx] == new
+        @atomic mem[idx] = old
+
+        try
+            old + new
+            @test (@atomic mem[idx] += new) == old + new
+            @test mem[idx] == old + new
+            @atomic mem[idx] = old
+        catch err
+            if !(err isa MethodError)
+                rethrow(err)
+            end
+        end
+    end
+end
+
+function _test_atomic_setonce_replace(T, initial, desired)
+    @testset "atomic setonce,replace" begin
+        mem = AtomicMemory{T}(undef, 2)
+        if isassigned(mem, 2)
+            @test (@atomiconce mem[2] = initial) == false
+            @atomic mem[2] = initial
+        else
+            @test (@atomiconce mem[2] = initial) == true
+            @test mem[2] == initial
+            @test (@atomiconce mem[2] = desired) == false
+            @test mem[2] == initial
+            @test !isassigned(mem, 1)
+        end
+
+        idx = UInt(2)
+
+        expected = @atomic mem[idx]
+        @test (@atomicreplace mem[idx] expected => desired) == (old=expected, success=true)
+        @test mem[idx] == desired
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] desired => desired) == (old=expected, success=false)
+        @test mem[idx] == expected
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] Pair(expected, desired)) == (old=expected, success=true)
+        @test mem[idx] == desired
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] Pair(desired, desired)) == (old=initial, success=false)
+        @test mem[idx] == expected
+    end
+end
+@testset "@atomic with AtomicMemory" begin
+
+    _test_atomic_get_set_swap_modify(Float64, rand(), rand(), 10)
+    _test_atomic_get_set_swap_modify(PadIntA, 123_1, 123_2, 10)
+    _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, nothing, 10)
+    _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, 234_5, 10)
+    _test_atomic_get_set_swap_modify(Vector{BigInt}, BigInt[1, 2, 3], BigInt[1, 2], [2, 4])
+
+    _test_atomic_setonce_replace(Float64, rand(), 42)
+    _test_atomic_setonce_replace(PadIntA, 123_1, 123_2)
+    _test_atomic_setonce_replace(Union{Nothing,Int}, 123_1, nothing)
+    _test_atomic_setonce_replace(Vector{BigInt}, BigInt[1, 2], [3, 4])
+    _test_atomic_setonce_replace(String, "abc", "cab")
+end
+
 let a = ARefxy{Union{Nothing,Integer}}()
     @test_throws ConcurrencyViolationError @atomiconce :not_atomic a.x = 2
     @test true === @atomiconce a.x = 1
@@ -481,6 +571,7 @@ test_global_operators(Any)
 test_global_operators(Union{Nothing,Int})
 test_global_operators(Complex{Int32})
 test_global_operators(Complex{Int128})
+test_global_operators(Complex{Real})
 test_global_operators(PadIntA)
 test_global_operators(PadIntB)
 #FIXME: test_global_operators(Int24)
@@ -604,6 +695,7 @@ test_global_orderings(Any, true, false)
 test_global_orderings(Union{Nothing,Missing}, nothing, missing)
 test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40))
 test_global_orderings(Float64, 10.0, 20.0)
 test_global_orderings(Float64, NaN, Inf)
 
@@ -757,6 +849,7 @@ test_memory_operators(Any)
 test_memory_operators(Union{Nothing,Int})
 test_memory_operators(Complex{Int32})
 test_memory_operators(Complex{Int128})
+test_memory_operators(Complex{Real})
 test_memory_operators(PadIntA)
 test_memory_operators(PadIntB)
 #FIXME: test_memory_operators(Int24)
@@ -944,6 +1037,7 @@ test_memory_orderings(Any, true, false)
 test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
 test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
 test_memory_orderings(10.0, 20.0)
 test_memory_orderings(NaN, Inf)
 
diff --git a/test/backtrace.jl b/test/backtrace.jl
index 50a50100488c4..68873678df57b 100644
--- a/test/backtrace.jl
+++ b/test/backtrace.jl
@@ -202,6 +202,15 @@ let trace = try
     end
     @test trace[1].func === Symbol("top-level scope")
 end
+let trace = try
+        eval(Expr(:toplevel, LineNumberNode(3, :a_filename), Expr(:error, 1)))
+    catch
+        stacktrace(catch_backtrace())
+    end
+    @test trace[1].func === Symbol("top-level scope")
+    @test trace[1].file === :a_filename
+    @test trace[1].line == 3
+end
 let trace = try
         include_string(@__MODULE__,
             """
@@ -253,10 +262,14 @@ let code = """
                   if ip isa Base.InterpreterIP && ip.code isa Core.MethodInstance]
     num_fs = sum(meth_names .== :f29695)
     num_gs = sum(meth_names .== :g29695)
-    print(num_fs, ' ', num_gs)
+    if num_fs != 1000 || num_gs != 1000
+        Base.show_backtrace(stderr, bt)
+        error("Expected 1000 frames each, got \$num_fs, \$num_fs")
+    end
+    exit()
     """
 
-    @test read(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`, String) == "1000 1000"
+    @test success(pipeline(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`; stderr))
 end
 
 # Test that modules make it into InterpreterIP for top-level code
diff --git a/test/bitarray.jl b/test/bitarray.jl
index 056a201bd4f6f..67d8fae0eda6d 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -3,6 +3,9 @@
 using Base: findprevnot, findnextnot
 using Random, LinearAlgebra, Test
 
+isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl")
+using .Main.SizedArrays
+
 tc(r1::NTuple{N,Any}, r2::NTuple{N,Any}) where {N} = all(x->tc(x...), [zip(r1,r2)...])
 tc(r1::BitArray{N}, r2::Union{BitArray{N},Array{Bool,N}}) where {N} = true
 tc(r1::SubArray{Bool,N1,BitArray{N2}}, r2::SubArray{Bool,N1,<:Union{BitArray{N2},Array{Bool,N2}}}) where {N1,N2} = true
@@ -82,6 +85,25 @@ allsizes = [((), BitArray{0}), ((v1,), BitVector),
     @test !isassigned(b, length(b) + 1)
 end
 
+@testset "trues and falses with custom axes" begin
+    for ax in ((SizedArrays.SOneTo(2),), (SizedArrays.SOneTo(2), Base.OneTo(2)))
+        t = trues(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test t isa SizedArrays.SizedArray && parent(t) isa BitArray
+        else
+            @test t isa BitArray
+        end
+        @test all(t)
+
+        f = falses(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test t isa SizedArrays.SizedArray && parent(t) isa BitArray
+        else
+            @test t isa BitArray
+        end
+        @test !any(f)
+    end
+end
 
 @testset "Conversions for size $sz" for (sz, T) in allsizes
     b1 = rand!(falses(sz...))
@@ -1335,11 +1357,11 @@ timesofar("find")
     @test findprev(b1, 777)  == findprevnot(b2, 777)  == findprev(!, b2, 777)  == 777
     @test findprev(b1, 776)  == findprevnot(b2, 776)  == findprev(!, b2, 776)  == 77
     @test findprev(b1, 77)   == findprevnot(b2, 77)   == findprev(!, b2, 77)   == 77
-    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   == nothing
-    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   == nothing
-    @test findprev(identity, b1, -1) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findprev(Returns(true), b1, -1) == nothing
+    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   === nothing
+    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   === nothing
+    @test findprev(identity, b1, -1) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findprev(Returns(true), b1, -1) === nothing
     @test_throws BoundsError findnext(b1, -1)
     @test_throws BoundsError findnextnot(b2, -1)
     @test_throws BoundsError findnext(!, b2, -1)
@@ -1350,28 +1372,28 @@ timesofar("find")
     @test findnext(b1, 77)   == findnextnot(b2, 77)   == findnext(!, b2, 77)   == 77
     @test findnext(b1, 78)   == findnextnot(b2, 78)   == findnext(!, b2, 78)   == 777
     @test findnext(b1, 777)  == findnextnot(b2, 777)  == findnext(!, b2, 777)  == 777
-    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  == nothing
-    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing
+    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  === nothing
+    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) === nothing
+    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) === nothing
 
     @test findlast(b1) == Base.findlastnot(b2) == 777
     @test findfirst(b1) == Base.findfirstnot(b2) == 77
 
     b0 = BitVector()
-    @test findprev(Returns(true), b0, -1) == nothing
+    @test findprev(Returns(true), b0, -1) === nothing
     @test_throws BoundsError findprev(Returns(true), b0, 1)
     @test_throws BoundsError findnext(Returns(true), b0, -1)
-    @test findnext(Returns(true), b0, 1) == nothing
+    @test findnext(Returns(true), b0, 1) === nothing
 
     b1 = falses(10)
     @test findprev(Returns(true), b1, 5) == 5
     @test findnext(Returns(true), b1, 5) == 5
-    @test findprev(Returns(true), b1, -1) == nothing
-    @test findnext(Returns(true), b1, 11) == nothing
-    @test findprev(Returns(false), b1, 5) == nothing
-    @test findnext(Returns(false), b1, 5) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findnext(Returns(false), b1, 11) == nothing
+    @test findprev(Returns(true), b1, -1) === nothing
+    @test findnext(Returns(true), b1, 11) === nothing
+    @test findprev(Returns(false), b1, 5) === nothing
+    @test findnext(Returns(false), b1, 5) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findnext(Returns(false), b1, 11) === nothing
     @test_throws BoundsError findprev(Returns(true), b1, 11)
     @test_throws BoundsError findnext(Returns(true), b1, -1)
 
@@ -1393,7 +1415,7 @@ timesofar("find")
     for l = [1, 63, 64, 65, 127, 128, 129]
         f = falses(l)
         t = trues(l)
-        @test findprev(f, l) == findprevnot(t, l) == nothing
+        @test findprev(f, l) == findprevnot(t, l) === nothing
         @test findprev(t, l) == findprevnot(f, l) == l
         b1 = falses(l)
         b1[end] = true
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 2ae3d96b2a709..b2232258744ac 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -808,8 +808,8 @@ let
 end
 
 let
-  bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5))
-  @test isbits(Broadcast.flatten(bc).f)
+    bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5))
+    @test isbits(Broadcast.flatten(bc).f)
 end
 
 # Issue #26127: multiple splats in a fused dot-expression
@@ -979,6 +979,10 @@ end
     @test sum(bc, dims=1, init=0) == [5]
     bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c'))
     @test prod(bc, dims=1, init="") == ["acbc"]
+
+    a = rand(-10:10,32,4); b = rand(-10:10,32,4)
+    bc = Broadcast.instantiate(Broadcast.broadcasted(+,a,b))
+    @test sum(bc; dims = 1, init = 0.0) == sum(collect(bc); dims = 1, init = 0.0)
 end
 
 # treat Pair as scalar:
diff --git a/test/buildkitetestjson.jl b/test/buildkitetestjson.jl
index 8410913e48ef2..49c47e0d8f151 100644
--- a/test/buildkitetestjson.jl
+++ b/test/buildkitetestjson.jl
@@ -1,9 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Convert test(set) results to a Buildkit-compatible JSON representation.
+# Convert test(set) results to a Buildkite-compatible JSON representation.
 # Based on <https://buildkite.com/docs/test-analytics/importing-json#json-test-results-data-reference>.
 
-module BuildKiteTestJSON
+module BuildkiteTestJSON
 
 using Test
 using Dates
@@ -105,9 +105,9 @@ function add_failure_info!(data::Dict{String, Any}, result::Test.Result)
         data["failure_reason"] = if result.test_type === :test_error
             if occursin("\nStacktrace:\n", result.backtrace)
                 err, trace = split(result.backtrace, "\nStacktrace:\n", limit=2)
-                data["failure_expanded"] = Dict{String, Any}(
-                    "expanded" => split(err, '\n'),
-                    "backtrace" => split(trace, '\n'))
+                data["failure_expanded"] =
+                    [Dict{String,Any}("expanded" => split(err, '\n'),
+                                      "backtrace" => split(trace, '\n'))]
             end
             "Exception (unexpectedly) thrown during test"
         elseif result.test_type === :test_nonbool
diff --git a/test/ccall.jl b/test/ccall.jl
index 3647173eb9290..b10504de21abc 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -1934,3 +1934,35 @@ end
     end
     @test_throws "could not load symbol \"test\"" somefunction_not_found_libc()
 end
+
+# issue #52025
+@test Base.unsafe_convert(Ptr{Ptr{Cchar}}, Base.cconvert(Ptr{Ptr{Cchar}}, map(pointer, ["ab"]))) isa Ptr{Ptr{Cchar}}
+#issue #54725
+for A in (reinterpret(UInt, [0]), reshape([0, 0], 1, 2))
+    @test pointer(A) == Base.unsafe_convert(Ptr{Cvoid}, A) == Base.unsafe_convert(Ptr{Int}, A)
+end
+# Cglobal with non-static symbols doesn't error
+function cglobal_non_static1()
+    sym = (:global_var, libccalltest)
+    cglobal(sym)
+end
+global the_sym = (:global_var, libccalltest)
+cglobal_non_static2() = cglobal(the_sym)
+
+@test isa(cglobal_non_static1(), Ptr)
+@test isa(cglobal_non_static2(), Ptr)
+
+@generated function generated_world_counter()
+    return :($(Base.get_world_counter()))
+end
+function world_counter()
+    return Base.get_world_counter()
+end
+let llvm = sprint(code_llvm, world_counter, ())
+    # check that we got a reasonable value for the world age
+    @test (world_counter() != 0) && (world_counter() != -1)
+    # no call to the runtime should be left over
+    @test !occursin("call i64", llvm)
+    # the world age should be -1 in generated functions (or other pure contexts)
+    @test (generated_world_counter() == reinterpret(UInt, -1))
+end
diff --git a/test/channels.jl b/test/channels.jl
index 25b5185c1a336..eed7a7ecc0566 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -12,6 +12,9 @@ using Base: n_avail
     end
     @test wait(a) == "success"
     @test fetch(t) == "finished"
+
+    # Test printing
+    @test repr(a) == "Condition()"
 end
 
 @testset "wait first behavior of wait on Condition" begin
@@ -382,7 +385,7 @@ end
         """error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")""", output))
     # test for invalid state in Workqueue during yield
     t = @async nothing
-    t._state = 66
+    @atomic t._state = 66
     newstderr = redirect_stderr()
     try
         errstream = @async read(newstderr[1], String)
@@ -500,7 +503,7 @@ end
     c = Channel(1)
     close(c)
     @test !isopen(c)
-    c.excp == nothing # to trigger the branch
+    c.excp === nothing # to trigger the branch
     @test_throws InvalidStateException Base.check_channel_state(c)
 end
 
@@ -638,3 +641,11 @@ end
         @test n_avail(c) == 0
     end
 end
+
+@testset "Task properties" begin
+    f() = rand(2,2)
+    t = Task(f)
+    message = "Querying a Task's `scope` field is disallowed.\nThe private `Core.current_scope()` function is better, though still an implementation detail."
+    @test_throws ErrorException(message) t.scope
+    @test t.state == :runnable
+end
diff --git a/test/char.jl b/test/char.jl
index 1d3579013ad18..3100add0e81c5 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -121,7 +121,7 @@ end
     #iterate(c::Char)
     for x in testarrays
         @test iterate(x)[1] == x
-        @test iterate(x, iterate(x)[2]) == nothing
+        @test iterate(x, iterate(x)[2]) === nothing
     end
 
     #isless(x::Char, y::Integer) = isless(UInt32(x), y)
@@ -360,3 +360,31 @@ end
     @test Base.IteratorSize(Char) == Base.HasShape{0}()
     @test convert(ASCIIChar, 1) == Char(1)
 end
+
+@testset "foldable functions" begin
+    v = @inferred (() -> Val(isuppercase('C')))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(islowercase('C')))()
+    @test v isa Val{false}
+
+    v = @inferred (() -> Val(isletter('C')))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(isnumeric('C')))()
+    @test v isa Val{false}
+
+    struct MyChar <: AbstractChar
+        x :: Char
+    end
+    Base.codepoint(m::MyChar) = codepoint(m.x)
+    MyChar(x::UInt32) = MyChar(Char(x))
+
+    v = @inferred (() -> Val(isuppercase(MyChar('C'))))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(islowercase(MyChar('C'))))()
+    @test v isa Val{false}
+
+    v = @inferred (() -> Val(isletter(MyChar('C'))))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(isnumeric(MyChar('C'))))()
+    @test v isa Val{false}
+end
diff --git a/test/checked.jl b/test/checked.jl
index 7ee28186f9783..4031918a38730 100644
--- a/test/checked.jl
+++ b/test/checked.jl
@@ -330,6 +330,9 @@ end
 
     @test checked_pow(BigInt(2), 2) == BigInt(4)
     @test checked_pow(BigInt(2), 100) == BigInt(1267650600228229401496703205376)
+
+    # Perf test: Make sure BigInts allocs don't scale with the power:
+    @test @allocations(checked_pow(BigInt(2), 2)) ≈ @allocations(checked_pow(BigInt(2), 10000)) rtol=0.9
 end
 
 @testset "Additional tests" begin
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 36da1ab39bc57..96d230d185c71 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -29,7 +29,7 @@ const TESTNAMES = [
         "channels", "iostream", "secretbuffer", "specificity",
         "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
         "smallarrayshrink", "opaque_closure", "filesystem", "download",
-        "scopedvalues",
+        "scopedvalues", "compileall"
 ]
 
 const INTERNET_REQUIRED_LIST = [
@@ -155,7 +155,7 @@ function choosetests(choices = [])
     # do subarray before sparse but after linalg
     filtertests!(tests, "subarray")
     filtertests!(tests, "compiler", [
-        "compiler/datastructures", "compiler/inference", "compiler/effects",
+        "compiler/datastructures", "compiler/inference", "compiler/effects", "compiler/compact",
         "compiler/validation", "compiler/ssair", "compiler/irpasses", "compiler/tarjan",
         "compiler/codegen", "compiler/inline", "compiler/contextual", "compiler/invalidation",
         "compiler/AbstractInterpreter", "compiler/EscapeAnalysis/EscapeAnalysis"])
diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp
index 72e0494a7d936..6736d3e181118 100644
--- a/test/clangsa/GCPushPop.cpp
+++ b/test/clangsa/GCPushPop.cpp
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
 
 #include "julia.h"
 #include <string>
diff --git a/test/clangsa/ImplicitAtomicsTest.c b/test/clangsa/ImplicitAtomicsTest.c
index 87154347d9757..cfac3f38e679a 100644
--- a/test/clangsa/ImplicitAtomicsTest.c
+++ b/test/clangsa/ImplicitAtomicsTest.c
@@ -1,7 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
-// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
 
 #include "julia_atomics.h"
 
diff --git a/test/clangsa/Makefile b/test/clangsa/Makefile
index 3bebd45c9a5a6..609809884fce1 100644
--- a/test/clangsa/Makefile
+++ b/test/clangsa/Makefile
@@ -13,7 +13,7 @@ TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.c) $(wildcard $(SRCDIR)/
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
 	LD_LIBRARY_PATH="${build_libdir}:$$LD_LIBRARY_PATH" \
 	CLANGSA_FLAGS="${CLANGSA_FLAGS}" \
-	CLANGSACXX_FLAGS="${CLANGSACXX_FLAGS}" \
+	CLANGSA_CXXFLAGS="${CLANGSA_CXXFLAGS}" \
 	CPPFLAGS_FLAGS="${CPPFLAGS_FLAGS}" \
 	CFLAGS_FLAGS="${CFLAGS_FLAGS}" \
 	CXXFLAGS_FLAGS="${CXXFLAGS_FLAGS}" \
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index b71b2571e8a3e..f402dc30eb33e 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
 
 #include "julia.h"
 #include "julia_internal.h"
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 498254bb26ce0..c6720e23739d8 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -257,6 +257,10 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test expanded == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir()))
     end
 
+    # handling of `@temp` in --project and JULIA_PROJECT
+    @test tempdir() == readchomp(`$exename --project=@temp -e 'println(Base.active_project())'`)[1:lastindex(tempdir())]
+    @test tempdir() == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@temp", "HOME" => homedir()))[1:lastindex(tempdir())]
+
     # --quiet, --banner
     let p = "print((Base.JLOptions().quiet, Base.JLOptions().banner))"
         @test read(`$exename                   -e $p`, String) == "(0, -1)"
@@ -335,43 +339,37 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test errors_not_signals(`$exename -C invalidtarget`)
     @test errors_not_signals(`$exename --cpu-target=invalidtarget`)
 
-    if Sys.iswindows()
-        # -t, --threads
-        code = "print(Threads.threadpoolsize())"
-        cpu_threads = ccall(:jl_effective_threads, Int32, ())
-        @test string(cpu_threads) ==
-            read(`$exename --threads auto -e $code`, String) ==
-            read(`$exename --threads=auto -e $code`, String) ==
-            read(`$exename -tauto -e $code`, String) ==
-            read(`$exename -t auto -e $code`, String)
-        for nt in (nothing, "1")
-            withenv("JULIA_NUM_THREADS" => nt) do
-                @test read(`$exename --threads=2 -e $code`, String) ==
-                    read(`$exename -t 2 -e $code`, String) == "2"
-            end
-        end
-        # We want to test oversubscription, but on manycore machines, this can
-        # actually exhaust limited PID spaces
-        cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
-        if Sys.WORD_SIZE == 32
-            cpu_threads = min(cpu_threads, 50)
-        end
-        @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
-        withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
-            @test read(`$exename -e $code`, String) == string(cpu_threads)
+    # -t, --threads
+    code = "print(Threads.threadpoolsize())"
+    cpu_threads = ccall(:jl_effective_threads, Int32, ())
+    @test string(cpu_threads) ==
+        read(`$exename --threads auto -e $code`, String) ==
+        read(`$exename --threads=auto -e $code`, String) ==
+        read(`$exename -tauto -e $code`, String) ==
+        read(`$exename -t auto -e $code`, String)
+    for nt in (nothing, "1")
+        withenv("JULIA_NUM_THREADS" => nt) do
+            @test read(`$exename --threads=2 -e $code`, String) ==
+                read(`$exename -t 2 -e $code`, String) == "2"
         end
-        @test errors_not_signals(`$exename -t 0`)
-        @test errors_not_signals(`$exename -t -1`)
+    end
+    # We want to test oversubscription, but on manycore machines, this can
+    # actually exhaust limited PID spaces
+    cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
+    if Sys.WORD_SIZE == 32
+        cpu_threads = min(cpu_threads, 50)
+    end
+    @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
+    withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
+        @test read(`$exename -e $code`, String) == string(cpu_threads)
+    end
+    @test errors_not_signals(`$exename -t 0`)
+    @test errors_not_signals(`$exename -t -1`)
 
-        # Combining --threads and --procs: --threads does propagate
-        withenv("JULIA_NUM_THREADS" => nothing) do
-            code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
-            @test read(`$exename -p2 -t2 -e $code`, String) == "6"
-        end
-    else
-        @test_skip "Command line tests with -t are flakey on non-Windows OS"
-        # Known issue: https://github.com/JuliaLang/julia/issues/49154
-        # These tests should be fixed and reenabled on all operating systems.
+    # Combining --threads and --procs: --threads does propagate
+    withenv("JULIA_NUM_THREADS" => nothing) do
+        code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
+        @test read(`$exename -p2 -t2 -e $code`, String) == "6"
     end
 
     # Combining --threads and invalid -C should yield a decent error
@@ -388,7 +386,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # --gcthreads
     code = "print(Threads.ngcthreads())"
     cpu_threads = ccall(:jl_effective_threads, Int32, ())
-    @test (cpu_threads == 1 ? "1" : string(div(cpu_threads, 2))) ==
+    @test string(cpu_threads) ==
           read(`$exename --threads auto -e $code`, String) ==
           read(`$exename --threads=auto -e $code`, String) ==
           read(`$exename -tauto -e $code`, String) ==
@@ -429,9 +427,30 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test readchomp(`$exename -E "isinteractive()" -i`) == "true"
 
     # --color
-    @test readchomp(`$exename --color=yes -E "Base.have_color"`) == "true"
-    @test readchomp(`$exename --color=no -E "Base.have_color"`) == "false"
-    @test errors_not_signals(`$exename --color=false`)
+    function color_cmd(; flag, no_color=nothing, force_color=nothing)
+        cmd = `$exename --color=$flag -E "Base.have_color"`
+        return addenv(cmd, "NO_COLOR" => no_color, "FORCE_COLOR" => force_color)
+    end
+
+    @test readchomp(color_cmd(flag="auto")) == "nothing"
+    @test readchomp(color_cmd(flag="no")) == "false"
+    @test readchomp(color_cmd(flag="yes")) == "true"
+    @test errors_not_signals(color_cmd(flag="false"))
+    @test errors_not_signals(color_cmd(flag="true"))
+
+    @test readchomp(color_cmd(flag="auto", no_color="")) == "nothing"
+    @test readchomp(color_cmd(flag="auto", no_color="1")) == "false"
+    @test readchomp(color_cmd(flag="no", no_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", no_color="1")) == "true"
+
+    @test readchomp(color_cmd(flag="auto", force_color="")) == "nothing"
+    @test readchomp(color_cmd(flag="auto", force_color="1")) == "true"
+    @test readchomp(color_cmd(flag="no", force_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", force_color="1")) == "true"
+
+    @test readchomp(color_cmd(flag="auto", no_color="1", force_color="1")) == "true"
+    @test readchomp(color_cmd(flag="no", no_color="1", force_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", no_color="1", force_color="1")) == "true"
 
     # --history-file
     @test readchomp(`$exename -E "Bool(Base.JLOptions().historyfile)"
@@ -444,9 +463,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     mktempdir() do dir
         helperdir = joinpath(@__DIR__, "testhelpers")
         inputfile = joinpath(helperdir, "coverage_file.jl")
-        expected = replace(read(joinpath(helperdir, "coverage_file.info.bad"), String),
-            "<FILENAME>" => realpath(inputfile))
-        expected_good = replace(read(joinpath(helperdir, "coverage_file.info"), String),
+        expected = replace(read(joinpath(helperdir, "coverage_file.info"), String),
             "<FILENAME>" => realpath(inputfile))
         covfile = replace(joinpath(dir, "coverage.info"), "%" => "%%")
         @test !isfile(covfile)
@@ -464,21 +481,18 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
         @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage=user`) == "1"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
         @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage=all`) == "2"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in specific file
         tfile = realpath(inputfile)
@@ -488,7 +502,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in directory
         tdir = dirname(realpath(inputfile))
@@ -498,7 +511,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in current directory
         tdir = dirname(realpath(inputfile))
@@ -511,7 +523,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in relative directory
         tdir = dirname(realpath(inputfile))
@@ -523,7 +534,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in relative directory with dot-dot notation
         tdir = dirname(realpath(inputfile))
@@ -535,7 +545,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in a different directory
         tdir = mktempdir() # a dir that contains no code
@@ -570,15 +579,14 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
             end
             do_test()
             """), """
-            DA:1,1
             DA:2,1
             DA:3,1
             DA:5,1
             DA:6,0
             DA:9,1
             DA:10,1
-            LH:6
-            LF:7
+            LH:5
+            LF:6
             """)
         @test contains(coverage_info_for("""
             function cov_bug()
@@ -677,7 +685,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
                 code = code[3]
                 @test occursin("llvm.module.flags", code)
                 @test occursin("llvm.dbg.cu", code)
-                @test occursin("int.jl", code)
+                # TODO: consider moving test to llvmpasses as this fails on some platforms
+                # without clear reason
+                @test_skip occursin("int.jl", code)
                 @test !occursin("name: \"Int64\"", code)
             end
             let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`)
@@ -685,7 +695,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
                 code = code[3]
                 @test occursin("llvm.module.flags", code)
                 @test occursin("llvm.dbg.cu", code)
-                @test occursin("int.jl", code)
+                # TODO: consider moving test to llvmpasses as this fails on some platforms
+                # without clear reason
+                @test_skip occursin("int.jl", code)
                 @test occursin("name: \"Int64\"", code)
             end
         end
@@ -775,6 +787,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # tested in test/parallel.jl)
     @test errors_not_signals(`$exename --worker=true`)
 
+    # --trace-compile-timing
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-compile=stderr --trace-compile-timing -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
     # test passing arguments
     mktempdir() do dir
         testfile, io = mktemp(dir)
@@ -1131,14 +1154,14 @@ end
 ## `Main.main` entrypoint
 
 # Basic usage
-@test readchomp(`$(Base.julia_cmd()) -e '(@main)(ARGS) = println("hello")'`) == "hello"
+@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println("hello")'`) == "hello"
 
 # Test ARGS with -e
-@test readchomp(`$(Base.julia_cmd()) -e '(@main)(ARGS) = println(ARGS)' a b`) == repr(["a", "b"])
+@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println(args)' a b`) == repr(["a", "b"])
 
 # Test import from module
-@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(ARGS) = println("hello"); end; using .Hello'`) == "hello"
-@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(ARGS) = println("hello"); end; import .Hello'`) == ""
+@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; using .Hello'`) == "hello"
+@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; import .Hello'`) == ""
 
 # test --bug-report=rr
 if Sys.islinux() && Sys.ARCH in (:i686, :x86_64) # rr is only available on these platforms
diff --git a/test/combinatorics.jl b/test/combinatorics.jl
index 862e3bfa37e1e..527bd86963a6f 100644
--- a/test/combinatorics.jl
+++ b/test/combinatorics.jl
@@ -77,17 +77,12 @@ end
 end
 
 @testset "factorial" begin
-    @test factorial(7) == 5040
-    @test factorial(Int8(7)) == 5040
-    @test factorial(UInt8(7)) == 5040
-    @test factorial(Int16(7)) == 5040
-    @test factorial(UInt16(7)) == 5040
-    @test factorial(Int32(7)) == 5040
-    @test factorial(UInt32(7)) == 5040
-    @test factorial(Int64(7)) == 5040
-    @test factorial(UInt64(7)) == 5040
-    @test factorial(Int128(7)) == 5040
-    @test factorial(UInt128(7)) == 5040
+    for T = Base.uniontypes(Union{Base.Checked.SignedInt,Base.Checked.UnsignedInt})
+        @testset let T = T
+            @test factorial(T(7)) == 5040
+            @test Core.Compiler.is_foldable(Base.infer_effects(factorial, (T,)))
+        end
+    end
     @test factorial(0) == 1
     @test_throws DomainError factorial(-1)
     @test factorial(Int64(20)) == 2432902008176640000
diff --git a/test/compileall.jl b/test/compileall.jl
new file mode 100644
index 0000000000000..beec0d6df49ab
--- /dev/null
+++ b/test/compileall.jl
@@ -0,0 +1,11 @@
+# This test builds a full system image, so it can take a little while.
+# We make it a separate test target here, so that it can run in parallel
+# with the rest of the tests.
+
+mktempdir() do dir
+    @test success(pipeline(`$(Base.julia_cmd()) --compile=all --strip-ir --output-o $(dir)/sys.o.a -e 'exit()'`, stderr=stderr)) skip=(Sys.WORD_SIZE == 32)
+    if isfile(joinpath(dir, "sys.o.a"))
+        Base.Linking.link_image(joinpath(dir, "sys.o.a"), joinpath(dir, "sys.so"))
+        @test success(`$(Base.julia_cmd()) -J $(dir)/sys.so -e 'Base.scrub_repl_backtrace(nothing); exit()'`)
+    end
+end
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index 80288bac486ff..d95354cefa80c 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -6,11 +6,22 @@ const CC = Core.Compiler
 include("irutils.jl")
 include("newinterp.jl")
 
+# interpreter that performs abstract interpretation only
+# (semi-concrete interpretation should be disabled automatically)
+@newinterp AbsIntOnlyInterp1
+CC.may_optimize(::AbsIntOnlyInterp1) = false
+@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp1()) >: IO
+
+# it should work even if the interpreter discards inferred source entirely
+@newinterp AbsIntOnlyInterp2
+CC.may_optimize(::AbsIntOnlyInterp2) = false
+CC.transform_result_for_cache(::AbsIntOnlyInterp2, ::Core.MethodInstance, ::CC.WorldRange, ::CC.InferenceResult) = nothing
+@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp2()) >: IO
 
 # OverlayMethodTable
 # ==================
 
-using Base.Experimental: @MethodTable, @overlay
+using Base.Experimental: @MethodTable, @overlay, @consistent_overlay
 
 # @overlay method with return type annotation
 @MethodTable RT_METHOD_DEF
@@ -20,8 +31,8 @@ using Base.Experimental: @MethodTable, @overlay
 end
 
 @newinterp MTOverlayInterp
-@MethodTable OverlayedMT
-CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_inference_world(interp), OverlayedMT)
+@MethodTable OVERLAY_MT
+CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_inference_world(interp), OVERLAY_MT)
 
 function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark)
     if interp.meta !== nothing
@@ -31,8 +42,10 @@ function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark)
     return nothing
 end
 
+struct StrangeSinError end
 strangesin(x) = sin(x)
-@overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x)
+@overlay OVERLAY_MT strangesin(x::Float64) =
+    iszero(x) ? throw(StrangeSinError()) : x < 0 ? nothing : cos(x)
 
 # inference should use the overlayed method table
 @test Base.return_types((Float64,); interp=MTOverlayInterp()) do x
@@ -41,6 +54,11 @@ end |> only === Union{Float64,Nothing}
 @test Base.return_types((Any,); interp=MTOverlayInterp()) do x
     @invoke strangesin(x::Float64)
 end |> only === Union{Float64,Nothing}
+@test only(Base.return_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{Float64,Nothing}
+@test Base.infer_exception_type(strangesin, (Float64,); interp=MTOverlayInterp()) === Union{StrangeSinError,DomainError}
+@test only(Base.infer_exception_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{StrangeSinError,DomainError}
+@test last(only(code_typed(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing}
+@test last(only(Base.code_ircode(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing}
 
 # effect analysis should figure out that the overlayed method is used
 @test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x
@@ -84,7 +102,7 @@ end |> only === Float64
 
 # not fully covered overlay method match
 overlay_match(::Any) = nothing
-@overlay OverlayedMT overlay_match(::Int) = missing
+@overlay OVERLAY_MT overlay_match(::Int) = missing
 @test Base.return_types((Any,); interp=MTOverlayInterp()) do x
     overlay_match(x)
 end |> only === Union{Nothing,Missing}
@@ -116,12 +134,48 @@ Base.@assume_effects :total totalcall(f, args...) = f(args...)
     end
 end |> only === Nothing
 
+# override `:native_executable` to allow concrete-eval for overlay-ed methods
+function myfactorial(x::Int, raise)
+    res = 1
+    0 ≤ x < 20 || raise("x is too big")
+    Base.@assume_effects :terminates_locally while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+raise_on_gpu1(x) = error(x)
+@overlay OVERLAY_MT @noinline raise_on_gpu1(x) = #=do something with GPU=# error(x)
+raise_on_gpu2(x) = error(x)
+@consistent_overlay OVERLAY_MT @noinline raise_on_gpu2(x) = #=do something with GPU=# error(x)
+raise_on_gpu3(x) = error(x)
+@consistent_overlay OVERLAY_MT @noinline Base.@assume_effects :foldable raise_on_gpu3(x) = #=do something with GPU=# error_on_gpu(x)
+cpu_factorial(x::Int) = myfactorial(x, error)
+gpu_factorial1(x::Int) = myfactorial(x, raise_on_gpu1)
+gpu_factorial2(x::Int) = myfactorial(x, raise_on_gpu2)
+gpu_factorial3(x::Int) = myfactorial(x, raise_on_gpu3)
+
+@test Base.infer_effects(cpu_factorial, (Int,); interp=MTOverlayInterp()) |> Core.Compiler.is_nonoverlayed
+@test Base.infer_effects(gpu_factorial1, (Int,); interp=MTOverlayInterp()) |> !Core.Compiler.is_nonoverlayed
+@test Base.infer_effects(gpu_factorial2, (Int,); interp=MTOverlayInterp()) |> Core.Compiler.is_consistent_overlay
+let effects = Base.infer_effects(gpu_factorial3, (Int,); interp=MTOverlayInterp())
+    # check if `@consistent_overlay` together works with `@assume_effects`
+    # N.B. the overlaid `raise_on_gpu3` is not :foldable otherwise since `error_on_gpu` is (intetionally) undefined.
+    @test Core.Compiler.is_consistent_overlay(effects)
+    @test Core.Compiler.is_foldable(effects)
+end
+@test Base.infer_return_type(; interp=MTOverlayInterp()) do
+    Val(gpu_factorial2(3))
+end == Val{6}
+@test Base.infer_return_type(; interp=MTOverlayInterp()) do
+    Val(gpu_factorial3(3))
+end == Val{6}
+
 # GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror`
 # https://github.com/JuliaLang/julia/issues/48097
 @newinterp Issue48097Interp
-@MethodTable Issue48097MT
-CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_inference_world(interp), Issue48097MT)
-CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false)
+@MethodTable ISSUE_48097_MT
+CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_inference_world(interp), ISSUE_48097_MT)
 function CC.concrete_eval_eligible(interp::Issue48097Interp,
     @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState)
     ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter,
@@ -132,37 +186,68 @@ function CC.concrete_eval_eligible(interp::Issue48097Interp,
     end
     return ret
 end
-@overlay Issue48097MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return
+@overlay ISSUE_48097_MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return
 issue48097(; kwargs...) = return 42
-@test_broken fully_eliminated(; interp=Issue48097Interp(), retval=42) do
+@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do
     issue48097(; a=1f0, b=1.0)
 end
 
+# https://github.com/JuliaLang/julia/issues/52938
+@newinterp Issue52938Interp
+@MethodTable ISSUE_52938_MT
+CC.method_table(interp::Issue52938Interp) = CC.OverlayMethodTable(CC.get_inference_world(interp), ISSUE_52938_MT)
+inner52938(x, types::Type, args...; kwargs...) = x
+outer52938(x) = @inline inner52938(x, Tuple{}; foo=Ref(42), bar=1)
+@test fully_eliminated(outer52938, (Any,); interp=Issue52938Interp(), retval=Argument(2))
+
+# https://github.com/JuliaGPU/CUDA.jl/issues/2241
+@newinterp Cuda2241Interp
+@MethodTable CUDA_2241_MT
+CC.method_table(interp::Cuda2241Interp) = CC.OverlayMethodTable(CC.get_inference_world(interp), CUDA_2241_MT)
+inner2241(f, types::Type, args...; kwargs...) = nothing
+function outer2241(f)
+    @inline inner2241(f, Tuple{}; foo=Ref(42), bar=1)
+    return nothing
+end
+# NOTE CUDA.jl overlays `throw_boundserror` in a way that causes effects, but these effects
+#      are ignored for this call graph at the `@assume_effects` annotation on `typejoin`.
+#      Here it's important to use `@consistent_overlay` to avoid tainting the `:nonoverlayed` bit.
+const cuda_kernel_state = Ref{Any}()
+@consistent_overlay CUDA_2241_MT @inline Base.throw_boundserror(A, I) =
+    (cuda_kernel_state[] = (A, I); error())
+@test fully_eliminated(outer2241, (Nothing,); interp=Cuda2241Interp(), retval=nothing)
+
 # Should not concrete-eval overlayed methods in semi-concrete interpretation
 @newinterp OverlaySinInterp
-@MethodTable OverlaySinMT
-CC.method_table(interp::OverlaySinInterp) = CC.OverlayMethodTable(CC.get_inference_world(interp), OverlaySinMT)
+@MethodTable OVERLAY_SIN_MT
+CC.method_table(interp::OverlaySinInterp) = CC.OverlayMethodTable(CC.get_inference_world(interp), OVERLAY_SIN_MT)
 overlay_sin1(x) = error("Not supposed to be called.")
-@overlay OverlaySinMT overlay_sin1(x) = cos(x)
-@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = overlay_sin1(x)
-let oc = Base.code_ircode(; interp=OverlaySinInterp()) do
+@overlay OVERLAY_SIN_MT overlay_sin1(x) = cos(x)
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = overlay_sin1(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
         sin(0.)
-    end |> only |> first |> Core.OpaqueClosure
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
     @test oc() == cos(0.)
 end
-@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin1(x)
-let oc = Base.code_ircode(; interp=OverlaySinInterp()) do
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin1(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
         sin(0.)
-    end |> only |> first |> Core.OpaqueClosure
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
     @test oc() == cos(0.)
 end
 _overlay_sin2(x) = error("Not supposed to be called.")
-@overlay OverlaySinMT _overlay_sin2(x) = cos(x)
+@overlay OVERLAY_SIN_MT _overlay_sin2(x) = cos(x)
 overlay_sin2(x) = _overlay_sin2(x)
-@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin2(x)
-let oc = Base.code_ircode(; interp=OverlaySinInterp()) do
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin2(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
         sin(0.)
-    end |> only |> first |> Core.OpaqueClosure
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
     @test oc() == cos(0.)
 end
 
@@ -355,12 +440,15 @@ let src = code_typed1((Float64,Float64,Float64)) do x, y, z
     @test count(iscall((src, inlined_usually)), src.code) == 0
 end
 let NoinlineModule = Module()
+    OtherModule = Module()
+    main_func(x, y, z) = inlined_usually(x, y, z)
+    @eval NoinlineModule noinline_func(x, y, z) = $inlined_usually(x, y, z)
+    @eval OtherModule other_func(x, y, z) = $inlined_usually(x, y, z)
+
     interp = NoinlineInterpreter(Set((NoinlineModule,)))
 
     # this anonymous function's context is Main -- it should be inlined as usual
-    let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z
-            inlined_usually(x, y, z)
-        end
+    let src = code_typed1(main_func, (Float64,Float64,Float64); interp)
         @test count(isinvoke(:inlined_usually), src.code) == 0
         @test count(iscall((src, inlined_usually)), src.code) == 0
     end
@@ -369,26 +457,19 @@ let NoinlineModule = Module()
     method = only(methods(inlined_usually, (Float64,Float64,Float64,)))
     mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec())
     @test CC.haskey(CC.code_cache(interp), mi)
-    let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z
-            inlined_usually(x, y, z)
-        end
+    let src = code_typed1(main_func, (Float64,Float64,Float64); interp)
         @test count(isinvoke(:inlined_usually), src.code) == 0
         @test count(iscall((src, inlined_usually)), src.code) == 0
     end
 
     # now the context module is `NoinlineModule` -- it should not be inlined
-    let src = @eval NoinlineModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z
-            $inlined_usually(x, y, z)
-        end
+    let src = code_typed1(NoinlineModule.noinline_func, (Float64,Float64,Float64); interp)
         @test count(isinvoke(:inlined_usually), src.code) == 1
         @test count(iscall((src, inlined_usually)), src.code) == 0
     end
 
     # the context module is totally irrelevant -- it should be inlined as usual
-    OtherModule = Module()
-    let src = @eval OtherModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z
-            $inlined_usually(x, y, z)
-        end
+    let src = code_typed1(OtherModule.other_func, (Float64,Float64,Float64); interp)
         @test count(isinvoke(:inlined_usually), src.code) == 0
         @test count(iscall((src, inlined_usually)), src.code) == 0
     end
@@ -399,76 +480,6 @@ end
 Core.eval(Core.Compiler, quote f(;a=1) = a end)
 @test_throws MethodError Core.Compiler.f(;b=2)
 
-# Custom lookup function
-# ======================
-
-# In the following test with `ConstInvokeInterp`, we use a custom lookup function that
-# uses const-prop'ed source if available, and check if LLVM emits code using it.
-
-using Core: MethodInstance, CodeInstance
-using Base: CodegenParams
-using InteractiveUtils
-
-@newinterp ConstInvokeInterp
-function CC.concrete_eval_eligible(interp::ConstInvokeInterp,
-    @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState)
-    ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter,
-        f::Any, result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState)
-    if ret === :semi_concrete_eval
-        return :none # disable semi-concrete interpretation
-    end
-    return ret
-end
-Base.@constprop :aggressive @noinline function custom_lookup_target(c::Bool, x::Int)
-    if c
-        y = sin(x)
-        z = nothing
-    else
-        y = cos(x)
-        z = missing
-    end
-    return y, z
-end
-custom_lookup_context(x::Int) = custom_lookup_target(true, x)
-
-const CONST_INVOKE_INTERP_WORLD = Base.get_world_counter()
-const CONST_INVOKE_INTERP = ConstInvokeInterp(; world=CONST_INVOKE_INTERP_WORLD)
-function custom_lookup(mi::MethodInstance, min_world::UInt, max_world::UInt)
-    for inf_result in CONST_INVOKE_INTERP.inf_cache
-        if inf_result.linfo === mi
-            if CC.any(inf_result.overridden_by_const)
-                return CodeInstance(CONST_INVOKE_INTERP, inf_result)
-            end
-        end
-    end
-    # XXX: This seems buggy, custom_lookup should probably construct the absint on demand.
-    return CC.getindex(CC.code_cache(CONST_INVOKE_INTERP), mi)
-end
-
-let # generate cache
-    code_typed(custom_lookup_context; world=CONST_INVOKE_INTERP_WORLD, interp=CONST_INVOKE_INTERP)
-
-    # check if the lookup function works as expected
-    target_mi = CC.specialize_method(only(methods(custom_lookup_target)), Tuple{typeof(custom_lookup_target),Bool,Int}, Core.svec())
-    target_ci = custom_lookup(target_mi, CONST_INVOKE_INTERP_WORLD, CONST_INVOKE_INTERP_WORLD)
-    @test target_ci.rettype == Tuple{Float64,Nothing} # constprop'ed source
-    # display(@ccall jl_uncompress_ir(target_ci.def.def::Any, C_NULL::Ptr{Cvoid}, target_ci.inferred::Any)::Any)
-
-    raw = false
-    lookup = @cfunction(custom_lookup, Any, (Any,Csize_t,Csize_t))
-    params = CodegenParams(;
-        debug_info_kind=Cint(0),
-        debug_info_level=Cint(2),
-        safepoint_on_entry=raw,
-        gcstack_arg=raw,
-        lookup)
-    io = IOBuffer()
-    code_llvm(io, custom_lookup_target, (Bool,Int,); params)
-    s = String(take!(io))
-    @test  occursin("j_sin_", s)
-    @test !occursin("j_cos_", s)
-end
-
 # custom inferred data
 # ====================
 
@@ -504,3 +515,20 @@ let src = code_typed((Int,); interp=CustomDataInterp()) do x
     @test count(isinvoke(:cos), src.code) == 1
     @test count(isinvoke(:+), src.code) == 0
 end
+
+# ephemeral cache mode
+@newinterp DebugInterp #=ephemeral_cache=#true
+func_ext_cache1(a) = func_ext_cache2(a) * cos(a)
+func_ext_cache2(a) = sin(a)
+let interp = DebugInterp()
+    @test Base.infer_return_type(func_ext_cache1, (Float64,); interp) === Float64
+    @test isdefined(interp, :code_cache)
+    found = false
+    for (mi, codeinst) in interp.code_cache.dict
+        if mi.def.name === :func_ext_cache2
+            found = true
+            break
+        end
+    end
+    @test found
+end
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
index 87228aaf2858a..188ec93ebc5be 100644
--- a/test/compiler/EscapeAnalysis/EAUtils.jl
+++ b/test/compiler/EscapeAnalysis/EAUtils.jl
@@ -68,7 +68,7 @@ import .CC:
 using Core:
     CodeInstance, MethodInstance, CodeInfo
 using .CC:
-    InferenceResult, OptimizationState, IRCode
+    InferenceResult, InferenceState, OptimizationState, IRCode
 using .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState
 
 struct EAToken end
@@ -158,12 +158,12 @@ struct FailedAnalysis
     get_escape_cache::GetEscapeCache
 end
 
-function CC.cache_result!(interp::EscapeAnalyzer, inf_result::InferenceResult)
-    ecacheinfo = CC.traverse_analysis_results(inf_result) do @nospecialize result
+function CC.finish!(interp::EscapeAnalyzer, state::InferenceState; can_discard_trees::Bool=CC.may_discard_trees(interp))
+    ecacheinfo = CC.traverse_analysis_results(state.result) do @nospecialize result
         return result isa EscapeCacheInfo ? result : nothing
     end
-    ecacheinfo isa EscapeCacheInfo && (interp.escape_cache.cache[inf_result.linfo] = ecacheinfo)
-    return @invoke CC.cache_result!(interp::AbstractInterpreter, inf_result::InferenceResult)
+    ecacheinfo isa EscapeCacheInfo && (interp.escape_cache.cache[state.linfo] = ecacheinfo)
+    return @invoke CC.finish!(interp::AbstractInterpreter, state::InferenceState; can_discard_trees)
 end
 
 # printing
diff --git a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
index d8ea8be21fe07..31c21f7228014 100644
--- a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
+++ b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
@@ -2139,21 +2139,13 @@ end
 # ========================
 
 # propagate escapes imposed on call arguments
-@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
-let result = code_escapes() do
-        broadcast_noescape1(Ref("Hi"))
-    end
-    i = only(findall(isnew, result.ir.stmts.stmt))
-    @test !has_return_escape(result.state[SSAValue(i)])
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe
-end
 @noinline broadcast_noescape2(b) = broadcast(identity, b)
 let result = code_escapes() do
         broadcast_noescape2(Ref("Hi"))
     end
     i = only(findall(isnew, result.ir.stmts.stmt))
     @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe
+    @test !has_thrown_escape(result.state[SSAValue(i)])
 end
 @noinline allescape_argument(a) = (global GV = a) # obvious escape
 let result = code_escapes() do
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index 805e5c7acc817..0260113044a3b 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -222,18 +222,18 @@ if opt_level > 0
     @test occursin("call i32 @memcmp(", compare_large_struct_ir) || occursin("call i32 @bcmp(", compare_large_struct_ir)
     @test !occursin("%gcframe", compare_large_struct_ir)
 
-    @test occursin("jl_gc_pool_alloc", get_llvm(MutableStruct, Tuple{}))
+    @test occursin("jl_gc_small_alloc", get_llvm(MutableStruct, Tuple{}))
     breakpoint_mutable_ir = get_llvm(breakpoint_mutable, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_mutable_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_mutable_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_mutable_ir)
 
     breakpoint_badref_ir = get_llvm(breakpoint_badref, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_badref_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_badref_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_badref_ir)
 
     breakpoint_ptrstruct_ir = get_llvm(breakpoint_ptrstruct, Tuple{RealStruct})
     @test !occursin("%gcframe", breakpoint_ptrstruct_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_ptrstruct_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_ptrstruct_ir)
 end
 
 function two_breakpoint(a::Float64)
@@ -251,17 +251,17 @@ end
 if opt_level > 0
     breakpoint_f64_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Float64},), a),
                                  Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", breakpoint_f64_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_f64_ir)
     breakpoint_any_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Any},), a),
                                  Tuple{Float64})
-    @test occursin("jl_gc_pool_alloc", breakpoint_any_ir)
+    @test occursin("jl_gc_small_alloc", breakpoint_any_ir)
     two_breakpoint_ir = get_llvm(two_breakpoint, Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", two_breakpoint_ir)
+    @test !occursin("jl_gc_small_alloc", two_breakpoint_ir)
     @test occursin("llvm.lifetime.end", two_breakpoint_ir)
 
     @test load_dummy_ref(1234) === 1234
     load_dummy_ref_ir = get_llvm(load_dummy_ref, Tuple{Int})
-    @test !occursin("jl_gc_pool_alloc", load_dummy_ref_ir)
+    @test !occursin("jl_gc_small_alloc", load_dummy_ref_ir)
     # Hopefully this is reliable enough. LLVM should be able to optimize this to a direct return.
     @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir)
 end
@@ -440,7 +440,7 @@ function f1_30093(r)
     end
 end
 
-@test f1_30093(Ref(0)) == nothing
+@test f1_30093(Ref(0)) === nothing
 
 # issue 33590
 function f33590(b, x)
@@ -621,10 +621,10 @@ g40612(a, b) = a[]|a[] === b[]|b[]
 
 # issue #41438
 struct A41438{T}
-  x::Ptr{T}
+    x::Ptr{T}
 end
 struct B41438{T}
-  x::T
+    x::T
 end
 f41438(y) = y[].x
 @test A41438.body.layout != C_NULL
@@ -697,7 +697,7 @@ mktempdir() do pfx
         libs_deleted += 1
     end
     @test libs_deleted > 0
-    @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!"
+    @test readchomp(`$pfx/bin/$(Base.julia_exename()) --startup-file=no -e 'print("no codegen!\n")'`) == "no codegen!"
 
     # PR #47343
     libs_emptied = 0
@@ -722,14 +722,14 @@ mutable struct A42645{T}
     end
 end
 mutable struct B42645{T}
-  y::A42645{T}
+    y::A42645{T}
 end
 x42645 = 1
 function f42645()
-  res = B42645(A42645([x42645]))
-  res.y = A42645([x42645])
-  res.y.x = true
-  res
+    res = B42645(A42645([x42645]))
+    res.y = A42645([x42645])
+    res.y.x = true
+    res
 end
 @test ((f42645()::B42645).y::A42645{Int}).x
 
@@ -859,17 +859,143 @@ foo50964(1) # Shouldn't assert!
 
 # https://github.com/JuliaLang/julia/issues/51233
 obj51233 = (1,)
-@test_throws ErrorException obj51233.x
+@test_throws FieldError obj51233.x
 
 # Very specific test for multiversioning
 if Sys.ARCH === :x86_64
     foo52079() = Core.Intrinsics.have_fma(Float64)
     if foo52079() == true
         let io = IOBuffer()
-            code_native(io,^,(Float64,Float64), dump_module=false)
+            code_native(io,Base.Math.exp_impl,(Float64,Float64,Val{:ℯ}), dump_module=false)
             str = String(take!(io))
             @test !occursin("fma_emulated", str)
             @test occursin("vfmadd", str)
         end
     end
 end
+
+#Check if we aren't emitting the store with the wrong TBAA metadata
+
+foo54166(x,i,y) = x[i] = y
+let io = IOBuffer()
+    code_llvm(io,foo54166, (Vector{Union{Missing,Int}}, Int, Int), dump_module=true, raw=true)
+    str = String(take!(io))
+    @test !occursin("jtbaa_unionselbyte", str)
+    @test occursin("jtbaa_arrayselbyte", str)
+end
+
+ex54166 = Union{Missing, Int64}[missing -2; missing -2];
+dims54166 = (1,2)
+@test (minimum(ex54166; dims=dims54166)[1] === missing)
+
+# #54109 - Excessive LLVM time for egal
+struct DefaultOr54109{T}
+    x::T
+    default::Bool
+end
+
+@eval struct Torture1_54109
+    $((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:897)...)
+end
+Torture1_54109() = Torture1_54109((DefaultOr54109(1.0, false) for i = 1:897)...)
+
+@eval struct Torture2_54109
+    $((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:400)...)
+    $((Expr(:(::), Symbol("x$(i+400)"), DefaultOr54109{Int16}) for i = 1:400)...)
+end
+Torture2_54109() = Torture2_54109((DefaultOr54109(1.0, false) for i = 1:400)..., (DefaultOr54109(Int16(1), false) for i = 1:400)...)
+
+@noinline egal_any54109(x, @nospecialize(y::Any)) = x === Base.compilerbarrier(:type, y)
+
+let ir1 = get_llvm(egal_any54109, Tuple{Torture1_54109, Any}),
+    ir2 = get_llvm(egal_any54109, Tuple{Torture2_54109, Any})
+
+    # We can't really do timing on CI, so instead, let's look at the length of
+    # the optimized IR. The original version had tens of thousands of lines and
+    # was slower, so just check here that we only have < 500 lines. If somebody,
+    # implements a better comparison that's larger than that, just re-benchmark
+    # this and adjust the threshold.
+
+    @test count(==('\n'), ir1) < 500
+    @test count(==('\n'), ir2) < 500
+end
+
+## Regression test for egal of a struct of this size without padding, but with
+## non-bitsegal, to make sure that it doesn't accidentally go down the accelerated
+## path.
+@eval struct BigStructAnyInt
+    $((Expr(:(::), Symbol("x$i"), Pair{Any, Int}) for i = 1:33)...)
+end
+BigStructAnyInt() = BigStructAnyInt((Union{Base.inferencebarrier(Float64), Int}=>i for i = 1:33)...)
+@test egal_any54109(BigStructAnyInt(), BigStructAnyInt())
+
+## For completeness, also test correctness, since we don't have a lot of
+## large-struct tests.
+
+# The two allocations of the same struct will likely have different padding,
+# we want to make sure we find them egal anyway - a naive memcmp would
+# accidentally look at it.
+@test egal_any54109(Torture1_54109(), Torture1_54109())
+@test egal_any54109(Torture2_54109(), Torture2_54109())
+@test !egal_any54109(Torture1_54109(), Torture1_54109((DefaultOr54109(2.0, false) for i = 1:897)...))
+
+bar54599() = Base.inferencebarrier(true) ? (Base.PkgId(Main),1) : nothing
+
+function foo54599()
+    pkginfo = @noinline bar54599()
+    pkgid = pkginfo !== nothing ? pkginfo[1] : nothing
+    @noinline println(devnull, pkgid)
+    pkgid.uuid !== nothing ? pkgid.uuid : false
+end
+
+#this function used to crash allocopt due to a no predecessors bug
+barnopreds() = Base.inferencebarrier(true) ? (Base.PkgId(Test),1) : nothing
+function foonopreds()
+    pkginfo = @noinline barnopreds()
+    pkgid = pkginfo !== nothing ? pkginfo[1] : nothing
+    pkgid.uuid !== nothing ? pkgid.uuid : false
+end
+@test foonopreds() !== nothing
+
+# issue 55396
+struct Incomplete55396
+  x::Tuple{Int}
+  y::Int
+  @noinline Incomplete55396(x::Int) = new((x,))
+end
+let x = Incomplete55396(55396)
+    @test x.x === (55396,)
+end
+
+# Core.getptls() special handling
+@test !occursin("call ptr @jlplt", get_llvm(Core.getptls, Tuple{})) #It should lower to a direct load of the ptls and not a ccall
+
+# issue 55208
+@noinline function f55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    return z isa Core.TypeofBottom
+end
+@test f55208((Union{}, 5, 6, 7), 0)
+
+@noinline function g55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    typeof(z)
+end
+@test g55208((Union{}, true, true), 0) === typeof(Union{})
+
+@test string((Core.Union{}, true, true, true)) == "(Union{}, true, true, true)"
+
+# Issue #55558
+for (T, StructName) in ((Int128, :Issue55558), (UInt128, :UIssue55558))
+    @eval begin
+        struct $(StructName)
+            a::$(T)
+            b::Int64
+            c::$(T)
+        end
+        local broken_i128 = Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "powerpc64le"
+        @test fieldoffset($(StructName), 2) == 16
+        @test fieldoffset($(StructName), 3) == 32 broken=broken_i128
+        @test sizeof($(StructName)) == 48 broken=broken_i128
+    end
+end
diff --git a/test/compiler/compact.jl b/test/compiler/compact.jl
new file mode 100644
index 0000000000000..0ac1bce8e9324
--- /dev/null
+++ b/test/compiler/compact.jl
@@ -0,0 +1,51 @@
+using Core.Compiler: IncrementalCompact, insert_node_here!, finish,
+    NewInstruction, verify_ir, ReturnNode, SSAValue
+
+foo_test_function(i) = i == 1 ? 1 : 2
+
+@testset "IncrementalCompact statefulness" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+
+    # set up first iterator
+    x = Core.Compiler.iterate(compact)
+    x = Core.Compiler.iterate(compact, x[2])
+
+    # set up second iterator
+    x = Core.Compiler.iterate(compact)
+
+    # consume remainder
+    while x !== nothing
+        x = Core.Compiler.iterate(compact, x[2])
+    end
+
+    ir = finish(compact)
+    @test Core.Compiler.verify_ir(ir) === nothing
+end
+
+# Test early finish of IncrementalCompact
+@testset "IncrementalCompact early finish" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+
+    insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line]))
+    new_ir = finish(compact)
+    # TODO: Should IncrementalCompact be doing this internally?
+    empty!(new_ir.cfg.blocks[1].succs)
+    verify_ir(new_ir)
+    @test length(new_ir.cfg.blocks) == 1
+end
+
+# Test reverse affinity insert at start of compact
+@testset "IncrementalCompact reverse affinity insert" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+    @test !Core.Compiler.did_just_finish_bb(compact)
+
+    insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line]), true)
+    new_ir = finish(compact)
+    # TODO: Should IncrementalCompact be doing this internally?
+    empty!(new_ir.cfg.blocks[1].succs)
+    verify_ir(new_ir)
+    @test length(new_ir.cfg.blocks) == 1
+end
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
index bbcf7b0dfb959..fc91a37c5bd9e 100644
--- a/test/compiler/contextual.jl
+++ b/test/compiler/contextual.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable
+using Test
+
 # Cassette
 # ========
 
@@ -7,12 +10,9 @@ module MiniCassette
     # A minimal demonstration of the cassette mechanism. Doesn't support all the
     # fancy features, but sufficient to exercise this code path in the compiler.
 
-    using Core.Compiler: retrieve_code_info, CodeInfo,
-        MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, SlotNumber, quoted,
-        signature_type, anymap
-    using Base: _methods_by_ftype
+    using Core.IR
+    using Core.Compiler: retrieve_code_info, quoted, signature_type, anymap
     using Base.Meta: isexpr
-    using Test
 
     export Ctx, overdub
 
@@ -43,14 +43,16 @@ module MiniCassette
         end
     end
 
-    function transform!(ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector)
+    function transform!(mi::MethodInstance, ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector)
         code = ci.code
+        di = Core.Compiler.DebugInfoStream(mi, ci.debuginfo, length(code))
         ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...]
         ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...]
         # Insert one SSAValue for every argument statement
         prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
-        prepend!(ci.codelocs, fill(0, nargs))
+        prepend!(di.codelocs, fill(Int32(0), 3nargs))
         prepend!(ci.ssaflags, fill(0x00, nargs))
+        ci.debuginfo = Core.DebugInfo(di, length(code))
         ci.ssavaluetypes += nargs
         function map_slot_number(slot::Int)
             if slot == 1
@@ -70,12 +72,12 @@ module MiniCassette
         end
     end
 
-    function overdub_generator(world::UInt, source, self, c, f, args)
+    function overdub_generator(world::UInt, source, self, ctx, f, args)
         @nospecialize
         if !Base.issingletontype(f)
             # (c, f, args..) -> f(args...)
-            code_info = :(return f(args...))
-            return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :c, :f, :args), Core.svec())(world, source, code_info)
+            ex = :(return f(args...))
+            return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :ctx, :f, :args), Core.svec())(world, source, ex)
         end
 
         tt = Tuple{f, args...}
@@ -83,22 +85,23 @@ module MiniCassette
         mi = Core.Compiler.specialize_method(match)
         # Unsupported in this mini-cassette
         @assert !mi.def.isva
-        code_info = retrieve_code_info(mi, world)
-        @assert isa(code_info, CodeInfo)
-        code_info = copy(code_info)
-        @assert code_info.edges === nothing
-        code_info.edges = MethodInstance[mi]
-        transform!(code_info, length(args), match.sparams)
+        src = retrieve_code_info(mi, world)
+        @assert isa(src, CodeInfo)
+        src = copy(src)
+        @assert src.edges === nothing
+        src.edges = MethodInstance[mi]
+        transform!(mi, src, length(args), match.sparams)
         # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
         # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
-        return code_info
+        # Match the generator, since that's what our transform! does
+        src.nargs = 4
+        src.isva = true
+        return src
     end
 
-    @inline function overdub(c::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...)
-        f(args...)
-    end
+    @inline overdub(::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) = f(args...)
 
-    @eval function overdub(c::Ctx, f, args...)
+    @eval function overdub(ctx::Ctx, f, args...)
         $(Expr(:meta, :generated_only))
         $(Expr(:meta, :generated, overdub_generator))
     end
@@ -119,104 +122,3 @@ f() = 2
 foo(i) = i+bar(Val(1))
 
 @test @inferred(overdub(Ctx(), foo, 1)) == 43
-
-# overlay method tables
-# =====================
-
-module OverlayModule
-
-using Base.Experimental: @MethodTable, @overlay
-
-@MethodTable(mt)
-
-@overlay mt function sin(x::Float64)
-    1
-end
-
-# short function def
-@overlay mt cos(x::Float64) = 2
-
-# parametric function def
-@overlay mt tan(x::T) where {T} = 3
-
-end # module OverlayModule
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter())
-@test only(methods).method.module === Base.Math
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter())
-@test only(methods).method.module === OverlayModule
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter())
-@test isempty(methods)
-
-# precompilation
-
-load_path = mktempdir()
-depot_path = mktempdir()
-try
-    pushfirst!(LOAD_PATH, load_path)
-    pushfirst!(DEPOT_PATH, depot_path)
-
-    write(joinpath(load_path, "Foo.jl"),
-          """
-          module Foo
-          Base.Experimental.@MethodTable(mt)
-          Base.Experimental.@overlay mt sin(x::Int) = 1
-          end
-          """)
-
-     # precompiling Foo serializes the overlay method through the `mt` binding in the module
-     Foo = Base.require(Main, :Foo)
-     @test length(Foo.mt) == 1
-
-    write(joinpath(load_path, "Bar.jl"),
-          """
-          module Bar
-          Base.Experimental.@MethodTable(mt)
-          end
-          """)
-
-    write(joinpath(load_path, "Baz.jl"),
-          """
-          module Baz
-          using Bar
-          Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
-          end
-          """)
-
-     # when referring an method table in another module,
-     # the overlay method needs to be discovered explicitly
-     Bar = Base.require(Main, :Bar)
-     @test length(Bar.mt) == 0
-     Baz = Base.require(Main, :Baz)
-     @test length(Bar.mt) == 1
-finally
-    filter!((≠)(load_path), LOAD_PATH)
-    filter!((≠)(depot_path), DEPOT_PATH)
-    rm(load_path, recursive=true, force=true)
-    try
-        rm(depot_path, force=true, recursive=true)
-    catch err
-        @show err
-    end
-end
-
-# Test that writing a bad cassette-style pass gives the expected error (#49715)
-function generator49715(world, source, self, f, tt)
-    tt = tt.parameters[1]
-    sig = Tuple{f, tt.parameters...}
-    mi = Base._which(sig; world)
-
-    error("oh no")
-
-    stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec())
-    stub(world, source, :(nothing))
-end
-
-@eval function doit49715(f, tt)
-  $(Expr(:meta, :generated, generator49715))
-  $(Expr(:meta, :generated_only))
-end
-
-@test_throws "oh no" doit49715(sin, Tuple{Int})
diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl
index fa70c8de9d853..11c30aad0b9a4 100644
--- a/test/compiler/effects.jl
+++ b/test/compiler/effects.jl
@@ -89,13 +89,13 @@ Base.@assume_effects :terminates_globally function recur_termination1(x)
     0 ≤ x < 20 || error("bad fact")
     return x * recur_termination1(x-1)
 end
-@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,)))
+@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,)))
 @test Core.Compiler.is_terminates(Base.infer_effects(recur_termination1, (Int,)))
 function recur_termination2()
     Base.@assume_effects :total !:terminates_globally
     recur_termination1(12)
 end
-@test_broken fully_eliminated(recur_termination2)
+@test fully_eliminated(recur_termination2)
 @test fully_eliminated() do; recur_termination2(); end
 
 Base.@assume_effects :terminates_globally function recur_termination21(x)
@@ -104,15 +104,15 @@ Base.@assume_effects :terminates_globally function recur_termination21(x)
     return recur_termination22(x)
 end
 recur_termination22(x) = x * recur_termination21(x-1)
-@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,)))
-@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,)))
+@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,)))
+@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,)))
 @test Core.Compiler.is_terminates(Base.infer_effects(recur_termination21, (Int,)))
 @test Core.Compiler.is_terminates(Base.infer_effects(recur_termination22, (Int,)))
 function recur_termination2x()
     Base.@assume_effects :total !:terminates_globally
     recur_termination21(12) + recur_termination22(12)
 end
-@test_broken fully_eliminated(recur_termination2x)
+@test fully_eliminated(recur_termination2x)
 @test fully_eliminated() do; recur_termination2x(); end
 
 # anonymous function support for `@assume_effects`
@@ -236,7 +236,7 @@ end
 # Effect modeling for Core.compilerbarrier
 @test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Core.Compiler.is_removable_if_unused
 
-# allocation/access of uninitialized fields should taint the :consistent-cy
+# effects modeling for allocation/access of uninitialized fields
 struct Maybe{T}
     x::T
     Maybe{T}() where T = new{T}()
@@ -244,57 +244,9 @@ struct Maybe{T}
     Maybe(x::T) where T = new{T}(x)
 end
 Base.getindex(x::Maybe) = x.x
-
 struct SyntacticallyDefined{T}
     x::T
 end
-
-import Core.Compiler: Const, getfield_notundefined
-for T = (Base.RefValue, Maybe) # both mutable and immutable
-    for name = (Const(1), Const(:x))
-        @test getfield_notundefined(T{String}, name)
-        @test getfield_notundefined(T{Integer}, name)
-        @test getfield_notundefined(T{Union{String,Integer}}, name)
-        @test getfield_notundefined(Union{T{String},T{Integer}}, name)
-        @test !getfield_notundefined(T{Int}, name)
-        @test !getfield_notundefined(T{<:Integer}, name)
-        @test !getfield_notundefined(T{Union{Int32,Int64}}, name)
-        @test !getfield_notundefined(T, name)
-    end
-    # throw doesn't account for undefined behavior
-    for name = (Const(0), Const(2), Const(1.0), Const(:y), Const("x"),
-                Float64, String, Nothing)
-        @test getfield_notundefined(T{String}, name)
-        @test getfield_notundefined(T{Int}, name)
-        @test getfield_notundefined(T{Integer}, name)
-        @test getfield_notundefined(T{<:Integer}, name)
-        @test getfield_notundefined(T{Union{Int32,Int64}}, name)
-        @test getfield_notundefined(T, name)
-    end
-    # should not be too conservative when field isn't known very well but object information is accurate
-    @test getfield_notundefined(T{String}, Int)
-    @test getfield_notundefined(T{String}, Symbol)
-    @test getfield_notundefined(T{Integer}, Int)
-    @test getfield_notundefined(T{Integer}, Symbol)
-    @test !getfield_notundefined(T{Int}, Int)
-    @test !getfield_notundefined(T{Int}, Symbol)
-    @test !getfield_notundefined(T{<:Integer}, Int)
-    @test !getfield_notundefined(T{<:Integer}, Symbol)
-end
-# should be conservative when object information isn't accurate
-@test !getfield_notundefined(Any, Const(1))
-@test !getfield_notundefined(Any, Const(:x))
-# tuples and namedtuples should be okay if not given accurate information
-for TupleType = Any[Tuple{Int,Int,Int}, Tuple{Int,Vararg{Int}}, Tuple{Any}, Tuple,
-                    NamedTuple{(:a, :b), Tuple{Int,Int}}, NamedTuple{(:x,),Tuple{Any}}, NamedTuple],
-    FieldType = Any[Int, Symbol, Any]
-    @test getfield_notundefined(TupleType, FieldType)
-end
-# skip analysis on fields that are known to be defined syntactically
-@test Core.Compiler.getfield_notundefined(SyntacticallyDefined{Float64}, Symbol)
-@test Core.Compiler.getfield_notundefined(Const(Main), Const(:var))
-@test Core.Compiler.getfield_notundefined(Const(Main), Const(42))
-# high-level tests for `getfield_notundefined`
 @test Base.infer_effects() do
     Maybe{Int}()
 end |> !Core.Compiler.is_consistent
@@ -904,7 +856,6 @@ end |> Core.Compiler.is_foldable_nothrow
 @test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s
     getfield(w, s)
 end |> Core.Compiler.is_foldable
-@test Core.Compiler.getfield_notundefined(WrapperOneField{Float64}, Symbol)
 @test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s
     getfield(w, s)
 end |> Core.Compiler.is_foldable
@@ -970,7 +921,7 @@ unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing)
 abstractly_recursive1() = abstractly_recursive2()
 abstractly_recursive2() = (Core.Compiler._return_type(abstractly_recursive1, Tuple{}); 1)
 abstractly_recursive3() = abstractly_recursive2()
-@test Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ()))
+@test_broken Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ()))
 actually_recursive1(x) = actually_recursive2(x)
 actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1)
 actually_recursive3(x) = actually_recursive2(x)
@@ -996,7 +947,7 @@ end
 let effects = Base.infer_effects() do
         isdefined(defined_ref, :x)
     end
-    @test Core.Compiler.is_consistent(effects)
+    @test !Core.Compiler.is_consistent(effects)
     @test Core.Compiler.is_nothrow(effects)
 end
 let effects = Base.infer_effects() do
@@ -1096,13 +1047,15 @@ function f2_optrefine()
     end
     return true
 end
+@test !Core.Compiler.is_nothrow(Base.infer_effects(f2_optrefine; optimize=false))
 @test Core.Compiler.is_nothrow(Base.infer_effects(f2_optrefine))
 
 function f3_optrefine(x)
     @fastmath sqrt(x)
     return x
 end
-@test Core.Compiler.is_consistent(Base.infer_effects(f3_optrefine))
+@test !Core.Compiler.is_consistent(Base.infer_effects(f3_optrefine; optimize=false))
+@test Core.Compiler.is_consistent(Base.infer_effects(f3_optrefine, (Float64,)))
 
 # Check that :consistent is properly modeled for throwing statements
 const GLOBAL_MUTABLE_SWITCH = Ref{Bool}(false)
@@ -1215,22 +1168,22 @@ callgetfield_inbounds(x, f) = @inbounds callgetfield2(x, f)
       Core.Compiler.ALWAYS_FALSE
 
 # noub modeling for memory ops
-let (memoryref, memoryrefget, memoryref_isassigned, memoryrefset!) =
-        (Core.memoryref, Core.memoryrefget, Core.memoryref_isassigned, Core.memoryrefset!)
+let (memoryrefnew, memoryrefget, memoryref_isassigned, memoryrefset!) =
+        (Core.memoryrefnew, Core.memoryrefget, Core.memoryref_isassigned, Core.memoryrefset!)
     function builtin_effects(@nospecialize xs...)
         interp = Core.Compiler.NativeInterpreter()
         𝕃 = Core.Compiler.typeinf_lattice(interp)
         rt = Core.Compiler.builtin_tfunction(interp, xs..., nothing)
         return Core.Compiler.builtin_effects(𝕃, xs..., rt)
     end
-    @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[Memory,]))
-    @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int]))
-    @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Core.Const(true)]))
-    @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Core.Const(false)]))
-    @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Bool]))
-    @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Int]))
-    @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Vararg{Bool}]))
-    @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Vararg{Any}]))
+    @test Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[Memory,]))
+    @test Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int]))
+    @test Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(true)]))
+    @test !Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(false)]))
+    @test !Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Bool]))
+    @test Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Int]))
+    @test !Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Vararg{Bool}]))
+    @test !Core.Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Vararg{Any}]))
     @test Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(true)]))
     @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(false)]))
     @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Bool]))
@@ -1251,7 +1204,7 @@ let (memoryref, memoryrefget, memoryref_isassigned, memoryrefset!) =
     @test !Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Vararg{Any}]))
     # `:boundscheck` taint should be refined by post-opt analysis
     @test Base.infer_effects() do xs::Vector{Any}, i::Int
-        memoryrefget(memoryref(getfield(xs, :ref), i, Base.@_boundscheck), :not_atomic, Base.@_boundscheck)
+        memoryrefget(memoryrefnew(getfield(xs, :ref), i, Base.@_boundscheck), :not_atomic, Base.@_boundscheck)
     end |> Core.Compiler.is_noub_if_noinbounds
 end
 
@@ -1387,3 +1340,29 @@ let; Base.Experimental.@force_compile; func52843(); end
 # https://github.com/JuliaLang/julia/issues/53508
 @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (UnitRange{Int},Int)))
 @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Base.OneTo{Int},Int)))
+
+@noinline f53613() = @assert isdefined(@__MODULE__, :v53613)
+g53613() = f53613()
+h53613() = g53613()
+@test !Core.Compiler.is_consistent(Base.infer_effects(f53613))
+@test !Core.Compiler.is_consistent(Base.infer_effects(g53613))
+@test_throws AssertionError f53613()
+@test_throws AssertionError g53613()
+@test_throws AssertionError h53613()
+global v53613 = nothing
+@test f53613() === nothing
+@test g53613() === nothing
+@test h53613() === nothing
+
+# tuple/svec effects
+@test Base.infer_effects((Vector{Any},)) do xs
+    Core.tuple(xs...)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Vector{Any},)) do xs
+    Core.svec(xs...)
+end |> Core.Compiler.is_nothrow
+
+# effects for unknown `:foreigncall`s
+@test Base.infer_effects() do
+    @ccall unsafecall()::Cvoid
+end == Core.Compiler.EFFECTS_UNKNOWN
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index a9b1d89d40e9d..9454c53a09fb7 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -479,7 +479,7 @@ end
 @test f15259(1,2) == (1,2,1,2)
 # check that error cases are still correct
 @eval g15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); a.z)
-@test_throws ErrorException g15259(1,1)
+@test_throws FieldError g15259(1,1)
 @eval h15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); getfield(a, 3))
 @test_throws BoundsError h15259(1,1)
 
@@ -1065,7 +1065,7 @@ gl_17003 = [1, 2, 3]
 f2_17003(item::AVector_17003) = nothing
 f2_17003(::Any) = f2_17003(NArray_17003(gl_17003))
 
-@test f2_17003(1) == nothing
+@test f2_17003(1) === nothing
 
 # issue #20847
 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T}
@@ -1076,7 +1076,7 @@ end
 tuplevec_20847 = Tuple{Float64, Float64}[(0.0,0.0), (1.0,0.0)]
 
 for A in (1,)
-    @test segfaultfunction_20847(tuplevec_20847) == nothing
+    @test segfaultfunction_20847(tuplevec_20847) === nothing
 end
 
 # Issue #20902, check that this doesn't error.
@@ -1097,7 +1097,7 @@ f21771(::Val{U}) where {U} = Tuple{g21771(U)}
 
 # PR #28284, check that constants propagate through calls to new
 struct t28284
-  x::Int
+    x::Int
 end
 f28284() = Val(t28284(1))
 @inferred f28284()
@@ -1538,7 +1538,7 @@ let nfields_tfunc(@nospecialize xs...) =
     @test sizeof_nothrow(String)
     @test !sizeof_nothrow(Type{String})
     @test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32}))
-    let PT = Core.Compiler.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64])
+    let PT = Core.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64])
         @test sizeof_tfunc(PT) === Const(16)
         @test nfields_tfunc(PT) === Const(2)
         @test sizeof_nothrow(PT)
@@ -1937,6 +1937,8 @@ function f24852_kernel_cinfo(world::UInt, source, fsig::Type)
     end
     pushfirst!(code_info.slotnames, Symbol("#self#"))
     pushfirst!(code_info.slotflags, 0x00)
+    code_info.nargs = 4
+    code_info.isva = false
     # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
     # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
     return match.method, code_info
@@ -2149,78 +2151,75 @@ end
 
 @testset "branching on conditional object" begin
     # simple
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         return b ? 0 : a # ::Int
-    end == Any[Int]
+    end == Int
 
     # can use multiple times (as far as the subject of condition hasn't changed)
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         c = b ? 0 : a # c::Int
         d = !b ? a : 0 # d::Int
         return c, d # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 
     # should invalidate old constraint when the subject of condition has changed
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         cond = a === nothing
         r1 = cond ? 0 : a # r1::Int
         a = 0
         r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
         return r1, r2 # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 end
 
 # https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
 # `PartialStruct` shouldn't wrap `Conditional`
-let M = Module()
-    @eval M begin
-        struct BePartialStruct
-            val::Int
-            cond
-        end
-    end
-
-    rt = @eval M begin
-        Base.return_types((Union{Nothing,Int},)) do a
-            cond = a === nothing
-            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
-            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
-            a = $(gensym(:anyvar))::Any
-            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
-            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
-        end |> only
-    end
-    @test rt == Tuple{Union{Nothing,Int},Any}
+struct BePartialStruct
+    val::Int
+    cond
+end
+@test Tuple{Union{Nothing,Int},Any} == @eval Base.infer_return_type((Union{Nothing,Int},)) do a
+    cond = a === nothing
+    obj = $(Expr(:new, BePartialStruct, 42, :cond))
+    r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+    a = $(gensym(:anyvar))::Any
+    r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+    return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
 end
 
 # make sure we never form nested `Conditional` (https://github.com/JuliaLang/julia/issues/46207)
-@test Base.return_types((Any,)) do a
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     42 === c ? :a : "b"
-end |> only === String
-@test Base.return_types((Any,)) do a
+end == String
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     c === 42 ? :a : "b"
-end |> only === String
-
-@testset "conditional constraint propagation from non-`Conditional` object" begin
-    @test Base.return_types((Bool,)) do b
-        if b
-            return !b ? nothing : 1 # ::Int
-        else
-            return 0
-        end
-    end == Any[Int]
+end == String
 
-    @test Base.return_types((Any,)) do b
-        if b
-            return b # ::Bool
-        else
-            return nothing
-        end
-    end == Any[Union{Bool,Nothing}]
+function condition_object_update1(cond)
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : 1 # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+function condition_object_update2(x)
+    cond = x isa Int
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : x # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+@testset "state update for condition object" begin
+    # refine the type of condition object into constant boolean values on branching
+    @test Base.infer_return_type(condition_object_update1, (Bool,)) == Int
+    @test Base.infer_return_type(condition_object_update1, (Any,)) == Int
+    # refine even when their original type is `Conditional`
+    @test Base.infer_return_type(condition_object_update2, (Any,)) == Int
 end
 
 @testset "`from_interprocedural!`: translate inter-procedural information" begin
@@ -2563,6 +2562,14 @@ Base.return_types(intermustalias_edgecase, (Any,); interp=MustAliasInterpreter()
     intermustalias_edgecase(x)
 end |> only === Core.Compiler.InterMustAlias
 
+@test Base.infer_return_type((AliasableField,Integer,); interp=MustAliasInterpreter()) do a, x
+    s = (;x)
+    if getfield(a, :f) isa Symbol
+        return getfield(s, getfield(a, :f))
+    end
+    return 0
+end == Integer
+
 function f25579(g)
     h = g[]
     t = (h === nothing)
@@ -4193,6 +4200,110 @@ end
     end
 end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}]
 
+@testset "constraint back-propagation from typeassert" begin
+    @test Base.infer_return_type((Any,)) do a
+        typeassert(a, Int)
+        return a
+    end == Int
+
+    @test Base.infer_return_type((Any,Bool)) do a, b
+        if b
+            typeassert(a, Int64)
+        else
+            typeassert(a, Int32)
+        end
+        return a
+    end == Union{Int32,Int64}
+end
+
+callsig_backprop_basic(::Int) = nothing
+callsig_backprop_unionsplit(::Int32) = nothing
+callsig_backprop_unionsplit(::Int64) = nothing
+callsig_backprop_multi(::Int32, ::Int64) = nothing
+callsig_backprop_any(::Any) = nothing
+callsig_backprop_lhs(::Int) = nothing
+callsig_backprop_bailout(::Val{0}) = 0
+callsig_backprop_bailout(::Val{1}) = undefvar # undefvar::Any triggers `bail_out_call`
+callsig_backprop_bailout(::Val{2}) = 2
+callsig_backprop_addinteger(a::Integer, b::Integer) = a + b # results in too many matching methods and triggers `bail_out_call`)
+@test Base.infer_return_type(callsig_backprop_addinteger) == Any
+let effects = Base.infer_effects(callsig_backprop_addinteger)
+    @test !Core.Compiler.is_consistent(effects)
+    @test !Core.Compiler.is_effect_free(effects)
+    @test !Core.Compiler.is_nothrow(effects)
+    @test !Core.Compiler.is_terminates(effects)
+end
+callsig_backprop_anti(::Any) = :any
+callsig_backprop_anti(::Int) = :int
+
+@testset "constraint back-propagation from call signature" begin
+    # basic case
+    @test Base.infer_return_type(a->(callsig_backprop_basic(a); return a), (Any,)) == Int
+
+    # union-split case
+    @test Base.infer_return_type(a->(callsig_backprop_unionsplit(a); return a), (Any,)) == Union{Int32,Int64}
+
+    # multiple arguments updates
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_multi(a, b)
+        return a, b
+    end == Tuple{Int32,Int64}
+
+    # refinement should happen only when it's worthwhile
+    @test Base.infer_return_type(a->(callsig_backprop_any(a); return a), (Integer,)) == Integer
+
+    # state update on lhs slot (assignment effect should have the precedence)
+    @test Base.infer_return_type((Any,)) do a
+        a = callsig_backprop_lhs(a)
+        return a
+    end == Nothing
+
+    # make sure to throw away an intermediate refinement information when we bail out early
+    # (inference would bail out on `callsig_backprop_bailout(::Val{1})`)
+    @test Base.infer_return_type(a->(callsig_backprop_bailout(a); return a), (Any,)) == Any
+
+    # if we see all the matching methods, we don't need to throw away refinement information
+    # even if it's caught by `bail_out_call` check
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_addinteger(a, b)
+        return a, b
+    end == Tuple{Integer,Integer}
+
+    # anti case
+    @test Base.infer_return_type((Any,)) do x
+        callsig_backprop_anti(x)
+        return x
+    end == Any
+end
+
+# make sure to add backedges when we use call signature constraint
+function callsig_backprop_invalidation_outer(a)
+    callsig_backprop_invalidation_inner!(a)
+    return a
+end
+@eval callsig_backprop_invalidation_inner!(::Int) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    callsig_backprop_invalidation_outer(a)
+end == Int
+# new definition of `callsig_backprop_invalidation_inner!` should invalidate `callsig_backprop_invalidation_outer`
+# (even if the previous return type is annotated as `Any`)
+@eval callsig_backprop_invalidation_inner!(::Nothing) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    # since inference will bail out at the first matched `_inner!` and so call signature constraint won't be available
+    callsig_backprop_invalidation_outer(a)
+end ≠ Int
+
+# https://github.com/JuliaLang/julia/issues/37866
+function issue37866(v::Vector{Union{Nothing,Float64}})
+    for x in v
+        if x > 5.0
+            return x # x > 5.0 is MethodError for Nothing so can assume ::Float64
+        end
+    end
+    return 0.0
+end
+@test Base.infer_return_type(issue37866, (Vector{Union{Nothing,Float64}},)) == Float64
+
 # make sure inference on a recursive call graph with nested `Type`s terminates
 # https://github.com/JuliaLang/julia/issues/40336
 f40336(@nospecialize(t)) = f40336(Type{t})
@@ -4260,10 +4371,10 @@ let # Test the presence of PhiNodes in lowered IR by taking the above function,
     Core.Compiler.replace_code_newstyle!(ci, ir)
     ci.ssavaluetypes = length(ci.ssavaluetypes)
     @test any(x->isa(x, Core.PhiNode), ci.code)
-    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0)
     @test Base.return_types(oc, Tuple{Bool}) == Any[Float64]
-    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0)
     @test Base.return_types(oc, Tuple{}) == Any[Float64]
 end
@@ -4435,7 +4546,7 @@ let x = Tuple{Int,Any}[
         #=19=# (0, Expr(:pop_exception, Core.SSAValue(2)))
         #=20=# (0, Core.ReturnNode(Core.SlotNumber(3)))
     ]
-    handler_at, handlers = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
+    (;handler_at, handlers) = Core.Compiler.compute_trycatch(last.(x))
     @test map(x->x[1] == 0 ? 0 : handlers[x[1]].enter_idx, handler_at) == first.(x)
 end
 
@@ -4485,8 +4596,10 @@ let
                # Vararg
         #=va=# Bound, unbound,         # => Tuple{Integer,Integer} (invalid `TypeVar` widened beforehand)
         } where Bound<:Integer
-    argtypes = Core.Compiler.most_general_argtypes(method, specTypes, true)
+    argtypes = Core.Compiler.most_general_argtypes(method, specTypes)
     popfirst!(argtypes)
+    # N.B.: `argtypes` do not have va processing applied yet
+    @test length(argtypes) == 12
     @test argtypes[1] == Integer
     @test argtypes[2] == Integer
     @test argtypes[3] == Type{Bound} where Bound<:Integer
@@ -4497,7 +4610,8 @@ let
     @test argtypes[8] == Any
     @test argtypes[9] == Union{Nothing,Bound} where Bound<:Integer
     @test argtypes[10] == Any
-    @test argtypes[11] == Tuple{Integer,Integer}
+    @test argtypes[11] == Integer
+    @test argtypes[12] == Integer
 end
 
 # make sure not to call `widenconst` on `TypeofVararg` objects
@@ -4629,32 +4743,80 @@ end
 
 # issue #43784
 @testset "issue #43784" begin
-    init = Base.ImmutableDict{Any,Any}()
-    a = Const(init)
-    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, Any])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-
-    init = Base.ImmutableDict{Number,Number}()
-    a = Const(init)
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c === typeof(init)
-
-    a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c.fields[2] === Any # or Number
-    @test c.fields[3] === ComplexF64
-
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-    @test c.fields[2] === Complex
-    @test c.fields[3] === Complex
+    ⊑ = Core.Compiler.partialorder(Core.Compiler.fallback_lattice)
+    ⊔ = Core.Compiler.join(Core.Compiler.fallback_lattice)
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+
+    let init = Base.ImmutableDict{Any,Any}()
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(init), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Any,Any}(1,2)
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(getfield(init,1)), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 3
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(typeof(init), Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Number
+        @test c.fields[3] === ComplexF64
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Complex
+        @test c.fields[3] === Complex
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(T, Any[T])
+        b = PartialStruct(T, Any[T, Number, Number])
+        @test b ⊑ a
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 1
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(T, Any[T])
+        b = Const(T())
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = PartialStruct(T, Any[T])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = Const(T(1,2))
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
 
     global const ginit43784 = Base.ImmutableDict{Any,Any}()
     @test Base.return_types() do
@@ -4688,6 +4850,31 @@ end
     @test a == Tuple
 end
 
+let ⊑ = Core.Compiler.partialorder(Core.Compiler.fallback_lattice)
+    ⊔ = Core.Compiler.join(Core.Compiler.fallback_lattice)
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]))
+    @test !(Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test !(Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]))
+    @test  (Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]))
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}])) broken=true
+    @test  (Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]))
+    @test !(PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2,3)))
+
+    t = Const((false, false)) ⊔ Const((false, true))
+    @test t isa PartialStruct && length(t.fields) == 2 && t.fields[1] === Const(false)
+    t = t ⊔ Const((false, false, 0))
+    @test t ⊑ Union{Tuple{Bool,Bool},Tuple{Bool,Bool,Int}}
+end
+
 # Test that a function-wise `@max_methods` works as expected
 Base.Experimental.@max_methods 1 function f_max_methods end
 f_max_methods(x::Int) = 1
@@ -4938,13 +5125,13 @@ g() = empty_nt_values(Base.inferencebarrier(Tuple{}))
 # to terminate the call.
 @newinterp RecurseInterpreter
 let CC = Core.Compiler
-    function CC.const_prop_entry_heuristic(interp::RecurseInterpreter, result::CC.MethodCallResult,
-                                           si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
+    function CC.const_prop_rettype_heuristic(interp::RecurseInterpreter, result::CC.MethodCallResult,
+                                             si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
         if result.rt isa CC.LimitedAccuracy
             return force # allow forced constprop to recurse into unresolved cycles
         end
-        return @invoke CC.const_prop_entry_heuristic(interp::CC.AbstractInterpreter, result::CC.MethodCallResult,
-                                                     si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
+        return @invoke CC.const_prop_rettype_heuristic(interp::CC.AbstractInterpreter, result::CC.MethodCallResult,
+                                                       si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
     end
 end
 Base.@constprop :aggressive type_level_recurse1(x...) = x[1] == 2 ? 1 : (length(x) > 100 ? x : type_level_recurse2(x[1] + 1, x..., x...))
@@ -5286,6 +5473,15 @@ end
 foo51090(b) = return bar51090(b)
 @test !fully_eliminated(foo51090, (Int,))
 
+Base.@assume_effects :terminates_globally @noinline function bar51090_terminates(b)
+    b == 0 && return
+    r = foo51090_terminates(b - 1)
+    Base.donotdelete(b)
+    return r
+end
+foo51090_terminates(b) = return bar51090_terminates(b)
+@test !fully_eliminated(foo51090_terminates, (Int,))
+
 # exploit throwness from concrete eval for intrinsics
 @test Base.return_types() do
     Base.or_int(true, 1)
@@ -5600,6 +5796,13 @@ end |> only === Float64
 @test Base.infer_exception_type(c::Missing -> c ? 1 : 2) == TypeError
 @test Base.infer_exception_type(c::Any -> c ? 1 : 2) == TypeError
 
+# exception type inference for `:new`
+struct NewExctInference
+    a::Int
+    @eval NewExctInference(a) = $(Expr(:new, :NewExctInference, :a))
+end
+@test Base.infer_exception_type(NewExctInference, (Float64,)) == TypeError
+
 # semi-concrete interpretation accuracy
 # https://github.com/JuliaLang/julia/issues/50037
 @inline countvars50037(bitflags::Int, var::Int) = bitflags >> 0
@@ -5627,3 +5830,314 @@ end
 
 # Issue #52613
 @test (code_typed((Any,)) do x; TypeVar(x...); end)[1][2] === TypeVar
+
+# https://github.com/JuliaLang/julia/issues/53590
+func53590(b) = b ? Int : Float64
+function issue53590(b1, b2)
+    T1 = func53590(b1)
+    T2 = func53590(b2)
+    return typejoin(T1, T2)
+end
+@test issue53590(true, true) == Int
+@test issue53590(true, false) == Real
+@test issue53590(false, false) == Float64
+@test issue53590(false, true) == Real
+
+# Expr(:throw_undef_if_not) handling
+@eval function has_tuin()
+    $(Expr(:throw_undef_if_not, :x, false))
+end
+@test Core.Compiler.return_type(has_tuin, Tuple{}) === Union{}
+@test_throws UndefVarError has_tuin()
+
+function gen_tuin_from_arg(world::UInt, source, _, _)
+    ci = make_codeinfo(Any[
+        Expr(:throw_undef_if_not, :x, Core.Argument(2)),
+        ReturnNode(true),
+    ]; slottypes=Any[Any, Bool])
+    ci.slotnames = Symbol[:var"#self#", :def]
+    ci.nargs = 2
+    ci.isva = false
+    ci
+end
+
+@eval function has_tuin2(def)
+    $(Expr(:meta, :generated, gen_tuin_from_arg))
+    $(Expr(:meta, :generated_only))
+end
+@test_throws UndefVarError has_tuin2(false)
+@test has_tuin2(true)
+
+# issue #53585
+let t = ntuple(i -> i % 8 == 1 ? Int64 : Float64, 4000)
+    @test only(Base.return_types(Base.promote_typeof, t)) == Type{Float64}
+    @test only(Base.return_types(vcat, t)) == Vector{Float64}
+end
+
+# Infinite loop in inference on SSA assignment
+const stop_infinite_loop::Base.Threads.Atomic{Bool} = Base.Threads.Atomic{Bool}(false)
+function gen_infinite_loop_ssa_generator(world::UInt, source, _)
+    ci = make_codeinfo(Any[
+        # Block 1
+        (),
+        # Block 2
+        PhiNode(Int32[1, 5], Any[SSAValue(1), SSAValue(3)]),
+        Expr(:call, tuple, SSAValue(2)),
+        Expr(:call, getindex, GlobalRef(@__MODULE__, :stop_infinite_loop)),
+        GotoIfNot(SSAValue(4), 2),
+        # Block 3
+        ReturnNode(SSAValue(2))
+    ]; slottypes=Any[Any])
+    ci.slotnames = Symbol[:var"#self#"]
+    ci.nargs = 1
+    ci.isva = false
+    ci
+end
+
+@eval function gen_infinite_loop_ssa()
+    $(Expr(:meta, :generated, gen_infinite_loop_ssa_generator))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+
+# We want to make sure that both this returns `Tuple` and that
+# it doesn't infinite loop inside inference.
+@test Core.Compiler.return_type(gen_infinite_loop_ssa, Tuple{}) === Tuple
+
+# inference local cache lookup with extended lattice elements that may be transformed
+# by `matching_cache_argtypes`
+@newinterp CachedConditionalInterp
+Base.@constprop :aggressive function func_cached_conditional(x, y)
+    if x
+        @noinline sin(y)
+    else
+        0.0
+    end
+end;
+function test_func_cached_conditional(y)
+    y₁ = func_cached_conditional(isa(y, Float64), y)
+    y₂ = func_cached_conditional(isa(y, Float64), y)
+    return y₁, y₂
+end;
+let interp = CachedConditionalInterp();
+    @test Base.infer_return_type(test_func_cached_conditional, (Any,); interp) == Tuple{Float64, Float64}
+    @test count(interp.inf_cache) do result
+        result.linfo.def.name === :func_cached_conditional
+    end == 1
+end
+
+# fieldcount on `Tuple` should constant fold, even though `.fields` not const
+@test fully_eliminated(Base.fieldcount, Tuple{Type{Tuple{Nothing, Int, Int}}})
+
+# Vararg-constprop regression from MutableArithmetics (#54341)
+global SIDE_EFFECT54341::Int
+function foo54341(a, b, c, d, args...)
+    # Side effect to force constprop rather than semi-concrete
+    global SIDE_EFFECT54341 = a + b + c + d
+    return SIDE_EFFECT54341
+end
+bar54341(args...) = foo54341(4, args...)
+
+@test Core.Compiler.return_type(bar54341, Tuple{Vararg{Int}}) === Int
+
+# `PartialStruct` for partially initialized structs:
+struct PartiallyInitialized1
+    a; b; c
+    PartiallyInitialized1(a) = (@nospecialize; new(a))
+    PartiallyInitialized1(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized1(a, b, c) = (@nospecialize; new(a, b, c))
+end
+mutable struct PartiallyInitialized2
+    a; b; c
+    PartiallyInitialized2(a) = (@nospecialize; new(a))
+    PartiallyInitialized2(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized2(a, b, c) = (@nospecialize; new(a, b, c))
+end
+
+# 1. isdefined modeling for partial struct
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((PartiallyInitialized1,)) do x
+    @assert isdefined(x, :a)
+    return Val(isdefined(x, :c))
+end == Val
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized1(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    s = PartiallyInitialized2(a, b)
+    s.c = c
+    Val(isdefined(s, :c))
+end >: Val{true}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized2(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 1))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 2))
+end == Val
+
+# 2. getfield modeling for partial struct
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized1(a, b), :b)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized1(a, b), f, #=boundscheck=#false)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized1(a, b, c), :c)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized1(a, b, c), f, #=boundscheck=#false)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized2(a, b), :b)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized2(a, b), f, #=boundscheck=#false)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized2(a, b, c), :c)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized2(a, b, c), f, #=boundscheck=#false)
+end |> Core.Compiler.is_nothrow
+
+# isdefined-Conditionals
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isdefined(x, :x)
+        return getfield(x, :x)
+    end
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isassigned(x)
+        return x[]
+    end
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, c
+    x = PartiallyInitialized2(a)
+    x.c = c
+    if isdefined(x, :c)
+        return x.b
+    end
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((PartiallyInitialized2,); optimize=false) do x
+    if isdefined(x, :b)
+        if isdefined(x, :c)
+            return x.c
+        end
+        return x.b
+    end
+    return nothing
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Bool,Int,); optimize=false) do c, b
+    x = c ? PartiallyInitialized1(true) : PartiallyInitialized1(true, b)
+    if isdefined(x, :b)
+        return Val(x.a), x.b
+    end
+    return nothing
+end |> Core.Compiler.is_nothrow
+
+# refine `undef` information from `@isdefined` check
+function isdefined_nothrow(c, x)
+    local val
+    if c
+        val = x
+    end
+    if @isdefined val
+        return val
+    end
+    return zero(Int)
+end
+@test Core.Compiler.is_nothrow(Base.infer_effects(isdefined_nothrow, (Bool,Int)))
+@test !any(first(only(code_typed(isdefined_nothrow, (Bool,Int)))).code) do @nospecialize x
+    Meta.isexpr(x, :throw_undef_if_not)
+end
+
+# End to end test case for the partially initialized struct with `PartialStruct`
+@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
+@test fully_eliminated() do
+    broadcast_noescape1(Ref("x"))
+end
+
+# InterConditional rt with Vararg argtypes
+fcondvarargs(a, b, c, d) = isa(d, Int64)
+gcondvarargs(a, x...) = return fcondvarargs(a, x...) ? isa(a, Int64) : !isa(a, Int64)
+@test Core.Compiler.return_type(gcondvarargs, Tuple{Vararg{Any}}) === Bool
+
+# JuliaLang/julia#55627: argtypes check in `abstract_call_opaque_closure`
+issue55627_make_oc() = Base.Experimental.@opaque (x::Int) -> 2x
+@test Base.infer_return_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end == Union{}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    f = issue55627_make_oc()
+    return f(1), f(xs...)
+end == Tuple{Int,Int}
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end >: MethodError
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f('1')
+end >: TypeError
+
+# `exct` modeling for opaque closure
+oc_exct_1() = Base.Experimental.@opaque (x) -> x < 0 ? throw(x) : x
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_1()(x)
+end == Int
+oc_exct_2() = Base.Experimental.@opaque Tuple{Number}->Number (x) -> '1'
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_2()(x)
+end == TypeError
+
+# nothrow modeling for `invoke` calls
+f_invoke_nothrow(::Number) = :number
+f_invoke_nothrow(::Int) = :int
+@test Base.infer_effects((Int,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Char,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Union{Nothing,Int},)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Core.Compiler.is_nothrow
+
+# `exct` modeling for `invoke` calls
+f_invoke_exct(x::Number) = x < 0 ? throw(x) : x
+f_invoke_exct(x::Int) = x
+@test Base.infer_exception_type((Int,)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Int
+@test Base.infer_exception_type() do
+    @invoke f_invoke_exct(42::Number)
+end == Union{}
+@test Base.infer_exception_type((Union{Nothing,Int},)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Union{Int,TypeError}
+@test Base.infer_exception_type((Int,)) do x
+    invoke(f_invoke_exct, Number, x)
+end == TypeError
+@test Base.infer_exception_type((Char,)) do x
+    invoke(f_invoke_exct, Tuple{Number}, x)
+end == TypeError
+
+@test Base.infer_exception_type((Vector{Any},)) do args
+    Core.throw_methoderror(args...)
+end == Union{MethodError,ArgumentError}
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 3e48d91ba880d..a8b5fd66dcd0d 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1818,23 +1818,55 @@ let src = code_typed1((AtomicMemoryRef{Int},)) do a
     @test count(isinvokemodify(:+), src.code) == 1
 end
 
-
-
 # apply `ssa_inlining_pass` multiple times
-let interp = Core.Compiler.NativeInterpreter()
+func_mul_int(a::Int, b::Int) = Core.Intrinsics.mul_int(a, b)
+multi_inlining1(a::Int, b::Int) = @noinline func_mul_int(a, b)
+let i::Int, continue_::Bool
+    interp = Core.Compiler.NativeInterpreter()
     # check if callsite `@noinline` annotation works
-    ir, = Base.code_ircode((Int,Int); optimize_until="inlining", interp) do a, b
-        @noinline a*b
-    end |> only
-    i = findfirst(isinvoke(:*), ir.stmts.stmt)
+    ir, = only(Base.code_ircode(multi_inlining1, (Int,Int); optimize_until="inlining", interp))
+    i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt)
     @test i !== nothing
-
-    # ok, now delete the callsite flag, and see the second inlining pass can inline the call
+    # now delete the callsite flag, and see the second inlining pass can inline the call
+    @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE
+    inlining = Core.Compiler.InliningState(interp)
+    ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false)
+    @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) === nothing
+    @test (i = findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt)) !== nothing
+    lins = Base.IRShow.buildLineInfoNode(ir.debuginfo, nothing, i)
+    @test (continue_ = length(lins) == 2) # :multi_inlining1 -> :func_mul_int
+    if continue_
+        def1 = lins[1].method
+        @test def1 isa Core.MethodInstance && def1.def.name === :multi_inlining1
+        def2 = lins[2].method
+        @test def2 isa Core.MethodInstance && def2.def.name === :func_mul_int
+    end
+end
+
+call_func_mul_int(a::Int, b::Int) = @noinline func_mul_int(a, b)
+multi_inlining2(a::Int, b::Int) = call_func_mul_int(a, b)
+let i::Int, continue_::Bool
+    interp = Core.Compiler.NativeInterpreter()
+    # check if callsite `@noinline` annotation works
+    ir, = only(Base.code_ircode(multi_inlining2, (Int,Int); optimize_until="inlining", interp))
+    i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt)
+    @test i !== nothing
+    # now delete the callsite flag, and see the second inlining pass can inline the call
     @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE
     inlining = Core.Compiler.InliningState(interp)
     ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false)
-    @test count(isinvoke(:*), ir.stmts.stmt) == 0
-    @test count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt) == 1
+    @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) === nothing
+    @test (i = findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt)) !== nothing
+    lins = Base.IRShow.buildLineInfoNode(ir.debuginfo, nothing, i)
+    @test_broken (continue_ = length(lins) == 3) # see TODO in `ir_inline_linetable!`
+    if continue_
+        def1 = lins[1].method
+        @test def1 isa Core.MethodInstance && def1.def.name === :multi_inlining2
+        def2 = lins[2].method
+        @test def2 isa Core.MethodInstance && def2.def.name === :call_func_mul_int
+        def3 = lins[3].method
+        @test def3 isa Core.MethodInstance && def3.def.name === :call_func_mul_int
+    end
 end
 
 # Test special purpose inliner for Core.ifelse
@@ -2159,3 +2191,26 @@ end
 @test !Core.Compiler.is_nothrow(Base.infer_effects(issue53062, (Bool,)))
 @test issue53062(false) == -1
 @test_throws MethodError issue53062(true)
+
+struct Issue52644
+    tuple::Type{<:Tuple}
+end
+issue52644(::DataType) = :DataType
+issue52644(::UnionAll) = :UnionAll
+let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t
+        issue52644(t.tuple)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    irfunc = Core.OpaqueClosure(ir)
+    @test irfunc(Issue52644(Tuple{})) === :DataType
+    @test irfunc(Issue52644(Tuple{<:Integer})) === :UnionAll
+end
+issue52644_single(x::DataType) = :DataType
+let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t
+        issue52644_single(t.tuple)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    irfunc = Core.OpaqueClosure(ir)
+    @test irfunc(Issue52644(Tuple{})) === :DataType
+    @test_throws MethodError irfunc(Issue52644(Tuple{<:Integer}))
+end
diff --git a/test/compiler/interpreter_exec.jl b/test/compiler/interpreter_exec.jl
index b3aca0cfe2903..f00bc92c7443d 100644
--- a/test/compiler/interpreter_exec.jl
+++ b/test/compiler/interpreter_exec.jl
@@ -22,7 +22,7 @@ let m = Meta.@lower 1 + 1
     nstmts = length(src.code)
     src.ssavaluetypes = nstmts
     src.ssaflags = fill(UInt8(0x00), nstmts)
-    src.codelocs = fill(Int32(1), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
     Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
     global test29262 = true
     @test :a === @eval $m
@@ -62,7 +62,7 @@ let m = Meta.@lower 1 + 1
     nstmts = length(src.code)
     src.ssavaluetypes = nstmts
     src.ssaflags = fill(UInt8(0x00), nstmts)
-    src.codelocs = fill(Int32(1), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
     m.args[1] = copy(src)
     Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
     global test29262 = true
@@ -81,7 +81,7 @@ let m = Meta.@lower 1 + 1
         QuoteNode(:b),
         GlobalRef(@__MODULE__, :test29262),
         # block 2
-        EnterNode(11),
+        EnterNode(12),
         # block 3
         UpsilonNode(),
         UpsilonNode(),
@@ -91,19 +91,22 @@ let m = Meta.@lower 1 + 1
         UpsilonNode(SSAValue(1)),
         # block 5
         Expr(:throw_undef_if_not, :expected, false),
+        ReturnNode(), # unreachable
         # block 6
         PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]), # NULL, :a, :b
         PhiCNode(Any[SSAValue(6)]), # NULL
+        Expr(:pop_exception, SSAValue(4)),
         # block 7
-        ReturnNode(SSAValue(11)),
+        ReturnNode(SSAValue(12)),
     ]
     nstmts = length(src.code)
     src.ssavaluetypes = nstmts
     src.ssaflags = fill(UInt8(0x00), nstmts)
-    src.codelocs = fill(Int32(1), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
     Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
     global test29262 = true
     @test :a === @eval $m
     global test29262 = false
     @test :b === @eval $m
+    @test isempty(current_exceptions())
 end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 9345c2e26db33..281317ac25bf8 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -29,7 +29,7 @@ let code = Any[
         ReturnNode(Core.SSAValue(10)),
     ]
     ir = make_ircode(code)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
+    domtree = Core.Compiler.construct_domtree(ir)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
     phi = ir.stmts.stmt[3]
@@ -47,7 +47,7 @@ let code = Any[]
     push!(code, Expr(:call, :opaque))
     push!(code, ReturnNode(nothing))
     ir = make_ircode(code)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
+    domtree = Core.Compiler.construct_domtree(ir)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
 end
@@ -802,9 +802,9 @@ function each_stmt_a_bb(stmts, preds, succs)
     ir = IRCode()
     empty!(ir.stmts.stmt)
     append!(ir.stmts.stmt, stmts)
-    empty!(ir.stmts.type); append!(ir.stmts.type, [Nothing for _ = 1:length(stmts)])
+    empty!(ir.stmts.type); append!(ir.stmts.type, [Any for _ = 1:length(stmts)])
     empty!(ir.stmts.flag); append!(ir.stmts.flag, [0x0 for _ = 1:length(stmts)])
-    empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:length(stmts)])
+    empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:3length(stmts)])
     empty!(ir.stmts.info); append!(ir.stmts.info, [NoCallInfo() for _ = 1:length(stmts)])
     empty!(ir.cfg.blocks); append!(ir.cfg.blocks, [BasicBlock(StmtRange(i, i), preds[i], succs[i]) for i = 1:length(stmts)])
     empty!(ir.cfg.index);  append!(ir.cfg.index,  [i for i = 2:length(stmts)])
@@ -1801,3 +1801,172 @@ let n = 1000
     end
 end
 @test f_1000_blocks() == 0
+
+# https://github.com/JuliaLang/julia/issues/53521
+# Incorrect scope counting in :leave
+using Base.ScopedValues
+function f53521()
+    VALUE = ScopedValue(1)
+    @with VALUE => 2 begin
+        for i = 1
+            @with VALUE => 3 begin
+                try
+                    foo()
+                catch
+                    nothing
+                end
+            end
+        end
+    end
+end
+@test code_typed(f53521)[1][2] === Nothing
+
+# Test that adce_pass! sets Refined on PhiNode values
+let code = Any[
+    # Basic Block 1
+    GotoIfNot(false, 3)
+    # Basic Block 2
+    nothing
+    # Basic Block 3
+    PhiNode(Int32[1, 2], Any[1.0, 1])
+    ReturnNode(Core.SSAValue(3))
+]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Nothing, Union{Int64, Float64}, Any])
+    (ir, made_changes) = Core.Compiler.adce_pass!(ir)
+    @test made_changes
+    @test (ir[Core.SSAValue(length(ir.stmts))][:flag] & Core.Compiler.IR_FLAG_REFINED) != 0
+end
+
+# JuliaLang/julia#52991: statements that may not :terminate should not be deleted
+@noinline Base.@assume_effects :effect_free :nothrow function issue52991(n)
+    local s = 0
+    try
+        while true
+            yield()
+            if n - rand(1:10) > 0
+                s += 1
+            else
+                break
+            end
+        end
+    catch
+    end
+    return s
+end
+@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(issue52991, (Int,)))
+let src = code_typed1((Int,)) do x
+        issue52991(x)
+        nothing
+    end
+    @test count(isinvoke(:issue52991), src.code) == 1
+end
+let t = @async begin
+        issue52991(11) # this call never terminates
+        nothing
+    end
+    sleep(1)
+    if istaskdone(t)
+        ok = false
+    else
+        ok = true
+        schedule(t, InterruptException(); error=true)
+    end
+    @test ok
+end
+
+# JuliaLang/julia47664
+@test !fully_eliminated() do
+    any(isone, Iterators.repeated(0))
+end
+@test !fully_eliminated() do
+    all(iszero, Iterators.repeated(0))
+end
+
+## Test that cfg_simplify respects implicit `unreachable` terminators
+let code = Any[
+        # block 1
+        GotoIfNot(Core.Argument(2), 4),
+        # block 2
+        Expr(:call, Base.throw, "error"), # an implicit `unreachable` terminator
+        # block 3
+        Expr(:call, :opaque),
+        # block 4
+        ReturnNode(nothing),
+    ]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Union{}, Any, Union{}])
+
+    # Unfortunately `compute_basic_blocks` does not notice the `throw()` so it gives us
+    # a slightly imprecise CFG. Instead manually construct the CFG we need for this test:
+    empty!(ir.cfg.blocks)
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(1,1), [], [2,4]))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(2,2), [1], []))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(3,3), [], []))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(4,4), [1], []))
+    empty!(ir.cfg.index)
+    append!(ir.cfg.index, Int[2,3,4])
+    ir.stmts.stmt[1] = GotoIfNot(Core.Argument(2), 4)
+
+    Core.Compiler.verify_ir(ir)
+    ir = Core.Compiler.cfg_simplify!(ir)
+    Core.Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 3 # should have removed block 3
+end
+
+let code = Any[
+        # block 1
+        EnterNode(4, 1),
+        # block 2
+        GotoNode(3), # will be turned into nothing
+        # block 3
+        GotoNode(5),
+        # block 4
+        ReturnNode(),
+        # block 5
+        Expr(:leave, SSAValue(1)),
+        # block 6
+        GotoIfNot(Core.Argument(1), 8),
+        # block 7
+        ReturnNode(1),
+        # block 8
+        ReturnNode(2),
+    ]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Any, Any, Any, Any, Union{}, Union{}])
+    @test length(ir.cfg.blocks) == 8
+    Core.Compiler.verify_ir(ir)
+
+    # Union typed deletion marker in basic block 2
+    Core.Compiler.setindex!(ir, nothing, SSAValue(2))
+
+    # Test cfg_simplify
+    Core.Compiler.verify_ir(ir)
+    ir = Core.Compiler.cfg_simplify!(ir)
+    Core.Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 6
+    gotoifnot = Core.Compiler.last(ir.cfg.blocks[3].stmts)
+    inst = ir[SSAValue(gotoifnot)]
+    @test isa(inst[:stmt], GotoIfNot)
+    # Make sure we didn't accidentally schedule the unreachable block as
+    # fallthrough
+    @test isdefined(ir[SSAValue(gotoifnot+1)][:inst]::ReturnNode, :val)
+end
+
+# https://github.com/JuliaLang/julia/issues/54596
+# finalized object's uses have no postdominator
+let f = (x)->nothing, mi = Base.method_instance(f, (Base.RefValue{Nothing},)), code = Any[
+   # Basic Block 1
+   Expr(:new, Base.RefValue{Nothing}, nothing)
+   Expr(:call, Core.finalizer, f, SSAValue(1), true, mi)
+   GotoIfNot(false, 6)
+   # Basic Block 2
+   Expr(:call, Base.getfield, SSAValue(1), :x)
+   ReturnNode(SSAValue(4))
+   # Basic Block 3
+   Expr(:call, Base.getfield, SSAValue(1), :x)
+   ReturnNode(SSAValue(6))
+]
+   ir = make_ircode(code; ssavaluetypes=Any[Base.RefValue{Nothing}, Nothing, Any, Nothing, Any, Nothing, Any])
+   inlining = Core.Compiler.InliningState(Core.Compiler.NativeInterpreter())
+   Core.Compiler.verify_ir(ir)
+   ir = Core.Compiler.sroa_pass!(ir, inlining)
+   Core.Compiler.verify_ir(ir)
+end
diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl
index 788d7bbc721ee..c11444d8daabc 100644
--- a/test/compiler/irutils.jl
+++ b/test/compiler/irutils.jl
@@ -62,7 +62,8 @@ let m = Meta.@lower 1 + 1
     orig_src = m.args[1]::CodeInfo
     global function make_codeinfo(code::Vector{Any};
                                   ssavaluetypes::Union{Nothing,Vector{Any}}=nothing,
-                                  slottypes::Union{Nothing,Vector{Any}}=nothing)
+                                  slottypes::Union{Nothing,Vector{Any}}=nothing,
+                                  slotnames::Union{Nothing,Vector{Symbol}}=nothing)
         src = copy(orig_src)
         src.code = code
         nstmts = length(src.code)
@@ -71,20 +72,27 @@ let m = Meta.@lower 1 + 1
         else
             src.ssavaluetypes = ssavaluetypes
         end
-        src.codelocs = fill(one(Int32), nstmts)
+        src.debuginfo = Core.DebugInfo(:none)
         src.ssaflags = fill(zero(UInt32), nstmts)
         if slottypes !== nothing
             src.slottypes = slottypes
             src.slotflags = fill(zero(UInt8), length(slottypes))
         end
+        if slotnames !== nothing
+            src.slotnames = slotnames
+        end
         return src
     end
     global function make_ircode(code::Vector{Any};
-                                ssavaluetypes::Union{Nothing,Vector{Any}}=nothing,
                                 slottypes::Union{Nothing,Vector{Any}}=nothing,
-                                verify::Bool=true)
-        src = make_codeinfo(code; ssavaluetypes, slottypes)
-        ir = Core.Compiler.inflate_ir(src)
+                                verify::Bool=true,
+                                kwargs...)
+        src = make_codeinfo(code; slottypes, kwargs...)
+        if slottypes !== nothing
+            ir = Core.Compiler.inflate_ir(src, slottypes)
+        else
+            ir = Core.Compiler.inflate_ir(src)
+        end
         verify && Core.Compiler.verify_ir(ir)
         return ir
     end
diff --git a/test/compiler/newinterp.jl b/test/compiler/newinterp.jl
index 74a2b29cabc28..d86a1831def79 100644
--- a/test/compiler/newinterp.jl
+++ b/test/compiler/newinterp.jl
@@ -3,29 +3,49 @@
 # TODO set up a version who defines new interpreter with persistent cache?
 
 """
-    @newinterp NewInterpreter
+    @newinterp NewInterpreter [ephemeral_cache::Bool=false]
 
 Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
 from the native code cache, satisfying the minimum interface requirements.
+
+When the `ephemeral_cache=true` option is specified, `NewInterpreter` will hold
+`CodeInstance` in an ephemeral non-integrated cache, rather than in the integrated
+`Core.Compiler.InternalCodeCache`.
+Keep in mind that ephemeral cache lacks support for invalidation and doesn't persist across
+sessions. However it is an usual Julia object of the type `code_cache::IdDict{MethodInstance,CodeInstance}`,
+making it easier for debugging and inspecting the compiler behavior.
 """
-macro newinterp(InterpName)
-    cache_token = QuoteNode(gensym(string(InterpName, "Cache")))
+macro newinterp(InterpName, ephemeral_cache::Bool=false)
+    cache_token = QuoteNode(gensym(string(InterpName, "CacheToken")))
+    InterpCacheName = esc(Symbol(string(InterpName, "Cache")))
     InterpName = esc(InterpName)
     C = Core
     CC = Core.Compiler
     quote
+        $(ephemeral_cache && quote
+        struct $InterpCacheName
+            dict::IdDict{$C.MethodInstance,$C.CodeInstance}
+        end
+        $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}())
+        end)
         struct $InterpName <: $CC.AbstractInterpreter
             meta # additional information
             world::UInt
             inf_params::$CC.InferenceParams
             opt_params::$CC.OptimizationParams
             inf_cache::Vector{$CC.InferenceResult}
+            $(ephemeral_cache && :(code_cache::$InterpCacheName))
             function $InterpName(meta = nothing;
                                  world::UInt = Base.get_world_counter(),
                                  inf_params::$CC.InferenceParams = $CC.InferenceParams(),
                                  opt_params::$CC.OptimizationParams = $CC.OptimizationParams(),
-                                 inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[])
-                return new(meta, world, inf_params, opt_params, inf_cache)
+                                 inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[],
+                                 $(ephemeral_cache ?
+                                    Expr(:kw, :(code_cache::$InterpCacheName), :($InterpCacheName())) :
+                                    Expr(:kw, :_, :nothing)))
+                return $(ephemeral_cache ?
+                    :(new(meta, world, inf_params, opt_params, inf_cache, code_cache)) :
+                    :(new(meta, world, inf_params, opt_params, inf_cache)))
             end
         end
         $CC.InferenceParams(interp::$InterpName) = interp.inf_params
@@ -33,5 +53,12 @@ macro newinterp(InterpName)
         $CC.get_inference_world(interp::$InterpName) = interp.world
         $CC.get_inference_cache(interp::$InterpName) = interp.inf_cache
         $CC.cache_owner(::$InterpName) = $cache_token
+        $(ephemeral_cache && quote
+        $CC.code_cache(interp::$InterpName) = $CC.WorldView(interp.code_cache, $CC.WorldRange(interp.world))
+        $CC.get(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default)
+        $CC.getindex(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi)
+        $CC.haskey(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi)
+        $CC.setindex!(wvc::$CC.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+        end)
     end
 end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 0a53dec80f732..b7d75d0be5567 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -9,16 +9,6 @@ include("irutils.jl")
 
 make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs)
 
-function make_ci(code)
-    ci = (Meta.@lower 1 + 1).args[1]
-    ci.code = code
-    nstmts = length(ci.code)
-    ci.ssavaluetypes = nstmts
-    ci.codelocs = fill(Int32(1), nstmts)
-    ci.ssaflags = fill(Int32(0), nstmts)
-    return ci
-end
-
 # TODO: this test is broken
 #let code = Any[
 #        GotoIfNot(SlotNumber(2), 4),
@@ -38,7 +28,6 @@ end
 #        false, false, false, false
 #    ))
 #
-#    NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), UInt32(0))
 #    Compiler.run_passes(ci, 1, [NullLineInfo])
 #    # XXX: missing @test
 #end
@@ -73,8 +62,8 @@ let cfg = CFG(BasicBlock[
     @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
     let correct_idoms = Compiler.naive_idoms(cfg.blocks),
         correct_pidoms = Compiler.naive_idoms(cfg.blocks, true)
-        @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms
-        @test Compiler.construct_postdomtree(cfg.blocks).idoms_bb == correct_pidoms
+        @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms
+        @test Compiler.construct_postdomtree(cfg).idoms_bb == correct_pidoms
         # For completeness, reverse the order of pred/succ in the CFG and verify
         # the answer doesn't change (it does change the which node is chosen
         # as the semi-dominator, since it changes the DFS numbering).
@@ -85,8 +74,8 @@ let cfg = CFG(BasicBlock[
                 c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
                 d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
                 cfg′ = CFG(blocks, cfg.index)
-                @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms
-                @test Compiler.construct_postdomtree(cfg′.blocks).idoms_bb == correct_pidoms
+                @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms
+                @test Compiler.construct_postdomtree(cfg′).idoms_bb == correct_pidoms
             end
         end
     end
@@ -113,8 +102,9 @@ let cfg = CFG(BasicBlock[
     make_bb([0, 1, 2] , [5]   ), # 0 predecessor should be preserved
     make_bb([2, 3]    , []    ),
 ], Int[])
-    insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[])
-    ir = Compiler.IRCode(insts, cfg, Core.LineInfoNode[], Any[], Expr[], Compiler.VarState[])
+    insts = Compiler.InstructionStream([], [], Core.Compiler.CallInfo[], Int32[], UInt32[])
+    di = Compiler.DebugInfoStream(insts.line)
+    ir = Compiler.IRCode(insts, cfg, di, Any[], Expr[], Compiler.VarState[])
     compact = Compiler.IncrementalCompact(ir, true)
     @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds
 end
@@ -229,42 +219,97 @@ let code = Any[
     ]
     ir = make_ircode(code; verify=false)
     ir = Core.Compiler.compact!(ir, true)
-    @test Core.Compiler.verify_ir(ir) == nothing
+    @test Core.Compiler.verify_ir(ir) === nothing
 end
 
 # issue #37919
-let ci = code_lowered(()->@isdefined(_not_def_37919_), ())[1]
+let ci = only(code_lowered(()->@isdefined(_not_def_37919_), ()))
     ir = Core.Compiler.inflate_ir(ci)
     @test Core.Compiler.verify_ir(ir) === nothing
 end
 
 let code = Any[
         # block 1
-        GotoIfNot(Argument(2), 4),
+        GotoIfNot(Argument(2), 4)
+        # block 2
+        Expr(:call, throw, "potential throw")
+        ReturnNode() # unreachable
+        # block 3
+        ReturnNode(Argument(3))
+    ]
+    ir = make_ircode(code; slottypes=Any[Any,Bool,Int])
+    visited = BitSet()
+    @test !Core.Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
+        push!(visited, succ)
+        return false
+    end
+    @test 2 ∈ visited
+    @test 3 ∈ visited
+    oc = Core.OpaqueClosure(ir)
+    @test oc(false, 1) == 1
+    @test_throws "potential throw" oc(true, 1)
+end
+
+let code = Any[
+        # block 1
+        GotoIfNot(Argument(2), 3)
         # block 2
-        GotoNode(3),
+        ReturnNode(Argument(3))
         # block 3
-        Expr(:call, throw, "potential throw"),
+        Expr(:call, throw, "potential throw")
+        ReturnNode() # unreachable
+    ]
+    ir = make_ircode(code; slottypes=Any[Any,Bool,Int])
+    visited = BitSet()
+    @test !Core.Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
+        push!(visited, succ)
+        return false
+    end
+    @test 2 ∈ visited
+    @test 3 ∈ visited
+    oc = Core.OpaqueClosure(ir)
+    @test oc(true, 1) == 1
+    @test_throws "potential throw" oc(false, 1)
+end
+
+let code = Any[
+        # block 1
+        GotoIfNot(Argument(2), 5)
+        # block 2
+        GotoNode(3)
+        # block 3
+        Expr(:call, throw, "potential throw")
+        ReturnNode()
         # block 4
-        Expr(:call, Core.Intrinsics.add_int, Argument(3), Argument(4)),
-        GotoNode(6),
+        Expr(:call, Core.Intrinsics.add_int, Argument(3), Argument(4))
+        GotoNode(7)
         # block 5
-        ReturnNode(SSAValue(4))
+        ReturnNode(SSAValue(5))
     ]
     ir = make_ircode(code; slottypes=Any[Any,Bool,Int,Int])
-    lazypostdomtree = Core.Compiler.LazyPostDomtree(ir)
     visited = BitSet()
-    @test !Core.Compiler.visit_conditional_successors(lazypostdomtree, ir, #=bb=#1) do succ::Int
+    @test !Core.Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
         push!(visited, succ)
         return false
     end
     @test 2 ∈ visited
     @test 3 ∈ visited
-    @test 4 ∉ visited
-    @test 5 ∉ visited
+    @test 4 ∈ visited
+    @test 5 ∈ visited
     oc = Core.OpaqueClosure(ir)
     @test oc(false, 1, 1) == 2
     @test_throws "potential throw" oc(true, 1, 1)
+
+    let buf = IOBuffer()
+        oc = Core.OpaqueClosure(ir; slotnames=Symbol[:ocfunc, :x, :y, :z])
+        try
+            oc(true, 1, 1)
+        catch
+            Base.show_backtrace(buf, catch_backtrace())
+        end
+        s = String(take!(buf))
+        @test occursin("(x::Bool, y::$Int, z::$Int)", s)
+    end
 end
 
 # Test dynamic update of domtree with edge insertions and deletions in the
@@ -292,7 +337,7 @@ let cfg = CFG(BasicBlock[
         make_bb([2, 6], []),
         make_bb([4],    [5, 3]),
     ], Int[])
-    domtree = Compiler.construct_domtree(cfg.blocks)
+    domtree = Compiler.construct_domtree(cfg)
     @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
     @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
@@ -446,7 +491,7 @@ let
 
     # this isn't valid code, we just care about looking at a variety of IR nodes
     body = Any[
-        Expr(:enter, 11),
+        EnterNode(11),
         Expr(:call, :+, SSAValue(3), 1),
         Expr(:throw_undef_if_not, :expected, false),
         Expr(:leave, Core.SSAValue(1)),
@@ -486,7 +531,7 @@ let ir = Base.code_ircode((Bool,Any)) do c, x
         end
     end
     # domination analysis
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
+    domtree = Core.Compiler.construct_domtree(ir)
     @test Core.Compiler.dominates(domtree, 1, 2)
     @test Core.Compiler.dominates(domtree, 1, 3)
     @test Core.Compiler.dominates(domtree, 1, 4)
@@ -497,7 +542,7 @@ let ir = Base.code_ircode((Bool,Any)) do c, x
         end
     end
     # post domination analysis
-    post_domtree = Core.Compiler.construct_postdomtree(ir.cfg.blocks)
+    post_domtree = Core.Compiler.construct_postdomtree(ir)
     @test Core.Compiler.postdominates(post_domtree, 4, 1)
     @test Core.Compiler.postdominates(post_domtree, 4, 2)
     @test Core.Compiler.postdominates(post_domtree, 4, 3)
@@ -544,27 +589,6 @@ end
     @test show(devnull, ir) === nothing
 end
 
-@testset "IncrementalCompact statefulness" begin
-    foo(i) = i == 1 ? 1 : 2
-    ir = only(Base.code_ircode(foo, (Int,)))[1]
-    compact = Core.Compiler.IncrementalCompact(ir)
-
-    # set up first iterator
-    x = Core.Compiler.iterate(compact)
-    x = Core.Compiler.iterate(compact, x[2])
-
-    # set up second iterator
-    x = Core.Compiler.iterate(compact)
-
-    # consume remainder
-    while x !== nothing
-        x = Core.Compiler.iterate(compact, x[2])
-    end
-
-    ir = Core.Compiler.complete(compact)
-    @test Core.Compiler.verify_ir(ir) === nothing
-end
-
 # insert_node! operations
 # =======================
 
@@ -708,3 +732,88 @@ end
         end
     end
 end
+
+# Test that things don't break if one branch of the frontend PhiNode becomes unreachable
+const global_error_switch_const1::Bool = false
+function gen_unreachable_phinode_edge1(world::UInt, source, args...)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch_const1),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, identity, Argument(3)),
+        # block 3
+        PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any,Int,Int])
+    ci.slotnames = Symbol[:var"#self#", :x, :y]
+    ci.nargs = 3
+    ci.isva = false
+    return ci
+end
+@eval function f_unreachable_phinode_edge1(x, y)
+    $(Expr(:meta, :generated, gen_unreachable_phinode_edge1))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+@test f_unreachable_phinode_edge1(1, 2) == 1
+
+const global_error_switch_const2::Bool = true
+function gen_unreachable_phinode_edge2(world::UInt, source, args...)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch_const2),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, identity, Argument(3)),
+        # block 3
+        PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any,Int,Int])
+    ci.slotnames = Symbol[:var"#self#", :x, :y]
+    ci.nargs = 3
+    ci.isva = false
+    return ci
+end
+@eval function f_unreachable_phinode_edge2(x, y)
+    $(Expr(:meta, :generated, gen_unreachable_phinode_edge2))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+@test f_unreachable_phinode_edge2(1, 2) == 2
+
+global global_error_switch::Bool = true
+function gen_must_throw_phinode_edge(world::UInt, source, _)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, error, "This error is expected"),
+        # block 3
+        PhiNode(Int32[2, 3], Any[1, 2]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any])
+    ci.slotnames = Symbol[:var"#self#"]
+    ci.nargs = 1
+    ci.isva = false
+    return ci
+end
+@eval function f_must_throw_phinode_edge()
+    $(Expr(:meta, :generated, gen_must_throw_phinode_edge))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+let ir = first(only(Base.code_ircode(f_must_throw_phinode_edge)))
+    @test !any(@nospecialize(x)->isa(x,PhiNode), ir.stmts.stmt)
+end
+@test_throws ErrorException f_must_throw_phinode_edge()
+global global_error_switch = false
+@test f_must_throw_phinode_edge() == 1
+
+# Test roundtrip of debuginfo compression
+let cl = Int32[32, 1, 1, 1000, 240, 230]
+    str = ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), 378, cl, 2)::String;
+    cl2 = ccall(:jl_uncompress_codelocs, Any, (Any, Int), str, 2)
+    @test cl == cl2
+end
diff --git a/test/compiler/tarjan.jl b/test/compiler/tarjan.jl
index 44acafb2ca5f4..11c6b68e58b1b 100644
--- a/test/compiler/tarjan.jl
+++ b/test/compiler/tarjan.jl
@@ -102,7 +102,7 @@ function test_reachability(V, E; deletions = 2E ÷ 3, all_checks=false)
     end
 
     cfg = rand_cfg(V, E)
-    domtree = Core.Compiler.construct_domtree(cfg.blocks)
+    domtree = Core.Compiler.construct_domtree(cfg)
     reachability = CFGReachability(cfg, domtree)
     check_reachability(reachability, cfg, domtree, all_checks)
 
diff --git a/test/complex.jl b/test/complex.jl
index d798cfe16489c..63304652ee7d8 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -1215,3 +1215,9 @@ end
     @test !iseven(7+0im) && isodd(7+0im)
     @test !iseven(6+1im) && !isodd(7+1im)
 end
+
+@testset "issue #55266" begin
+    for T in (Float16, Float32, Float64)
+        @test isapprox(atanh(1+im*floatmin(T)), Complex{T}(atanh(1+im*big(floatmin(T)))))
+    end
+end
diff --git a/test/copy.jl b/test/copy.jl
index 11cfbb9f14d76..559bf5d3e757a 100644
--- a/test/copy.jl
+++ b/test/copy.jl
@@ -49,6 +49,15 @@ chnlprod(x) = Channel(c->for i in x; put!(c,i); end)
 
         @test_throws Union{BoundsError, ArgumentError} copyto!(dest, 1, src(), 2, 2)
     end
+
+    v = rand(Float32, 4)
+    a = Memory{Float32}(v)
+    b = similar(a)
+    copyto!(b, a)
+    @test a == b
+
+    c = Memory{Float32}(undef, 3)
+    @test_throws BoundsError copyto!(c, a)
 end
 
 @testset "with CartesianIndices" begin
@@ -189,7 +198,7 @@ end
         bar = Bar19921(foo, Dict(foo => 3))
         bar2 = deepcopy(bar)
         @test bar2.foo ∈ keys(bar2.fooDict)
-        @test bar2.fooDict[bar2.foo] != nothing
+        @test bar2.fooDict[bar2.foo] !== nothing
     end
 
     let d = IdDict(rand(2) => rand(2) for i = 1:100)
@@ -248,6 +257,22 @@ end
     @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros()
 end
 
+@testset "deepcopy_internal inference" begin
+    @inferred Base.deepcopy_internal(1, IdDict())
+    @inferred Base.deepcopy_internal(1.0, IdDict())
+    @inferred Base.deepcopy_internal(big(1), IdDict())
+    @inferred Base.deepcopy_internal(big(1.0), IdDict())
+    @inferred Base.deepcopy_internal('a', IdDict())
+    @inferred Base.deepcopy_internal("abc", IdDict())
+    @inferred Base.deepcopy_internal([1,2,3], IdDict())
+
+    # structs without custom deepcopy_internal method
+    struct Immutable2; x::Int; end
+    mutable struct Mutable2; x::Int; end
+    @inferred Base.deepcopy_internal(Immutable2(1), IdDict())
+    @inferred Base.deepcopy_internal(Mutable2(1), IdDict())
+end
+
 @testset "`copyto!`'s unaliasing" begin
     a = view([1:3;], :)
     @test copyto!(a, 2, a, 1, 2) == [1;1:2;]
@@ -257,6 +282,8 @@ end
 
 @testset "`deepcopy` a `GenericCondition`" begin
     a = Base.GenericCondition(ReentrantLock())
+    # Test printing
+    @test repr(a) == "Base.GenericCondition(ReentrantLock())"
     @test !islocked(a.lock)
     lock(a.lock)
     @test islocked(a.lock)
@@ -269,4 +296,6 @@ end
     @test a.lock !== b.lock
     @test islocked(a.lock)
     @test !islocked(b.lock)
+    @inferred deepcopy(a)
+    @inferred deepcopy(a.lock)
 end
diff --git a/test/core.jl b/test/core.jl
index aa16380c8a866..4db7f0e401fa0 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -14,7 +14,7 @@ include("testenv.jl")
 # sanity tests that our built-in types are marked correctly for const fields
 for (T, c) in (
         (Core.CodeInfo, []),
-        (Core.CodeInstance, [:def, :owner, :rettype, :exctype, :rettype_const, :ipo_purity_bits, :analysis_results]),
+        (Core.CodeInstance, [:def, :owner, :rettype, :exctype, :rettype_const, :analysis_results]),
         (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :is_for_opaque_closure, :constprop=#]),
         (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]),
         (Core.MethodTable, [:module]),
@@ -32,9 +32,9 @@ end
 # sanity tests that our built-in types are marked correctly for atomic fields
 for (T, c) in (
         (Core.CodeInfo, []),
-        (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :purity_bits, :invoke, :specptr, :specsigflags, :precompile]),
+        (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile]),
         (Core.Method, [:primary_world, :deleted_world]),
-        (Core.MethodInstance, [:uninferred, :cache, :precompiled]),
+        (Core.MethodInstance, [:cache, :precompiled]),
         (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]),
         (Core.TypeMapEntry, [:next, :min_world, :max_world]),
         (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]),
@@ -42,6 +42,7 @@ for (T, c) in (
         (DataType, [:types, :layout]),
         (Core.Memory, []),
         (Core.GenericMemoryRef, []),
+        (Task, [:_state])
     )
     @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c)
 end
@@ -1924,9 +1925,9 @@ end
 
 # issue #4526
 f4526(x) = isa(x.a, Nothing)
-@test_throws ErrorException f4526(1)
-@test_throws ErrorException f4526(im)
-@test_throws ErrorException f4526(1+2im)
+@test_throws FieldError f4526(1)
+@test_throws FieldError f4526(im)
+@test_throws FieldError f4526(1+2im)
 
 # issue #4528
 function f4528(A, B)
@@ -3999,6 +4000,14 @@ end
 end
 @test f13432b(true) == true
 @test f13432b(false) == false
+@noinline function f13432c(x)
+    offset = x ? Base.Bottom : 1
+    # Barrier for inference, so the optimizer cannot optimize this,
+    # but codegen can still see this is a constant
+    return ===(offset, Base.inferencebarrier(Base.Bottom))
+end
+@test f13432c(true) == true
+@test f13432c(false) == false
 
 #13433, read!(::IO, a::Vector{UInt8}) should return a
 mutable struct IO13433 <: IO end
@@ -4298,13 +4307,13 @@ end
 abstract type abstest_14825 end
 
 mutable struct t1_14825{A <: abstest_14825, B}
-  x::A
-  y::B
+    x::A
+    y::B
 end
 
 mutable struct t2_14825{C, B} <: abstest_14825
-  x::C
-  y::t1_14825{t2_14825{C, B}, B}
+    x::C
+    y::t1_14825{t2_14825{C, B}, B}
 end
 
 @test t2_14825{Int,Int}.types[2] <: t1_14825
@@ -5602,6 +5611,26 @@ end
     x::Array{T} where T<:Integer
 end
 
+# issue #54757, type redefinitions with recursive reference in supertype
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+@test_throws ErrorException struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+
 let a = Vector{Core.TypeofBottom}(undef, 2)
     @test a[1] == Union{}
     @test a == [Union{}, Union{}]
@@ -6248,6 +6277,16 @@ let
     @test_throws ArgumentError unsafe_wrap(Array, convert(Ptr{Union{Int, Nothing}}, pointer(A5)), 6)
 end
 
+# More unsafe_wrap
+let
+    a = [1, 2, 3]
+    GC.@preserve a begin
+        m = unsafe_wrap(Memory{Int}, pointer(a), (3,))
+        @test m == a
+        @test m isa Memory{Int}
+    end
+end
+
 # copyto!
 A23567 = Vector{Union{Float64, Nothing}}(undef, 5)
 B23567 = collect(Union{Float64, Nothing}, 1.0:3.0)
@@ -6776,14 +6815,14 @@ primitive type TypeWith24Bits 24 end
 TypeWith24Bits(x::UInt32) = Core.Intrinsics.trunc_int(TypeWith24Bits, x)
 let x = TypeWith24Bits(0x112233), y = TypeWith24Bits(0x445566), z = TypeWith24Bits(0x778899)
     a = [x, x]
-    Core.memoryrefset!(Core.memoryref(a.ref, 2, true), y, :not_atomic, true)
+    Core.memoryrefset!(Core.memoryrefnew(a.ref, 2, true), y, :not_atomic, true)
     @test a == [x, y]
     a[2] = z
     @test a == [x, z]
     @test pointer(a, 2) - pointer(a, 1) == 4
 
     b = [(x, x), (x, x)]
-    Core.memoryrefset!(Core.memoryref(b.ref, 2, true), (x, y), :not_atomic, true)
+    Core.memoryrefset!(Core.memoryrefnew(b.ref, 2, true), (x, y), :not_atomic, true)
     @test b == [(x, x), (x, y)]
     b[2] = (y, z)
     @test b == [(x, x), (y, z)]
@@ -7012,7 +7051,7 @@ translate27368(::Type{Val{name}}) where {name} =
 # issue #27456
 @inline foo27456() = try baz_nonexistent27456(); catch; nothing; end
 bar27456() = foo27456()
-@test bar27456() == nothing
+@test bar27456() === nothing
 
 # issue #27365
 mutable struct foo27365
@@ -7210,6 +7249,20 @@ end
 @test_throws ArgumentError Array{Int, 2}(undef, -10, 0)
 @test_throws ArgumentError Array{Int, 2}(undef, -1, -1)
 
+# issue #54244
+# test that zero sized array doesn't throw even with large axes
+bignum = Int==Int64 ? 2^32 : 2^16
+Array{Int}(undef, 0, bignum, bignum)
+Array{Int}(undef, bignum, bignum, 0)
+Array{Int}(undef, bignum, bignum, 0, bignum, bignum)
+# but also test that it does throw if the axes multiply to a multiple of typemax(UInt)
+@test_throws ArgumentError Array{Int}(undef, bignum, bignum)
+@test_throws ArgumentError Array{Int}(undef, 1, bignum, bignum)
+# also test that we always throw erros for negative dims even if other dims are 0 or the product is positive
+@test_throws ArgumentError Array{Int}(undef, 0, -4, -4)
+@test_throws ArgumentError Array{Int}(undef, -4, 1, 0)
+@test_throws ArgumentError Array{Int}(undef, -4, -4, 1)
+
 # issue #28812
 @test Tuple{Vararg{Array{T} where T,3}} === Tuple{Array,Array,Array}
 
@@ -7463,6 +7516,13 @@ struct A43411{S, T}
 end
 @test isbitstype(A43411{(:a,), Tuple{Int}})
 
+# issue #55189
+struct A55189{N}
+    children::NTuple{N,A55189{N}}
+end
+@test fieldtype(A55189{2}, 1) === Tuple{A55189{2}, A55189{2}}
+@assert !isbitstype(A55189{2})
+
 # issue #44614
 struct T44614_1{T}
     m::T
@@ -7533,7 +7593,7 @@ end
 # issue #31696
 foo31696(x::Int8, y::Int8) = 1
 foo31696(x::T, y::T) where {T <: Int8} = 2
-@test length(methods(foo31696)) == 1
+@test length(methods(foo31696)) == 2
 let T1 = Tuple{Int8}, T2 = Tuple{T} where T<:Int8, a = T1[(1,)], b = T2[(1,)]
     b .= a
     @test b[1] == (1,)
@@ -7726,13 +7786,17 @@ struct ContainsPointerNopadding{T}
 end
 
 @test !Base.datatype_haspadding(PointerNopadding{Symbol})
+@test Base.datatype_isbitsegal(PointerNopadding{Int})
 @test !Base.datatype_haspadding(PointerNopadding{Int})
+@test Base.datatype_isbitsegal(PointerNopadding{Int})
 # Sanity check to make sure the meaning of haspadding didn't change.
-@test Base.datatype_haspadding(PointerNopadding{Any})
+@test !Base.datatype_haspadding(PointerNopadding{Any})
+@test !Base.datatype_isbitsegal(PointerNopadding{Any})
 @test !Base.datatype_haspadding(Tuple{PointerNopadding{Symbol}})
 @test !Base.datatype_haspadding(Tuple{PointerNopadding{Int}})
 @test !Base.datatype_haspadding(ContainsPointerNopadding{Symbol})
-@test Base.datatype_haspadding(ContainsPointerNopadding{Int})
+@test !Base.datatype_haspadding(ContainsPointerNopadding{Int})
+@test !Base.datatype_isbitsegal(ContainsPointerNopadding{Int})
 
 # Test the codegen optimized version as well as the unoptimized version of `jl_egal`
 @noinline unopt_jl_egal(@nospecialize(a), @nospecialize(b)) =
@@ -8063,10 +8127,6 @@ end
 @test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,)))
 @test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int)))
 
-let lin = Core.LineInfoNode(Base, first(methods(convert)), :foo, Int32(5), Int32(0))
-    @test convert(LineNumberNode, lin) == LineNumberNode(5, :foo)
-end
-
 # Test that a nothrow-globalref doesn't get outlined during lowering
 module WellKnownGlobal
     global well_known = 1
@@ -8118,12 +8178,113 @@ end
 @test_broken Int isa Union{Union, Type{Union{Int,T1}} where {T1}}
 
 let M = @__MODULE__
-    @test Core.set_binding_type!(M, :a_typed_global, Tuple{Union{Integer,Nothing}}) === nothing
+    Core.eval(M, :(global a_typed_global))
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
     @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
-    @test Core.set_binding_type!(M, :a_typed_global, Tuple{Union{Integer,Nothing}}) === nothing
-    @test Core.set_binding_type!(M, :a_typed_global, Union{Tuple{Integer},Tuple{Nothing}}) === nothing
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
+    @test Core.eval(M, :(global a_typed_global::$(Union{Tuple{Integer},Tuple{Nothing}}))) === nothing
     @test_throws(ErrorException("cannot set type for global $(nameof(M)).a_typed_global. It already has a value or is already set to a different type."),
-                 Core.set_binding_type!(M, :a_typed_global, Union{Nothing,Tuple{Union{Integer,Nothing}}}))
-    @test Core.set_binding_type!(M, :a_typed_global) === nothing
-    @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
+                 Core.eval(M, :(global a_typed_global::$(Union{Nothing,Tuple{Union{Integer,Nothing}}}))))
+    @test Core.eval(M, :(global a_typed_global)) === nothing
+    @test Core.get_binding_type(M, :a_typed_global) == Tuple{Union{Integer,Nothing}}
+end
+
+@test Base.unsafe_convert(Ptr{Int}, [1]) !== C_NULL
+
+# Test that new macros are allowed to be defined inside Expr(:toplevel) returned by macros
+macro macroception()
+    Expr(:toplevel, :(macro foo() 1 end), :(@foo))
+end
+
+@test (@macroception()) === 1
+
+# overlay method tables
+# =====================
+
+module OverlayModule
+
+using Base.Experimental: @MethodTable, @overlay
+
+@MethodTable mt
+# long function def
+@overlay mt function sin(x::Float64)
+    1
+end
+# short function def
+@overlay mt cos(x::Float64) = 2
+# parametric function def
+@overlay mt tan(x::T) where {T} = 3
+
+end # module OverlayModule
+
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter())
+    @test only(ms).method.module === Base.Math
+end
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter())
+    @test only(ms).method.module === OverlayModule
+end
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter())
+    @test isempty(ms)
 end
+
+# precompilation
+let load_path = mktempdir()
+    depot_path = mktempdir()
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, depot_path)
+
+        write(joinpath(load_path, "Foo.jl"),
+            """
+            module Foo
+            Base.Experimental.@MethodTable(mt)
+            Base.Experimental.@overlay mt sin(x::Int) = 1
+            end
+            """)
+
+        # precompiling Foo serializes the overlay method through the `mt` binding in the module
+        Foo = Base.require(Main, :Foo)
+        @test length(Foo.mt) == 1
+
+        write(joinpath(load_path, "Bar.jl"),
+            """
+            module Bar
+            Base.Experimental.@MethodTable(mt)
+            end
+            """)
+
+        write(joinpath(load_path, "Baz.jl"),
+            """
+            module Baz
+            using Bar
+            Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
+            end
+            """)
+
+        # when referring an method table in another module,
+        # the overlay method needs to be discovered explicitly
+        Bar = Base.require(Main, :Bar)
+        @test length(Bar.mt) == 0
+        Baz = Base.require(Main, :Baz)
+        @test length(Bar.mt) == 1
+    finally
+        filter!((≠)(load_path), LOAD_PATH)
+        filter!((≠)(depot_path), DEPOT_PATH)
+        rm(load_path, recursive=true, force=true)
+        try
+            rm(depot_path, force=true, recursive=true)
+        catch err
+            @show err
+        end
+    end
+end
+
+# merging va tuple unions
+@test Tuple === Union{Tuple{},Tuple{Any,Vararg}}
+@test Tuple{Any,Vararg} === Union{Tuple{Any},Tuple{Any,Any,Vararg}}
+@test Core.Compiler.return_type(Base.front, Tuple{Tuple{Int,Vararg{Int}}}) === Tuple{Vararg{Int}}
+@test Tuple{Vararg{Int}} === Union{Tuple{Int}, Tuple{}, Tuple{Int, Int, Vararg{Int}}}
+@test (Tuple{Vararg{T}} where T) === (Union{Tuple{T, T, Vararg{T}}, Tuple{}, Tuple{T}} where T)
+@test_broken (Tuple{Vararg{T}} where T) === Union{Tuple{T, T, Vararg{T}} where T, Tuple{}, Tuple{T} where T}
+
+@test sizeof(Pair{Union{typeof(Union{}),Nothing}, Union{Type{Union{}},Nothing}}(Union{}, Union{})) == 2
diff --git a/test/corelogging.jl b/test/corelogging.jl
index 778e70aecd406..b8cd3716cad2e 100644
--- a/test/corelogging.jl
+++ b/test/corelogging.jl
@@ -140,9 +140,9 @@ end
     end
     @test length(logger.logs) == 1
     record = logger.logs[1]
-    @test record._module == nothing
-    @test record.file == nothing
-    @test record.line == nothing
+    @test record._module === nothing
+    @test record.file === nothing
+    @test record.line === nothing
 end
 
 # PR #28209
diff --git a/test/dict.jl b/test/dict.jl
index ca8a598de0b81..13c60d5a6a053 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -8,7 +8,7 @@ using Random
     @test isequal(p,10=>20)
     @test iterate(p)[1] == 10
     @test iterate(p, iterate(p)[2])[1] == 20
-    @test iterate(p, iterate(p, iterate(p)[2])[2]) == nothing
+    @test iterate(p, iterate(p, iterate(p)[2])[2]) === nothing
     @test firstindex(p) == 1
     @test lastindex(p) == length(p) == 2
     @test Base.indexed_iterate(p, 1, nothing) == (10,2)
@@ -683,9 +683,9 @@ end
     @inferred setindex!(d, -1, 10)
     @test d[10] == -1
     @test 1 == @inferred d[1]
-    @test get(d, -111, nothing) == nothing
+    @test get(d, -111, nothing) === nothing
     @test 1 == @inferred get(d, 1, 1)
-    @test pop!(d, -111, nothing) == nothing
+    @test pop!(d, -111, nothing) === nothing
     @test 1 == @inferred pop!(d, 1)
 
     # get! and delete!
@@ -1510,9 +1510,9 @@ end
 for T in (Int, Float64, String, Symbol)
     @testset let T=T
         @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-        @test_broken Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
         @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-        @test_broken Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
     end
 end
 
diff --git a/test/docs.jl b/test/docs.jl
index 36893b8614170..f62f7f8b63b2c 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -2,6 +2,9 @@
 
 import Base.Docs: meta, @var, DocStr, parsedoc
 
+# check that @doc can work before REPL is loaded
+@test !startswith(read(`$(Base.julia_cmd()) -E '@doc sin'`, String), "nothing")
+
 using Markdown
 using REPL
 
@@ -120,7 +123,7 @@ module NoDocStrings end
 # General tests for docstrings.
 
 const LINE_NUMBER = @__LINE__() + 1
-"DocsTest"
+"DocsTest, evaluating $(K)"     # test that module docstring is evaluated within module
 module DocsTest
 
 using Markdown
@@ -265,7 +268,7 @@ fnospecialize(@nospecialize(x::AbstractArray)) = 2
 end
 
 let md = meta(DocsTest)[@var(DocsTest)]
-    @test docstrings_equal(md.docs[Union{}], doc"DocsTest")
+    @test docstrings_equal(md.docs[Union{}], doc"DocsTest, evaluating K")
     # Check that plain docstrings store a module reference.
     # https://github.com/JuliaLang/julia/pull/13017#issuecomment-138618663
     @test md.docs[Union{}].data[:module] == DocsTest
@@ -1562,3 +1565,15 @@ Base.@ccallable c51586_long()::Int = 3
     @test_broken isempty(undoc)
     @test undoc == [Symbol("@var")]
 end
+
+# Docing the macroception macro
+macro docmacroception()
+    Expr(:toplevel, macroexpand(__module__, :(@Base.__doc__ macro docmacrofoo() 1 end); recursive=false), :(@docmacrofoo))
+end
+
+"""
+This docmacroception has a docstring
+"""
+@docmacroception()
+
+@test Docs.hasdoc(@__MODULE__, :var"@docmacrofoo")
diff --git a/test/error.jl b/test/error.jl
index 8cd9a07f6655e..8657c70720779 100644
--- a/test/error.jl
+++ b/test/error.jl
@@ -111,8 +111,8 @@ end
             for name in names(mod, all=true)
                 isdefined(mod, name) || continue
                 value = getfield(mod, name)
-
                 if value isa Module
+                    value === Main && continue
                     test_exceptions(value, visited)
                 elseif value isa Type
                     str = string(value)
@@ -127,3 +127,36 @@ end
     visited = test_exceptions(Base)
     test_exceptions(Core, visited)
 end
+
+# inference quality test for `error`
+@test Base.infer_return_type(error, (Any,)) === Union{}
+@test Base.infer_return_type(xs->error(xs...), (Vector{Any},)) === Union{}
+module Issue54029
+export raise54029
+Base.Experimental.@max_methods 1
+raise54029(x) = error(x)
+end
+using .Issue54029
+@test Base.infer_return_type(raise54029, (Any,)) === Union{}
+@test Base.infer_return_type(xs->raise54029(xs...), (Vector{Any},)) === Union{}
+
+@testset "CompositeException" begin
+    ce = CompositeException()
+    @test isempty(ce)
+    @test length(ce) == 0
+    @test eltype(ce) == Any
+    str = sprint(showerror, ce)
+    @test str == "CompositeException()\n"
+    push!(ce, ErrorException("something sad has happened"))
+    @test !isempty(ce)
+    @test length(ce) == 1
+    pushfirst!(ce, ErrorException("something sad has happened even earlier"))
+    @test length(ce) == 2
+    # test iterate
+    for ex in ce
+        @test ex isa ErrorException
+    end
+    push!(ce, ErrorException("something sad has happened yet again"))
+    str = sprint(showerror, ce)
+    @test str == "something sad has happened even earlier\n\n...and 2 more exceptions.\n"
+end
diff --git a/test/errorshow.jl b/test/errorshow.jl
index d03de54aab496..a82ab7743dc5a 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -10,7 +10,7 @@ Base.Experimental.register_error_hint(Base.noncallable_number_hint_handler, Meth
 Base.Experimental.register_error_hint(Base.string_concatenation_hint_handler, MethodError)
 Base.Experimental.register_error_hint(Base.methods_on_iterable, MethodError)
 Base.Experimental.register_error_hint(Base.nonsetable_type_hint_handler, MethodError)
-
+Base.Experimental.register_error_hint(Base.fielderror_hint_handler, FieldError)
 
 @testset "SystemError" begin
     err = try; systemerror("reason", Cint(0)); false; catch ex; ex; end::SystemError
@@ -671,7 +671,7 @@ end
 
     str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0), Base.get_world_counter()))
     @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)")
-    @test occursin("This method may not support any keyword arguments", str)
+    @test occursin("This method does not support all of the given keyword arguments", str)
 
     @test_throws "MethodError: no method matching kwcall()" Core.kwcall()
 end
@@ -808,6 +808,44 @@ end
 @test_throws ArgumentError("invalid index: \"foo\" of type String") [1]["foo"]
 @test_throws ArgumentError("invalid index: nothing of type Nothing") [1][nothing]
 
+# issue #53618
+@testset "FieldErrorHint" begin
+    struct FieldFoo
+        a::Float32
+        b::Int
+    end
+
+    s = FieldFoo(1, 2)
+
+    test = @test_throws FieldError s.c
+
+    ex = test.value::FieldError
+
+    # Check error message first
+    errorMsg = sprint(Base.showerror, ex)
+    @test occursin("FieldError: type FieldFoo has no field c", errorMsg)
+
+    d = Dict(s => 1)
+
+    for fld in fieldnames(Dict)
+        ex = try
+            getfield(d, fld)
+        catch e
+            print(e)
+        end
+        @test !(ex isa Type) || ex <: FieldError
+    end
+    test = @test_throws FieldError d.c
+
+    ex = test.value::FieldError
+
+    errorMsg = sprint(Base.showerror, ex)
+    @test occursin("FieldError: type Dict has no field c", errorMsg)
+    # Check hint message
+    hintExpected = "Did you mean to access dict values using key: `:c` ? Consider using indexing syntax dict[:c]\n"
+    @test occursin(hintExpected, errorMsg)
+end
+
 # test showing MethodError with type argument
 struct NoMethodsDefinedHere; end
 let buf = IOBuffer()
@@ -1041,6 +1079,12 @@ let err_str
     @test occursin("String concatenation is performed with *", err_str)
 end
 
+# https://github.com/JuliaLang/julia/issues/55745
+let err_str
+    err_str = @except_str +() MethodError
+    @test !occursin("String concatenation is performed with *", err_str)
+end
+
 struct MissingLength; end
 struct MissingSize; end
 Base.IteratorSize(::Type{MissingSize}) = Base.HasShape{2}()
diff --git a/test/fastmath.jl b/test/fastmath.jl
index 34744f325ad7f..efca5b85c6642 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -1,7 +1,30 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using InteractiveUtils: code_llvm
 # fast math
 
+@testset "check fast present in LLVM" begin
+    for T in (Float16, Float32, Float64, ComplexF32, ComplexF64)
+        f(x) = @fastmath x + x + x
+        llvm = sprint(code_llvm, f, (T,))
+        @test occursin("fast", llvm)
+
+        g(x) = @fastmath x * x * x
+        llvm = sprint(code_llvm, g, (T,))
+        @test occursin("fast", llvm)
+    end
+
+    for T in (Float16, Float32, Float64)
+        f(x, y, z) = @fastmath min(x, y, z)
+        llvm = sprint(code_llvm, f, (T,T,T))
+        @test occursin("fast", llvm)
+
+        g(x, y, z) = @fastmath max(x, y, z)
+        llvm = sprint(code_llvm, g, (T,T,T))
+        @test occursin("fast", llvm)
+    end
+end
+
 @testset "check expansions" begin
     @test macroexpand(Main, :(@fastmath 1+2)) == :(Base.FastMath.add_fast(1,2))
     @test macroexpand(Main, :(@fastmath +)) == :(Base.FastMath.add_fast)
@@ -256,6 +279,28 @@ end
 
 @testset "literal powers" begin
     @test @fastmath(2^-2) == @fastmath(2.0^-2) == 0.25
+    # Issue #53817
+    # Note that exponent -2^63 fails testing because of issue #53881
+    # Therefore we test with -(2^63-1). For Int == Int32 there is an analogue restriction.
+    # See also PR #53860.
+    if Int == Int64
+        @test @fastmath(2^-9223372036854775807) === 0.0
+        @test_throws DomainError @fastmath(2^-9223372036854775809)
+        @test @fastmath(1^-9223372036854775807) isa Float64
+        @test @fastmath(1^-9223372036854775809) isa Int
+    elseif Int == Int32
+        @test @fastmath(2^-2147483647) === 0.0
+        @test_throws DomainError @fastmath(2^-2147483649)
+        @test @fastmath(1^-2147483647) isa Float64
+        @test @fastmath(1^-2147483649) isa Int
+    end
+    @test_throws MethodError @fastmath(^(2))
+end
+# issue #53857
+@testset "fast_pow" begin
+    n = Int64(2)^52
+    @test @fastmath (1 + 1 / n) ^ n ≈ ℯ
+    @test @fastmath (1 + 1 / n) ^ 4503599627370496 ≈ ℯ
 end
 
 @testset "sincos fall-backs" begin
@@ -298,3 +343,9 @@ end
     x = @fastmath maximum(Float16[1,2,3]; init = Float16(0))
     @test x == Float16(3)
 end
+
+@testset "Test promotion of >=3 arg fastmath" begin
+    # Bug caught in https://github.com/JuliaLang/julia/pull/54513#discussion_r1620553369
+    x = @fastmath 1. + 1. + 1f0
+    @test x == 3.0
+end
diff --git a/test/file.jl b/test/file.jl
index a5ab29a3bf8bc..de6d488056a02 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -442,8 +442,7 @@ end
                 for pth in ("afile",
                             joinpath("afile", "not_file"),
                             SubString(joinpath(dir, "afile")),
-                            Base.RawFD(-1),
-                            -1)
+                            Base.RawFD(-1))
                     test_stat_error(stat, pth)
                     test_stat_error(lstat, pth)
                 end
@@ -824,6 +823,303 @@ mktempdir() do tmpdir
     rm(b_tmpdir)
 end
 
+@testset "rename" begin
+    # some of the windows specific behavior may be fixed in new versions of julia
+    mktempdir() do dir
+        # see if can make symlinks
+        local can_symlink = try
+            symlink("foo", joinpath(dir, "link"))
+            rm(joinpath(dir, "link"))
+            true
+        catch
+            false
+        end
+        local f1 = joinpath(dir, "file1")
+        local f2 = joinpath(dir, "file2")
+        local d1 = joinpath(dir, "dir1")
+        local d2 = joinpath(dir, "dir2")
+        local subd1f1 = joinpath(d1, "file1")
+        local subd1f2 = joinpath(d1, "file2")
+        local subd2f1 = joinpath(d2, "file1")
+        local subd2f2 = joinpath(d2, "file2")
+        local h1 = joinpath(dir, "hlink1")
+        local h2 = joinpath(dir, "hlink2")
+        local s1 = joinpath(dir, "slink1")
+        local s2 = joinpath(dir, "slink2")
+        @testset "renaming to non existing newpath in same directory" begin
+            # file, make sure isexecutable is copied
+            for mode in (0o644, 0o755)
+                write(f1, b"data")
+                chmod(f1, mode)
+                Base.rename(f1, f2)
+                @test !isfile(f1)
+                @test isfile(f2)
+                @test read(f2) == b"data"
+                if mode == 0o644
+                    @test !isexecutable(f2)
+                else
+                    @test isexecutable(f2)
+                end
+                rm(f2)
+            end
+            # empty directory
+            mkdir(d1)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test isempty(readdir(d2))
+            rm(d2)
+            # non empty directory
+            mkdir(d1)
+            write(subd1f1, b"data")
+            chmod(subd1f1, 0o644)
+            write(subd1f2, b"exe")
+            chmod(subd1f2, 0o755)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test read(subd2f1) == b"data"
+            @test read(subd2f2) == b"exe"
+            @test !isexecutable(subd2f1)
+            @test isexecutable(subd2f2)
+            rm(d2; recursive=true)
+            # hardlink
+            write(f1, b"data")
+            hardlink(f1, h1)
+            Base.rename(h1, h2)
+            @test isfile(f1)
+            @test !isfile(h1)
+            @test isfile(h2)
+            @test read(h2) == b"data"
+            write(h2, b"data2")
+            @test read(f1) == b"data2"
+            rm(h2)
+            rm(f1)
+            # symlink
+            if can_symlink
+                symlink("foo", s1)
+                Base.rename(s1, s2)
+                @test !islink(s1)
+                @test islink(s2)
+                @test readlink(s2) == "foo"
+                rm(s2)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        # Get the error code from failed rename, or nothing if it worked
+        function rename_errorcodes(oldpath, newpath)
+            try
+                Base.rename(oldpath, newpath)
+                nothing
+            catch e
+                e.code
+            end
+        end
+        @testset "errors" begin
+            # invalid paths
+            @test_throws ArgumentError Base.rename(f1*"\0", "")
+            @test Base.UV_ENOENT == rename_errorcodes("", "")
+            write(f1, b"data")
+            @test Base.UV_ENOENT == rename_errorcodes(f1, "")
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes("", f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f2, f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f1, subd1f1)
+            @test read(f1) == b"data"
+            rm(f1)
+            # attempt to make a directory a subdirectory of itself
+            mkdir(d1)
+            if Sys.iswindows()
+                @test rename_errorcodes(d1, joinpath(d1, "subdir")) ∈ (Base.UV_EINVAL, Base.UV_EBUSY)
+            else
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(d1, "subdir"))
+            end
+            rm(d1)
+            # rename to child of a file
+            mkdir(d1)
+            write(f2, "foo")
+            if Sys.iswindows()
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            else
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            end
+            # replace a file with a directory
+            if !Sys.iswindows()
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, f2)
+            else
+                # this should work on windows
+                Base.rename(d1, f2)
+                @test isdir(f2)
+                @test !ispath(d1)
+            end
+            rm(f2; force=true)
+            rm(d1; force=true)
+            # symlink loop
+            if can_symlink
+                symlink(s1, s2)
+                symlink(s2, s1)
+                @test Base.UV_ELOOP == rename_errorcodes(joinpath(s1, "foo"), f2)
+                write(f2, b"data")
+                @test Base.UV_ELOOP == rename_errorcodes(f2, joinpath(s1, "foo"))
+                rm(s1)
+                rm(s2)
+                rm(f2)
+            end
+            # newpath is a nonempty directory
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f1, b"data")
+            write(f1, b"otherdata")
+            if Sys.iswindows()
+                @test Base.UV_EACCES == rename_errorcodes(f1, d1)
+                @test Base.UV_EACCES == rename_errorcodes(f1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(subd2f1, d2)
+            else
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d1)
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d2)
+                @test rename_errorcodes(d1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST)
+                @test rename_errorcodes(subd2f1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST, Base.UV_EISDIR)
+            end
+            rm(f1)
+            rm(d1)
+            rm(d2; recursive=true)
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        @testset "replacing existing file" begin
+            write(f2, b"olddata")
+            chmod(f2, 0o755)
+            write(f1, b"newdata")
+            chmod(f1, 0o644)
+            @test isexecutable(f2)
+            @test !isexecutable(f1)
+            Base.rename(f1, f2)
+            @test !ispath(f1)
+            @test read(f2) == b"newdata"
+            @test !isexecutable(f2)
+            rm(f2)
+        end
+
+        @testset "replacing file with itself" begin
+            write(f1, b"data")
+            Base.rename(f1, f1)
+            @test read(f1) == b"data"
+            hardlink(f1, h1)
+            Base.rename(f1, h1)
+            if Sys.iswindows()
+                # On Windows f1 gets deleted
+                @test !ispath(f1)
+            else
+                @test read(f1) == b"data"
+            end
+            @test read(h1) == b"data"
+            rm(h1)
+            rm(f1; force=true)
+        end
+
+        @testset "replacing existing file in different directories" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f2, b"olddata")
+            chmod(subd2f2, 0o755)
+            write(subd1f1, b"newdata")
+            chmod(subd1f1, 0o644)
+            @test isexecutable(subd2f2)
+            @test !isexecutable(subd1f1)
+            Base.rename(subd1f1, subd2f2)
+            @test !ispath(subd1f1)
+            @test read(subd2f2) == b"newdata"
+            @test !isexecutable(subd2f2)
+            @test isdir(d1)
+            @test isdir(d2)
+            rm(d1; recursive=true)
+            rm(d2; recursive=true)
+        end
+
+        @testset "rename with open files" begin
+            # both open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                open(f2) do handle2
+                    if Sys.iswindows()
+                        # currently this doesn't work on windows
+                        @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                    else
+                        Base.rename(f1, f2)
+                        @test !ispath(f1)
+                        @test read(f2) == b"newdata"
+                    end
+                    # rename doesn't break already opened files
+                    @test read(handle1) == b"newdata"
+                    @test read(handle2) == b"olddata"
+                end
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # oldpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle1) == b"newdata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # newpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f2) do handle2
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EACCES == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle2) == b"olddata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+        end
+
+        @testset "replacing empty directory with directory" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd1f1, b"data")
+            if Sys.iswindows()
+                # currently this doesn't work on windows
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                rm(d1; recursive=true)
+                rm(d2)
+            else
+                Base.rename(d1, d2)
+                @test isdir(d2)
+                @test read(subd2f1) == b"data"
+                @test !ispath(d1)
+                rm(d2; recursive=true)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+    end
+end
+
 # issue #10506 #10434
 ## Tests for directories and links to directories
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
@@ -1032,7 +1328,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false)
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true)
         # mv
-        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
+        @test_throws Base._UVError("rename($(repr(nonexisting_src)), $(repr(dst)))", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
     end
 end
 
@@ -1473,7 +1769,7 @@ rm(dir)
 
 
 ##################
-# Return values of mkpath, mkdir, cp, mv and touch
+# Return values of mkpath, mkdir, cp, mv, rename and touch
 ####################
 mktempdir() do dir
     name1 = joinpath(dir, "apples")
@@ -1490,8 +1786,11 @@ mktempdir() do dir
     @test cp(name2, name1) == name1
     @test isfile(name1)
     @test isfile(name2)
+    @test Base.rename(name1, name2) == name2
+    @test !ispath(name1)
+    @test isfile(name2)
     namedir = joinpath(dir, "chalk")
-    namepath = joinpath(dir, "chalk","cheese","fresh")
+    namepath = joinpath(dir, "chalk", "cheese", "fresh")
     @test !ispath(namedir)
     @test mkdir(namedir) == namedir
     @test isdir(namedir)
@@ -1500,7 +1799,12 @@ mktempdir() do dir
     @test isdir(namepath)
     @test mkpath(namepath) == namepath
     @test isdir(namepath)
+    # issue 54826
+    namepath_dirpath = joinpath(dir, "x", "y", "z", "")
+    @test mkpath(namepath_dirpath) == namepath_dirpath
 end
+@test mkpath("") == ""
+@test mkpath("/") == "/"
 
 # issue #30588
 @test realpath(".") == realpath(pwd())
@@ -1671,7 +1975,7 @@ else
     )
 end
 
-@testset "chmod/isexecutable/isreadable/iswriteable" begin
+@testset "chmod/isexecutable/isreadable/iswritable" begin
     mktempdir() do dir
         subdir = joinpath(dir, "subdir")
         fpath = joinpath(dir, "subdir", "foo")
@@ -1688,19 +1992,19 @@ end
         chmod(fpath, 0o644)
         @test !Sys.isexecutable(fpath)
         @test Sys.isreadable(fpath)
-        @test Sys.iswriteable(fpath) skip=Sys.iswindows()
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
         chmod(fpath, 0o755)
         @test Sys.isexecutable(fpath)
         @test Sys.isreadable(fpath)
-        @test Sys.iswriteable(fpath) skip=Sys.iswindows()
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
         chmod(fpath, 0o444)
         @test !Sys.isexecutable(fpath)
         @test Sys.isreadable(fpath)
-        @test !Sys.iswriteable(fpath)
+        @test !Sys.iswritable(fpath)
         chmod(fpath, 0o244)
         @test !Sys.isexecutable(fpath)
         @test !Sys.isreadable(fpath) skip=Sys.iswindows()
-        @test Sys.iswriteable(fpath) skip=Sys.iswindows()
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
 
         # Ensure that, on Windows, where inheritance is default,
         # chmod still behaves as we expect.
@@ -1708,7 +2012,7 @@ end
             chmod(subdir, 0o666)
             @test !Sys.isexecutable(fpath)
             @test Sys.isreadable(fpath)
-            @test_skip Sys.iswriteable(fpath)
+            @test_skip Sys.iswritable(fpath)
         end
 
         # Reset permissions to all at the end, so it can be deleted properly.
@@ -1749,8 +2053,18 @@ end
     @test s.blocks isa Int64
     @test s.mtime isa Float64
     @test s.ctime isa Float64
+
+    @test s === stat((f,))
+    @test s === lstat((f,))
+    @test s === stat(".", f)
+    @test s === lstat(".", f)
 end
 
+mutable struct URI50890; f::String; end
+Base.joinpath(x::URI50890) = URI50890(x.f)
+@test_throws "stat not implemented" stat(URI50890("."))
+@test_throws "lstat not implemented" lstat(URI50890("."))
+
 @testset "StatStruct show's extended details" begin
     f, io = mktemp()
     s = stat(f)
diff --git a/test/filesystem.jl b/test/filesystem.jl
index 566ca08ec910e..036a3dda30cca 100644
--- a/test/filesystem.jl
+++ b/test/filesystem.jl
@@ -2,47 +2,51 @@
 
 mktempdir() do dir
 
-  # Create test file
-  filename = joinpath(dir, "file.txt")
-  text = "123456"
-  write(filename, text)
-
-  # test filesystem truncate (shorten)
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.truncate(file, 2)
-  text = text[1:2]
-  @test length(read(file)) == 2
-  close(file)
-
-  # test filesystem truncate (lengthen)
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.truncate(file, 20)
-  @test length(read(file)) == 20
-  close(file)
-
-  # test filesystem futime
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.futime(file, 1.0, 2.0)
-  @test Base.Filesystem.stat(file).mtime == 2.0
-  close(file)
-
-  # test filesystem readbytes!
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  res = ones(UInt8, 80)
-  Base.Filesystem.readbytes!(file, res)
-  @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...]
-  close(file)
+    # Create test file
+    filename = joinpath(dir, "file.txt")
+    text = "123456"
+    write(filename, text)
+
+    # test filesystem truncate (shorten)
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.truncate(file, 2)
+    text = text[1:2]
+    @test length(read(file)) == 2
+    close(file)
+
+    # test filesystem truncate (lengthen)
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.truncate(file, 20)
+    @test length(read(file)) == 20
+    close(file)
+
+    # test filesystem futime
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.futime(file, 1.0, 2.0)
+    @test Base.Filesystem.stat(file).mtime == 2.0
+    close(file)
+
+    # test filesystem readbytes!
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    res = ones(UInt8, 80)
+    Base.Filesystem.readbytes!(file, res)
+    @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...]
+    close(file)
 
 end
 
 import Base.Filesystem: S_IRUSR, S_IRGRP, S_IROTH
 @testset "types of permission mask constants" begin
-  @test S_IRUSR & ~S_IRGRP == S_IRUSR
-  @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH)
+    @test S_IRUSR & ~S_IRGRP == S_IRUSR
+    @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH)
 end
 
 @testset "Base.Filesystem docstrings" begin
-  undoc = Docs.undocumented_names(Base.Filesystem)
-  @test_broken isempty(undoc)
-  @test undoc == [:File, :Filesystem, :cptree, :futime, :rename, :sendfile, :unlink]
+    undoc = Docs.undocumented_names(Base.Filesystem)
+    @test_broken isempty(undoc)
+    @test undoc == [:File, :Filesystem, :cptree, :futime, :sendfile, :unlink]
+end
+
+@testset "write return type" begin
+    @test Base.return_types(write, (Base.Filesystem.File, UInt8)) == [Int]
 end
diff --git a/test/functional.jl b/test/functional.jl
index 3436fb8911cc1..84c4098308ebd 100644
--- a/test/functional.jl
+++ b/test/functional.jl
@@ -235,3 +235,129 @@ end
 let (:)(a,b) = (i for i in Base.:(:)(1,10) if i%2==0)
     @test Int8[ i for i = 1:2 ] == [2,4,6,8,10]
 end
+
+@testset "Basic tests of Fix1, Fix2, and Fix" begin
+    function test_fix1(Fix1=Base.Fix1)
+        increment = Fix1(+, 1)
+        @test increment(5) == 6
+        @test increment(-1) == 0
+        @test increment(0) == 1
+        @test map(increment, [1, 2, 3]) == [2, 3, 4]
+
+        concat_with_hello = Fix1(*, "Hello ")
+        @test concat_with_hello("World!") == "Hello World!"
+        # Make sure inference is good:
+        @inferred concat_with_hello("World!")
+
+        one_divided_by = Fix1(/, 1)
+        @test one_divided_by(10) == 1/10.0
+        @test one_divided_by(-5) == 1/-5.0
+
+        return nothing
+    end
+
+    function test_fix2(Fix2=Base.Fix2)
+        return_second = Fix2((x, y) -> y, 999)
+        @test return_second(10) == 999
+        @inferred return_second(10)
+        @test return_second(-5) == 999
+
+        divide_by_two = Fix2(/, 2)
+        @test map(divide_by_two, (2, 4, 6)) == (1.0, 2.0, 3.0)
+        @inferred map(divide_by_two, (2, 4, 6))
+
+        concat_with_world = Fix2(*, " World!")
+        @test concat_with_world("Hello") == "Hello World!"
+        @inferred concat_with_world("Hello World!")
+
+        return nothing
+    end
+
+    # Test with normal Base.Fix1 and Base.Fix2
+    test_fix1()
+    test_fix2()
+
+    # Now, repeat the Fix1 and Fix2 tests, but
+    # with a Fix lambda function used in their place
+    test_fix1((op, arg) -> Base.Fix{1}(op, arg))
+    test_fix2((op, arg) -> Base.Fix{2}(op, arg))
+
+    # Now, we do more complex tests of Fix:
+    let Fix=Base.Fix
+        @testset "Argument Fixation" begin
+            let f = (x, y, z) -> x + y * z
+                fixed_f1 = Fix{1}(f, 10)
+                @test fixed_f1(2, 3) == 10 + 2 * 3
+
+                fixed_f2 = Fix{2}(f, 5)
+                @test fixed_f2(1, 4) == 1 + 5 * 4
+
+                fixed_f3 = Fix{3}(f, 3)
+                @test fixed_f3(1, 2) == 1 + 2 * 3
+            end
+        end
+        @testset "Helpful errors" begin
+            let g = (x, y) -> x - y
+                # Test minimum N
+                fixed_g1 = Fix{1}(g, 100)
+                @test fixed_g1(40) == 100 - 40
+
+                # Test maximum N
+                fixed_g2 = Fix{2}(g, 100)
+                @test fixed_g2(150) == 150 - 100
+
+                # One over
+                fixed_g3 = Fix{3}(g, 100)
+                @test_throws ArgumentError("expected at least 2 arguments to `Fix{3}`, but got 1") fixed_g3(1)
+            end
+        end
+        @testset "Type Stability and Inference" begin
+            let h = (x, y) -> x / y
+                fixed_h = Fix{2}(h, 2.0)
+                @test @inferred(fixed_h(4.0)) == 2.0
+            end
+        end
+        @testset "Interaction with varargs" begin
+            vararg_f = (x, y, z...) -> x + 10 * y + sum(z; init=zero(x))
+            fixed_vararg_f = Fix{2}(vararg_f, 6)
+
+            # Can call with variable number of arguments:
+            @test fixed_vararg_f(1, 2, 3, 4) == 1 + 10 * 6 + sum((2, 3, 4))
+            @inferred fixed_vararg_f(1, 2, 3, 4)
+            @test fixed_vararg_f(5) == 5 + 10 * 6
+            @inferred fixed_vararg_f(5)
+        end
+        @testset "Errors should propagate normally" begin
+            error_f = (x, y) -> sin(x * y)
+            fixed_error_f = Fix{2}(error_f, Inf)
+            @test_throws DomainError fixed_error_f(10)
+        end
+        @testset "Chaining Fix together" begin
+            f1 = Fix{1}(*, "1")
+            f2 = Fix{1}(f1, "2")
+            f3 = Fix{1}(f2, "3")
+            @test f3() == "123"
+
+            g1 = Fix{2}(*, "1")
+            g2 = Fix{2}(g1, "2")
+            g3 = Fix{2}(g2, "3")
+            @test g3("") == "123"
+        end
+        @testset "Zero arguments" begin
+            f = Fix{1}(x -> x, 'a')
+            @test f() == 'a'
+        end
+        @testset "Dummy-proofing" begin
+            @test_throws ArgumentError("expected `N` in `Fix{N}` to be integer greater than 0, but got 0") Fix{0}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `0.5::Float64`") Fix{0.5}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `1::UInt64`") Fix{UInt64(1)}(>, 1)
+        end
+        @testset "Specialize to structs not in `Base`" begin
+            struct MyStruct
+                x::Int
+            end
+            f = Fix{1}(MyStruct, 1)
+            @test f isa Fix{1,Type{MyStruct},Int}
+        end
+    end
+end
diff --git a/test/gc.jl b/test/gc.jl
index d5990582cc00a..f924f4952cfb0 100644
--- a/test/gc.jl
+++ b/test/gc.jl
@@ -5,11 +5,13 @@ using Test
 function run_gctest(file)
     let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file`
         @testset for test_nthreads in (1, 2, 4)
-            @testset for concurrent_sweep in (0, 1)
-                new_env = copy(ENV)
-                new_env["JULIA_NUM_THREADS"] = string(test_nthreads)
-                new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)"
-                @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)))
+            @testset for test_nithreads in (0, 1)
+                @testset for concurrent_sweep in (0, 1)
+                    new_env = copy(ENV)
+                    new_env["JULIA_NUM_THREADS"] = "$test_nthreads,$test_nithreads"
+                    new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)"
+                    @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)))
+                end
             end
         end
     end
@@ -22,6 +24,31 @@ function run_nonzero_page_utilization_test()
     @test any(page_utilization .> 0)
 end
 
+function run_pg_size_test()
+    page_size = @ccall jl_get_pg_size()::UInt64
+    # supported page sizes: 4KB and 16KB
+    @test page_size == (1 << 12) || page_size == (1 << 14)
+end
+
+function issue_54275_alloc_string()
+    String(UInt8['a' for i in 1:10000000])
+end
+
+function issue_54275_test()
+    GC.gc(true)
+    baseline = Base.gc_live_bytes()
+    live_bytes_has_grown_too_much = false
+    for _ in 1:10
+        issue_54275_alloc_string()
+        GC.gc(true)
+        if Base.gc_live_bytes() - baseline > 1_000_000
+            live_bytes_has_grown_too_much = true
+            break
+        end
+    end
+    @test !live_bytes_has_grown_too_much
+end
+
 # !!! note:
 #     Since we run our tests on 32bit OS as well we confine ourselves
 #     to parameters that allocate about 512MB of objects. Max RSS is lower
@@ -31,7 +58,15 @@ end
     run_gctest("gc/linkedlist.jl")
     run_gctest("gc/objarray.jl")
     run_gctest("gc/chunks.jl")
+end
+
+@testset "GC page metrics" begin
     run_nonzero_page_utilization_test()
+    run_pg_size_test()
+end
+
+@testset "issue-54275" begin
+    issue_54275_test()
 end
 
 @testset "Base.GC docstrings" begin
diff --git a/test/gc/linkedlist.jl b/test/gc/linkedlist.jl
index 669e5f8ec21d9..3eb1480417e50 100644
--- a/test/gc/linkedlist.jl
+++ b/test/gc/linkedlist.jl
@@ -1,11 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 mutable struct ListNode
-  key::Int64
-  next::ListNode
-  ListNode() = new()
-  ListNode(x)= new(x)
-  ListNode(x,y) = new(x,y);
+    key::Int64
+    next::ListNode
+    ListNode() = new()
+    ListNode(x)= new(x)
+    ListNode(x,y) = new(x,y);
 end
 
 function list(N=16*1024^2)
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index b155370dd6465..7f19d60fe31fb 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -43,7 +43,7 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
     @test mapf(f, Int[], Int[], Complex{Int}[]) == Union{}[]
 
     # In-place map
-    if inplace_mapf != nothing
+    if inplace_mapf !== nothing
         A = Float64[1:10...]
         inplace_mapf(x -> x*x, A, A)
         @test A == map(x -> x*x, Float64[1:10...])
diff --git a/test/interpreter.jl b/test/interpreter.jl
index 0fea42e0aecdb..e25b5f0c8511a 100644
--- a/test/interpreter.jl
+++ b/test/interpreter.jl
@@ -30,3 +30,9 @@ let p = Pipe(),
     wait(proc)
     close(p)
 end
+
+# Test generated function behavior in interpreter
+@test success(pipeline(`$(Base.julia_cmd()) --compile=min -E 'include("staged.jl")'`; stderr))
+
+# Test contextual execution mechanism in interpreter (#54360)
+@test success(pipeline(`$(Base.julia_cmd()) --compile=min -E 'include("compiler/contextual.jl")'`; stderr))
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index ed661b2806fb5..deb1dd10681e8 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -4,40 +4,44 @@ using Random
 
 is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args...))
 
+⟷(a::T, b::T) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b
+⟷(a::T, b::T) where T <: BigInt = a == b
+
 @testset "gcd/lcm" begin
     # All Integer data types take different code paths -- test all
-    # TODO: Test gcd and lcm for BigInt.
-    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
-        @test gcd(T(3)) === T(3)
-        @test gcd(T(3), T(5)) === T(1)
-        @test gcd(T(3), T(15)) === T(3)
-        @test gcd(T(0), T(15)) === T(15)
-        @test gcd(T(15), T(0)) === T(15)
+    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt)
+        @test gcd(T(3)) ⟷ T(3)
+        @test gcd(T(3), T(5)) ⟷ T(1)
+        @test gcd(T(3), T(15)) ⟷ T(3)
+        @test gcd(T(0), T(15)) ⟷ T(15)
+        @test gcd(T(15), T(0)) ⟷ T(15)
         if T <: Signed
-            @test gcd(T(-12)) === T(12)
-            @test gcd(T(0), T(-15)) === T(15)
-            @test gcd(T(-15), T(0)) === T(15)
-            @test gcd(T(3), T(-15)) === T(3)
-            @test gcd(T(-3), T(-15)) === T(3)
+            @test gcd(T(-12)) ⟷ T(12)
+            @test gcd(T(0), T(-15)) ⟷ T(15)
+            @test gcd(T(-15), T(0)) ⟷ T(15)
+            @test gcd(T(3), T(-15)) ⟷ T(3)
+            @test gcd(T(-3), T(-15)) ⟷ T(3)
         end
-        @test gcd(T(0), T(0)) === T(0)
+        @test gcd(T(0), T(0)) ⟷ T(0)
 
-        @test gcd(T(2), T(4), T(6)) === T(2)
+        @test gcd(T(2), T(4), T(6)) ⟷ T(2)
         if T <: Signed
-            @test gcd(T(2), T(4), T(-6)) === T(2)
-            @test gcd(T(2), T(-4), T(-6)) === T(2)
-            @test gcd(T(-2), T(4), T(-6)) === T(2)
-            @test gcd(T(-2), T(-4), T(-6)) === T(2)
+            @test gcd(T(2), T(4), T(-6)) ⟷ T(2)
+            @test gcd(T(2), T(-4), T(-6)) ⟷ T(2)
+            @test gcd(T(-2), T(4), T(-6)) ⟷ T(2)
+            @test gcd(T(-2), T(-4), T(-6)) ⟷ T(2)
         end
 
-        @test gcd(typemax(T), T(1)) === T(1)
-        @test gcd(T(1), typemax(T)) === T(1)
-        @test gcd(typemax(T), T(0)) === typemax(T)
-        @test gcd(T(0), typemax(T)) === typemax(T)
-        @test gcd(typemax(T), typemax(T)) === typemax(T)
-        @test gcd(typemax(T), typemax(T)-T(1)) === T(1)     # gcd(n, n-1) = 1. n and n-1 are always coprime.
+        if T != BigInt
+            @test gcd(typemax(T), T(1)) === T(1)
+            @test gcd(T(1), typemax(T)) === T(1)
+            @test gcd(typemax(T), T(0)) === typemax(T)
+            @test gcd(T(0), typemax(T)) === typemax(T)
+            @test gcd(typemax(T), typemax(T)) === typemax(T)
+            @test gcd(typemax(T), typemax(T)-T(1)) === T(1)     # gcd(n, n-1) = 1. n and n-1 are always coprime.
+        end
 
-        if T <: Signed
+        if T <: Signed && T != BigInt
             @test gcd(-typemax(T), T(1)) === T(1)
             @test gcd(T(1), -typemax(T)) === T(1)
             @test gcd(-typemax(T), T(0)) === typemax(T)
@@ -52,7 +56,7 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
             @test_throws OverflowError gcd(typemin(T), typemin(T))
             @test_throws OverflowError gcd(typemin(T), T(0))
             @test_throws OverflowError gcd(T(0), typemin(T))
-        else
+        elseif T != BigInt
             # For Unsigned Integer types, -typemax(T) == 1.
             @test gcd(-typemax(T), T(1)) === T(1)
             @test gcd(T(1), -typemax(T)) === T(1)
@@ -71,83 +75,86 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
             @test gcd(T(0), typemin(T)) === T(0)
         end
 
-        @test lcm(T(0)) === T(0)
-        @test lcm(T(2)) === T(2)
-        @test lcm(T(2), T(3)) === T(6)
-        @test lcm(T(3), T(2)) === T(6)
-        @test lcm(T(4), T(6)) === T(12)
-        @test lcm(T(6), T(4)) === T(12)
-        @test lcm(T(3), T(0)) === T(0)
-        @test lcm(T(0), T(3)) === T(0)
-        @test lcm(T(0), T(0)) === T(0)
+        @test lcm(T(0)) ⟷ T(0)
+        @test lcm(T(2)) ⟷ T(2)
+        @test lcm(T(2), T(3)) ⟷ T(6)
+        @test lcm(T(3), T(2)) ⟷ T(6)
+        @test lcm(T(4), T(6)) ⟷ T(12)
+        @test lcm(T(6), T(4)) ⟷ T(12)
+        @test lcm(T(3), T(0)) ⟷ T(0)
+        @test lcm(T(0), T(3)) ⟷ T(0)
+        @test lcm(T(0), T(0)) ⟷ T(0)
         if T <: Signed
-            @test lcm(T(-12)) === T(12)
-            @test lcm(T(0), T(-4)) === T(0)
-            @test lcm(T(-4), T(0)) === T(0)
-            @test lcm(T(4), T(-6)) === T(12)
-            @test lcm(T(-4), T(-6)) === T(12)
+            @test lcm(T(-12)) ⟷ T(12)
+            @test lcm(T(0), T(-4)) ⟷ T(0)
+            @test lcm(T(-4), T(0)) ⟷ T(0)
+            @test lcm(T(4), T(-6)) ⟷ T(12)
+            @test lcm(T(-4), T(-6)) ⟷ T(12)
         end
 
-        @test lcm(T(2), T(4), T(6)) === T(12)
-        @test lcm(T(2), T(4), T(0)) === T(0)
+        @test lcm(T(2), T(4), T(6)) ⟷ T(12)
+        @test lcm(T(2), T(4), T(0)) ⟷ T(0)
         if T <: Signed
-            @test lcm(T(2), T(4), T(-6)) === T(12)
-            @test lcm(T(2), T(-4), T(-6)) === T(12)
-            @test lcm(T(-2), T(-4), T(-6)) === T(12)
-            @test lcm(T(-2), T(0), T(-6)) === T(0)
+            @test lcm(T(2), T(4), T(-6)) ⟷ T(12)
+            @test lcm(T(2), T(-4), T(-6)) ⟷ T(12)
+            @test lcm(T(-2), T(-4), T(-6)) ⟷ T(12)
+            @test lcm(T(-2), T(0), T(-6)) ⟷ T(0)
         end
 
-        @test lcm(typemax(T), T(1)) === typemax(T)
-        @test lcm(T(1), typemax(T)) === typemax(T)
-        @test lcm(typemax(T), T(0)) === T(0)
-        @test lcm(T(0), typemax(T)) === T(0)
-        @test lcm(typemax(T), typemax(T)) === typemax(T)
-        @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime.
-        @test_throws OverflowError lcm(typemax(T), T(2))
-
-        let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T)
-            @test lcm(x, x) === x
-            @test_throws OverflowError lcm(x, x+T(1))   # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime.
-        end
-
-        if T <: Signed
-            @test lcm(-typemax(T), T(1)) === typemax(T)
-            @test lcm(T(1), -typemax(T)) === typemax(T)
-            @test lcm(-typemax(T), T(0)) === T(0)
-            @test lcm(T(0), -typemax(T)) === T(0)
-            @test lcm(-typemax(T), -typemax(T)) === typemax(T)
-            @test lcm(typemax(T), -typemax(T)) === typemax(T)
-            @test lcm(-typemax(T), typemax(T)) === typemax(T)
-
-            @test_throws OverflowError lcm(typemin(T), T(1))
-            @test_throws OverflowError lcm(T(1), typemin(T))
-            @test lcm(typemin(T), T(0)) === T(0)
-            @test lcm(T(0), typemin(T)) === T(0)
-            @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1).
-            @test_throws OverflowError lcm(typemin(T), typemin(T))
-        else
-            # For Unsigned Integer types, -typemax(T) == 1.
-            @test lcm(-typemax(T), T(1)) === T(1)
-            @test lcm(T(1), -typemax(T)) === T(1)
-            @test lcm(-typemax(T), T(0)) === T(0)
-            @test lcm(T(0), -typemax(T)) === T(0)
-            @test lcm(-typemax(T), -typemax(T)) === T(1)
-            @test lcm(-typemax(T), typemax(T)) === typemax(T)
-            @test lcm(typemax(T), -typemax(T)) === typemax(T)
+        if T != BigInt
+            @test lcm(typemax(T), T(1)) === typemax(T)
+            @test lcm(T(1), typemax(T)) === typemax(T)
+            @test lcm(typemax(T), T(0)) === T(0)
+            @test lcm(T(0), typemax(T)) === T(0)
+            @test lcm(typemax(T), typemax(T)) === typemax(T)
+            @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime.
+            @test_throws OverflowError lcm(typemax(T), T(2))
+
+            let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T)
+                @test lcm(x, x) === x
+                @test_throws OverflowError lcm(x, x+T(1))   # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime.
+            end
 
-            # For Unsigned Integer types, typemin(T) == 0.
-            @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0)
-            @test lcm(T(1), typemin(T)) === T(0)
-            @test lcm(typemin(T), T(0)) === T(0)
-            @test lcm(T(0), typemin(T)) === T(0)
-            @test lcm(typemin(T), typemin(T)) === T(0)
-            @test lcm(typemin(T), typemin(T)+T(1)) === T(0)
+            if T <: Signed
+                @test lcm(-typemax(T), T(1)) === typemax(T)
+                @test lcm(T(1), -typemax(T)) === typemax(T)
+                @test lcm(-typemax(T), T(0)) === T(0)
+                @test lcm(T(0), -typemax(T)) === T(0)
+                @test lcm(-typemax(T), -typemax(T)) === typemax(T)
+                @test lcm(typemax(T), -typemax(T)) === typemax(T)
+                @test lcm(-typemax(T), typemax(T)) === typemax(T)
+
+                @test_throws OverflowError lcm(typemin(T), T(1))
+                @test_throws OverflowError lcm(T(1), typemin(T))
+                @test lcm(typemin(T), T(0)) === T(0)
+                @test lcm(T(0), typemin(T)) === T(0)
+                @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1).
+                @test_throws OverflowError lcm(typemin(T), typemin(T))
+            else
+                # For Unsigned Integer types, -typemax(T) == 1.
+                @test lcm(-typemax(T), T(1)) === T(1)
+                @test lcm(T(1), -typemax(T)) === T(1)
+                @test lcm(-typemax(T), T(0)) === T(0)
+                @test lcm(T(0), -typemax(T)) === T(0)
+                @test lcm(-typemax(T), -typemax(T)) === T(1)
+                @test lcm(-typemax(T), typemax(T)) === typemax(T)
+                @test lcm(typemax(T), -typemax(T)) === typemax(T)
+
+                # For Unsigned Integer types, typemin(T) == 0.
+                @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0)
+                @test lcm(T(1), typemin(T)) === T(0)
+                @test lcm(typemin(T), T(0)) === T(0)
+                @test lcm(T(0), typemin(T)) === T(0)
+                @test lcm(typemin(T), typemin(T)) === T(0)
+                @test lcm(typemin(T), typemin(T)+T(1)) === T(0)
+            end
         end
     end
     @test lcm(0x5, 3) == 15
     @test gcd(0xf, 20) == 5
     @test gcd(UInt32(6), Int8(-50)) == 2
     @test gcd(typemax(UInt), -16) == 1
+    @test gcd(typemax(UInt), BigInt(1236189723689716298376189726398761298361892)) == 1
 
     @testset "effects" begin
         @test is_effect_free(gcd, Tuple{Int,Int})
@@ -156,45 +163,48 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
 end
 
 @testset "gcd/lcm for arrays" begin
-    # TODO: Test gcd and lcm for BigInt arrays.
-    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
-        @test gcd(T[]) === T(0)
-        @test gcd(T[3, 5]) === T(1)
-        @test gcd(T[3, 15]) === T(3)
-        @test gcd(T[0, 15]) === T(15)
+    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt)
+        @test gcd(T[]) ⟷ T(0)
+        @test gcd(T[3, 5]) ⟷ T(1)
+        @test gcd(T[3, 15]) ⟷ T(3)
+        @test gcd(T[0, 15]) ⟷ T(15)
         if T <: Signed
-            @test gcd(T[-12]) === T(12)
-            @test gcd(T[3,-15]) === T(3)
-            @test gcd(T[-3,-15]) === T(3)
+            @test gcd(T[-12]) ⟷ T(12)
+            @test gcd(T[3,-15]) ⟷ T(3)
+            @test gcd(T[-3,-15]) ⟷ T(3)
         end
-        @test gcd(T[0, 0]) === T(0)
+        @test gcd(T[0, 0]) ⟷ T(0)
 
-        @test gcd(T[2, 4, 6]) === T(2)
-        @test gcd(T[2, 4, 3, 5]) === T(1)
+        @test gcd(T[2, 4, 6]) ⟷ T(2)
+        @test gcd(T[2, 4, 3, 5]) ⟷ T(1)
 
-        @test lcm(T[]) === T(1)
-        @test lcm(T[2, 3]) === T(6)
-        @test lcm(T[4, 6]) === T(12)
-        @test lcm(T[3, 0]) === T(0)
-        @test lcm(T[0, 0]) === T(0)
+        @test lcm(T[]) ⟷ T(1)
+        @test lcm(T[2, 3]) ⟷ T(6)
+        @test lcm(T[4, 6]) ⟷ T(12)
+        @test lcm(T[3, 0]) ⟷ T(0)
+        @test lcm(T[0, 0]) ⟷ T(0)
         if T <: Signed
-            @test lcm(T[-2]) === T(2)
-            @test lcm(T[4, -6]) === T(12)
-            @test lcm(T[-4, -6]) === T(12)
+            @test lcm(T[-2]) ⟷ T(2)
+            @test lcm(T[4, -6]) ⟷ T(12)
+            @test lcm(T[-4, -6]) ⟷ T(12)
         end
 
-        @test lcm(T[2, 4, 6]) === T(12)
+        @test lcm(T[2, 4, 6]) ⟷ T(12)
     end
 end
 
+⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b
+⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: BigInt = a == b
 @testset "gcdx" begin
-    # TODO: Test gcdx for BigInt.
-    for T in (Int8, Int16, Int32, Int64, Int128)
-        @test gcdx(T(5), T(12)) === (T(1), T(5), T(-2))
-        @test gcdx(T(5), T(-12)) === (T(1), T(5), T(2))
-        @test gcdx(T(-5), T(12)) === (T(1), T(-5), T(-2))
-        @test gcdx(T(-5), T(-12)) === (T(1), T(-5), T(2))
-        @test gcdx(T(-25), T(-4)) === (T(1), T(-1), T(6))
+    for T in (Int8, Int16, Int32, Int64, Int128, BigInt)
+        @test gcdx(T(5), T(12)) ⟷ (T(1), T(5), T(-2))
+        @test gcdx(T(5), T(-12)) ⟷ (T(1), T(5), T(2))
+        @test gcdx(T(-5), T(12)) ⟷ (T(1), T(-5), T(-2))
+        @test gcdx(T(-5), T(-12)) ⟷ (T(1), T(-5), T(2))
+        @test gcdx(T(-25), T(-4)) ⟷ (T(1), T(-1), T(6))
+        @test gcdx(T(0), T(0)) ⟷ (T(0), T(0), T(0))
+        @test gcdx(T(8), T(0)) ⟷ (T(8), T(1), T(0))
+        @test gcdx(T(0), T(-8)) ⟷ (T(8), T(0), T(-1))
     end
     x, y = Int8(-12), UInt(100)
     d, u, v = gcdx(x, y)
@@ -583,6 +593,10 @@ end
             x>=0 && @test binomial(x,x-T(2)) == div(x*(x-1), 2)
         end
         @test @inferred(binomial(one(T),one(T))) isa T
+
+        # Arguments of different Integer types do not lead to computation of
+        # generalized binomial coefficient (issue #54296)
+        @test @inferred(binomial(Int64(5), T(2))) === Int64(10)
     end
     for x in ((false,false), (false,true), (true,false), (true,true))
         @test binomial(x...) == (x != (false,true))
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 8e4ab932f5eb6..7a63cd1c0a62e 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -197,8 +197,8 @@ for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :se
     @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing
     @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing
 end
-@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing
-@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing
+@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) === nothing
+@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) === nothing
 
 primitive type Int256 <: Signed 256 end
 Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
@@ -220,7 +220,7 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent)
             end
             @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
-            if sizeof(r) > 8
+            if sizeof(r) > 2*sizeof(Int)
                 @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent)
@@ -345,3 +345,16 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")")
         @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
     end
 end)()
+
+@testset "issue #54548" begin
+    @inline passthrough(ptr::Core.LLVMPtr{T,A}) where {T,A} = Base.llvmcall(("""
+            define ptr addrspace(1) @entry(ptr addrspace(1) %0) #0 {
+            entry:
+                ret ptr addrspace(1) %0
+            }
+
+            attributes #0 = { alwaysinline }""", "entry"),
+        Core.LLVMPtr{T,A}, Tuple{Core.LLVMPtr{T,A}}, ptr)
+    f(gws) = passthrough(Core.bitcast(Core.LLVMPtr{UInt32,1}, gws))
+    f(C_NULL)
+end
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index 6151f90f297ee..b5b34a2dbed8c 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -196,6 +196,31 @@ end
     @test position(skip(io, -3)) == 0
 end
 
+@testset "issue #53908" begin
+    @testset "offset $first" for first in (false, true)
+        b = collect(0x01:0x05)
+        sizehint!(b, 100; first) # make offset non zero
+        io = IOBuffer(b)
+        @test position(skip(io, 4)) == 4
+        @test position(skip(io, typemax(Int))) == 5
+        @test position(skip(io, typemax(Int128))) == 5
+        @test position(skip(io, typemax(Int32))) == 5
+        @test position(skip(io, typemin(Int))) == 0
+        @test position(skip(io, typemin(Int128))) == 0
+        @test position(skip(io, typemin(Int32))) == 0
+        @test position(skip(io, 4)) == 4
+        @test position(skip(io, -2)) == 2
+        @test position(skip(io, -2)) == 0
+        @test position(seek(io, -2)) == 0
+        @test position(seek(io, typemax(Int))) == 5
+        @test position(seek(io, typemax(Int128))) == 5
+        @test position(seek(io, typemax(Int32))) == 5
+        @test position(seek(io, typemin(Int))) == 0
+        @test position(seek(io, typemin(Int128))) == 0
+        @test position(seek(io, typemin(Int32))) == 0
+    end
+end
+
 @testset "pr #11554" begin
     io  = IOBuffer(SubString("***αhelloworldω***", 4, 16))
     io2 = IOBuffer(Vector{UInt8}(b"goodnightmoon"), read=true, write=true)
@@ -252,6 +277,7 @@ end
     c = zeros(UInt8,8)
     @test bytesavailable(bstream) == 8
     @test !eof(bstream)
+    @test Base.reseteof(bstream) === nothing # TODO: Actually test intended effect
     read!(bstream,c)
     @test c == a[3:10]
     @test closewrite(bstream) === nothing
@@ -325,7 +351,7 @@ end
     a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
     mark(a) # mark at position 0
     write(a, "Hello!")
-    @test Base.compact(a) == nothing # because pointer > mark
+    @test Base.compact(a) === nothing # because pointer > mark
     close(a)
     b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
     write(b, "Hello!")
@@ -358,3 +384,8 @@ end
     seek(io,0)
     @test Base.read_sub(io,v,1,1) == [1,0]
 end
+
+@testset "with offset" begin
+    b = pushfirst!([0x02], 0x01)
+    @test take!(IOBuffer(b)) == [0x01, 0x02]
+end
diff --git a/test/iostream.jl b/test/iostream.jl
index 4ba2423f0f558..13d01e61bbf8c 100644
--- a/test/iostream.jl
+++ b/test/iostream.jl
@@ -190,3 +190,7 @@ end
     @test all(T -> T <: Union{UInt, Int}, Base.return_types(unsafe_write, (IO, Ptr{UInt8}, UInt)))
     @test all(T -> T === Bool, Base.return_types(eof, (IO,)))
 end
+
+@testset "fd" begin
+    @test open(fd, tempname(), "w") isa RawFD
+end
diff --git a/test/iterators.jl b/test/iterators.jl
index 3616a17b31d31..0df4d9afd371a 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -499,7 +499,7 @@ end
 @test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite()
 
 @test Base.iterate(product()) == ((), true)
-@test Base.iterate(product(), 1) == nothing
+@test Base.iterate(product(), 1) === nothing
 
 # intersection
 @test intersect(product(1:3, 4:6), product(2:4, 3:5)) == Iterators.ProductIterator((2:3, 4:5))
@@ -978,13 +978,6 @@ end
     @test accumulate(+, (x^2 for x in 1:3); init=100) == [101, 105, 114]
 end
 
-
-@testset "Iterators.tail_if_any" begin
-    @test Iterators.tail_if_any(()) == ()
-    @test Iterators.tail_if_any((1, 2)) == (2,)
-    @test Iterators.tail_if_any((1,)) == ()
-end
-
 @testset "IteratorSize trait for zip" begin
     @test Base.IteratorSize(zip()) == Base.IsInfinite()                     # for zip of empty tuple
     @test Base.IteratorSize(zip((1,2,3), repeated(0))) == Base.HasLength()  # for zip of ::HasLength and ::IsInfinite
@@ -1000,7 +993,7 @@ end
 end
 
 @testset "Iterators.peel" begin
-    @test Iterators.peel([]) == nothing
+    @test Iterators.peel([]) === nothing
     @test Iterators.peel(1:10)[1] == 1
     @test Iterators.peel(1:10)[2] |> collect == 2:10
     @test Iterators.peel(x^2 for x in 2:4)[1] == 4
@@ -1032,17 +1025,50 @@ end
     @test collect(Iterators.partition(lstrip("01111", '0'), 2)) == ["11", "11"]
 end
 
+@testset "IterableStringPairs" begin
+    for s in ["", "a", "abcde", "γ", "∋γa"]
+        for T in (String, SubString, GenericString)
+            sT = T(s)
+            p = pairs(sT)
+            @test collect(p) == [k=>v for (k,v) in zip(keys(sT), sT)]
+            rv = Iterators.reverse(p)
+            @test collect(rv) == reverse([k=>v for (k,v) in zip(keys(sT), sT)])
+            rrv = Iterators.reverse(rv)
+            @test collect(rrv) == collect(p)
+        end
+    end
+end
+
 let itr = (i for i in 1:9) # Base.eltype == Any
     @test first(Iterators.partition(itr, 3)) isa Vector{Any}
     @test collect(zip(repeat([Iterators.Stateful(itr)], 3)...)) == [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
 end
 
-@testset "no single-argument map methods" begin
-    maps = (tuple, Returns(nothing), (() -> nothing))
-    mappers = (Iterators.map, map, foreach)
-    for f ∈ maps, m ∈ mappers
-        @test !applicable(m, f)
-        @test !hasmethod(m, Tuple{typeof(f)})
+@testset "map/reduce/mapreduce without an iterator argument" begin
+    maps = map(Returns, (nothing, 3, 3:2, 3:3, (), (3,)))
+    mappers1 = (Iterators.map, map, foreach, reduce, foldl, foldr)
+    mappers2 = (mapreduce, mapfoldl, mapfoldr)
+
+    @testset "map/reduce" begin
+        @testset "r: $r" for r ∈ mappers1
+            @testset "f: $f" for f ∈ maps
+                @test_throws MethodError r(f)
+                @test !applicable(r, f)
+                @test !hasmethod(r, Tuple{typeof(f)})
+            end
+        end
+    end
+
+    @testset "mapreduce" begin
+        @testset "mr: $mr" for mr ∈ mappers2
+            @testset "f: $f" for f ∈ maps
+                @testset "g: $g" for g ∈ maps
+                    @test_throws MethodError mr(f, g)
+                    @test !applicable(mr, f, g)
+                    @test !hasmethod(mr, Tuple{typeof(f),typeof(g)})
+                end
+            end
+        end
     end
 end
 
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index 98968bfcdf8bc..c83ac05b1ec48 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -70,13 +70,13 @@ end
        ret i32 %3""", Int32, Tuple{Int32, Int32},
         Int32(1), Int32(2))) # llvmcall must be compiled to be called
 
-# Test whether declarations work properly
+#Since LLVM 18, LLVM does a best effort to automatically include the intrinsics
 function undeclared_ceil(x::Float64)
     llvmcall("""%2 = call double @llvm.ceil.f64(double %0)
         ret double %2""", Float64, Tuple{Float64}, x)
 end
-@test_throws ErrorException undeclared_ceil(4.2)
-@test_throws ErrorException undeclared_ceil(4.2)
+@test undeclared_ceil(4.2) == 5.0
+@test undeclared_ceil(4.2) == 5.0
 
 function declared_floor(x::Float64)
     llvmcall(
diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile
index 7318d1b67da02..d9fdfa190f3cf 100644
--- a/test/llvmpasses/Makefile
+++ b/test/llvmpasses/Makefile
@@ -30,4 +30,7 @@ update-help:
 	$(JULIAHOME)/deps/srccache/llvm/llvm/utils/update_test_checks.py \
 	--help
 
-.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all .
+clean:
+	rm -rf .lit_test_times.txt Output
+
+.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all clean update-help .
diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll
new file mode 100644
index 0000000000000..e19093f46f815
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-bits.ll
@@ -0,0 +1,37 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s 
+
+
+@tag = external addrspace(10) global {}
+
+@glob = external addrspace(10) global {}
+
+; Test that the gc_preserve intrinsics are deleted directly.
+
+; CHECK-LABEL: @ptr_and_bits
+; CHECK-NOT: alloca 
+; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj
+
+define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag)
+  
+  %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1
+  store ptr addrspace(10) @glob, ptr addrspace(10) %g0
+  
+  %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0
+  store i64 7, ptr addrspace(10) %g1
+
+  %res = load ptr addrspace(10), ptr addrspace(10) %g0
+  %res2 = load i64, ptr addrspace(10) %g1
+  ret void
+}
+
+declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.get_pgcstack()
diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
index 9f9dc7056152a..b96c9385e38eb 100644
--- a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
+++ b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -17,12 +15,6 @@ declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 ; Test that non-0 addrspace allocas are properly emitted and handled
 
 ; CHECK-LABEL: @non_zero_addrspace
-; TYPED: %1 = alloca i32, align 8, addrspace(5)
-
-; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)*
-; TYPED: %var1 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)*
-; TYPED: %3 = addrspacecast {} addrspace(5)* %var1 to {}*
-; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2)
 
 ; OPAQUE: %var1 = alloca i32, align 8, addrspace(5)
 ; OPAQUE: %1 = addrspacecast ptr addrspace(5) %var1 to ptr
diff --git a/test/llvmpasses/alloc-opt-gcframe.ll b/test/llvmpasses/alloc-opt-gcframe.ll
index e01bd900c71e7..f53a4d5c01df7 100644
--- a/test/llvmpasses/alloc-opt-gcframe.ll
+++ b/test/llvmpasses/alloc-opt-gcframe.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -11,18 +9,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-LABEL: @return_obj
 ; CHECK-NOT: @julia.gc_alloc_obj
 
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
-; TYPED: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
-
 ; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12
-; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
+; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
 ; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc_instrumented(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
+; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 
 define {} addrspace(10)* @return_obj() {
@@ -37,8 +27,7 @@ define {} addrspace(10)* @return_obj() {
 ; CHECK-LABEL: @return_load
 ; CHECK: alloca i64
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; CHECK-NOT: @tag
 ; CHECK-NOT: @llvm.lifetime.end
@@ -57,11 +46,9 @@ define i64 @return_load(i64 %i) {
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @ccall_obj
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_obj(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -76,17 +63,12 @@ define void @ccall_obj(i8* %fptr) {
 
 ; CHECK-LABEL: @ccall_ptr
 ; CHECK: alloca i64
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
-; TYPED: %f = bitcast i8* %fptr to void (i8*)*
-
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; OPAQUE: %f = bitcast ptr %fptr to ptr
 ; Currently the GC frame lowering pass strips away all operand bundles
-; TYPED-NEXT: call void %f(i8*
 ; OPAQUE-NEXT: call void %f(ptr
 ; CHECK-NEXT: ret void
 define void @ccall_ptr(i8* %fptr) {
@@ -104,11 +86,9 @@ define void @ccall_ptr(i8* %fptr) {
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @ccall_unknown_bundle
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_unknown_bundle(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -126,13 +106,10 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 
 ; CHECK-LABEL: @lifetime_branches
 ; CHECK: alloca i64
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK: L1:
 ; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
 
-; TYPED: %f = bitcast i8* %fptr to void (i8*)*
-; TYPED-NEXT: call void %f(i8*
 
 ; OPAQUE: %f = bitcast ptr %fptr to ptr
 ; OPAQUE-NEXT: call void %f(ptr
@@ -140,10 +117,8 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 ; CHECK-NEXT: br i1 %b2, label %L2, label %L3
 
 ; CHECK: L2:
-; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)*
 ; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr
 ; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
-; TYPED-NEXT: call void %f2({}* null)
 ; OPAQUE-NEXT: call void %f2(ptr null)
 
 ; CHECK: L3:
@@ -174,10 +149,9 @@ L3:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @object_field
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4
 define void @object_field({} addrspace(10)* %field) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -193,11 +167,9 @@ define void @object_field({} addrspace(10)* %field) {
 
 ; CHECK-LABEL: @memcpy_opt
 ; CHECK: alloca [16 x i8], align 16
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.memcpy.p0.p0.i64
 define void @memcpy_opt(i8* %v22) {
 top:
@@ -213,10 +185,9 @@ top:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_opt
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: @llvm.lifetime.end
 ; CHECK: @external_function
 define void @preserve_opt(i8* %v22) {
@@ -236,7 +207,6 @@ top:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_branches
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK: L1:
 ; CHECK-NEXT: @external_function()
@@ -268,11 +238,8 @@ L3:
 }
 ; CHECK-LABEL: }{{$}}
 
-; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*,
-; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*,
-
-; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc_instrumented(ptr,
-; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc_instrumented(ptr,
+; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_small_alloc(ptr,
+; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr,
 declare void @external_function()
 declare {}*** @julia.get_pgcstack()
 declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*)
diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll
index 6bee0fd325105..b962157120456 100644
--- a/test/llvmpasses/alloc-opt-pass.ll
+++ b/test/llvmpasses/alloc-opt-pass.ll
@@ -1,15 +1,12 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}
 
 ; Test that the gc_preserve intrinsics are deleted directly.
 
 ; CHECK-LABEL: @preserve_branches
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
 ; CHECK: L1:
 ; CHECK-NOT: @llvm.julia.gc_preserve_begin
@@ -21,32 +18,30 @@
 ; CHECK-NEXT: br label %L3
 
 ; CHECK: L3:
-define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
+define void @preserve_branches(ptr %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
   br i1 %b, label %L1, label %L3
 
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %v)
+L1:                                               ; preds = %0
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) nonnull %v)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
 
-L2:
+L2:                                               ; preds = %L1
   call void @external_function()
   br label %L3
 
-L3:
+L3:                                               ; preds = %L2, %L1, %0
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_branches2
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
 ; CHECK: L1:
-; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
 ; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2
 ; CHECK-NEXT: @external_function()
 ; CHECK-NEXT: br i1 %b2, label %L2, label %L3
@@ -56,24 +51,24 @@ L3:
 ; CHECK-NEXT: br label %L3
 
 ; CHECK: L3:
-define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v2 = call {} addrspace(10)* @external_function2()
+define void @preserve_branches2(ptr %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v2 = call ptr addrspace(10) @external_function2()
   br i1 %b, label %L1, label %L3
 
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* nonnull %v2)
+L1:                                               ; preds = %0
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) %v, ptr addrspace(10) nonnull %v2)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
 
-L2:
+L2:                                               ; preds = %L1
   call void @external_function()
   br label %L3
 
-L3:
+L3:                                               ; preds = %L2, %L1, %0
   ret void
 }
 ; CHECK-LABEL: }{{$}}
@@ -84,60 +79,61 @@ L3:
 ; CHECK: store [12 x i8] zeroinitializer,
 ; CHECK: ret void
 define void @legal_int_types() {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 12, {} addrspace(10)* @tag)
-  %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)*
-  %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2)
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 12, ptr addrspace(10) @tag)
+  %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
+  %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
-
 declare void @external_function()
-declare {} addrspace(10)* @external_function2()
-declare {}*** @julia.ptls_states()
-declare {}*** @julia.get_pgcstack()
-declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
-declare {}* @julia.pointer_from_objref({} addrspace(11)*)
-declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+declare ptr addrspace(10) @external_function2()
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.get_pgcstack()
+
+declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
+
+declare ptr @julia.pointer_from_objref(ptr addrspace(11))
+
 declare token @llvm.julia.gc_preserve_begin(...)
+
 declare void @llvm.julia.gc_preserve_end(token)
 
 ; CHECK-LABEL: @memref_collision
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
-; TYPED-NOT: store {}
 ; OPAQUE-NOT: store ptr
 ; CHECK: store i
-; TYPED-NOT: store {}
 ; OPAQUE-NOT: store ptr
 ; CHECK: L1:
-; TYPED: load {}
 ; OPAQUE: load ptr
 ; CHECK: L2:
 ; CHECK: load i
 define void @memref_collision(i64 %x) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %v_p = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-  store i64 %x, i64 addrspace(10)* %v_p
-  br i1 0, label %L1, label %L2
-
-L1:
-  %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-  %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %v_p = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  store i64 %x, ptr addrspace(10) %v_p, align 4
+  br i1 false, label %L1, label %L2
+
+L1:                                               ; preds = %0
+  %v1 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  %v1_x = load ptr addrspace(10), ptr addrspace(10) %v1, align 8
   ret void
 
-L2:
-  %v2 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-  %v2_x = load i64, i64 addrspace(10)* %v2
+L2:                                               ; preds = %0
+  %v2 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  %v2_x = load i64, ptr addrspace(10) %v2, align 4
   ret void
 }
+
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @lifetime_no_preserve_end
@@ -146,19 +142,19 @@ L2:
 ; CHECK: call void @llvm.lifetime.start
 ; CHECK: store [8 x i8] zeroinitializer,
 ; CHECK-NOT: call void @llvm.lifetime.end
-define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
-  %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived)
-  %ptr_raw = bitcast {}* %ptr to i8*
-  call void @external_function() ; safepoint
-  %ret_raw = bitcast {}* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false)
-  %ret_raw2 = bitcast {}* %0 to i8*
+define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %token = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) %v)
+  %v_derived = addrspacecast ptr addrspace(10) %v to ptr addrspace(11)
+  %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %v_derived)
+  %ptr_raw = bitcast ptr %ptr to ptr
+  call void @external_function()
+  %ret_raw = bitcast ptr %0 to ptr
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %ret_raw, ptr align 8 %ptr_raw, i64 0, i1 false)
+  %ret_raw2 = bitcast ptr %0 to ptr
   ret void
 }
 ; CHECK-LABEL: }{{$}}
@@ -175,26 +171,49 @@ define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret
 ; CHECK-NOT: zeroinitializer
 ; CHECK: ret void
 define void @initializers() {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-
-  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 1, {} addrspace(10)* @tag) #0
-  %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)*
-  %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2)
-
-  %var4 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 2, {} addrspace(10)* @tag) #1
-  %var5 = addrspacecast {} addrspace(10)* %var4 to {} addrspace(11)*
-  %var6 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var5)
-
-  %var7 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 3, {} addrspace(10)* @tag) #2
-  %var8 = addrspacecast {} addrspace(10)* %var7 to {} addrspace(11)*
-  %var9 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var8)
-
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1
+  %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
+  %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
+  %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2
+  %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11)
+  %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5)
+  %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3
+  %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11)
+  %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8)
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
-attributes #0 = { allockind("alloc") }
-attributes #1 = { allockind("alloc,uninitialized") }
-attributes #2 = { allockind("alloc,zeroed") }
+; Test that the pass handles dead basic blocks with references to the allocation
+; CHECK-LABEL: @nopreds
+; CHECK: alloca i8, i64 0, align 1
+; CHECK: call void @llvm.lifetime.start
+define swiftcc { ptr addrspace(10), i8 } @nopreds() {
+top:
+  %0 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr null, i64 0, ptr addrspace(10) null)
+  %1 = addrspacecast ptr addrspace(10) %0 to ptr addrspace(11)
+  br label %common.ret
+
+common.ret:                                       ; preds = %union_move9, %top
+  ret { ptr addrspace(10), i8 } zeroinitializer
+
+union_move9:                                      ; No predecessors!
+  call void @llvm.memcpy.p0.p11.i64(ptr null, ptr addrspace(11) %1, i64 0, i1 false)
+  br label %common.ret
+}
+; CHECK-LABEL: }{{$}}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+attributes #1 = { allockind("alloc") }
+attributes #2 = { allockind("alloc,uninitialized") }
+attributes #3 = { allockind("alloc,zeroed") }
diff --git a/test/llvmpasses/alloc-opt-pipeline.jl b/test/llvmpasses/alloc-opt-pipeline.jl
index 9437913e4054b..e84348ec4a8c6 100644
--- a/test/llvmpasses/alloc-opt-pipeline.jl
+++ b/test/llvmpasses/alloc-opt-pipeline.jl
@@ -17,7 +17,7 @@ end
 
 # CHECK-LABEL: @julia_haszerolayout
 # CHECK: top:
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @jl_gc_small_alloc
 # CHECK: extractelement
 # CHECK: ret i8
 emit(haszerolayout, NTuple{32,VecElement{UInt8}})
diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll
index c3ea626c57f45..d435ab1490cfc 100644
--- a/test/llvmpasses/alloc-opt-unsized.ll
+++ b/test/llvmpasses/alloc-opt-unsized.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE
 
 source_filename = "text"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
@@ -29,15 +27,6 @@ entry:
   ret void
 }
 
-; TYPED:   %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16
-; TYPED:   %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8*
-; TYPED:   %i18 = bitcast i8* %[[i1]] to {}*
-; TYPED:   %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)**
-; TYPED:   %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i
-; TYPED:   store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8
-; TYPED:   %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}*
-; TYPED:   %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8
-
 ; OPAQUE:   %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16
 ; OPAQUE:   %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i
 ; OPAQUE:   store ptr addrspace(10) %arg, ptr %i23, align 8
diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll
index 323f5e24015e9..073597fbcdc66 100644
--- a/test/llvmpasses/cpu-features.ll
+++ b/test/llvmpasses/cpu-features.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
 target triple = "x86_64-linux-gnu"
 
diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl
index dd0892be56a0b..3c4c1d491ec28 100644
--- a/test/llvmpasses/fastmath.jl
+++ b/test/llvmpasses/fastmath.jl
@@ -16,29 +16,3 @@ import Base.FastMath
 
 # CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32")
 emit(FastMath.sqrt_fast, Float32)
-
-
-# Float16 operations should be performed as Float32, unless @fastmath is specified
-# TODO: this is not true for platforms that natively support Float16
-
-foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK-DAG: %[[XEXT:[0-9]+]] = fpext half %[[X]] to float
-# CHECK-DAG: %[[YEXT:[0-9]+]] = fpext half %[[Y]] to float
-# CHECK: %[[DIFF:[0-9]+]] = fsub float %[[XEXT]], %[[YEXT]]
-# CHECK: %[[TRUNC:[0-9]+]] = fptrunc float %[[DIFF]] to half
-# CHECK: %[[DIFFEXT:[0-9]+]] = fpext half %[[TRUNC]] to float
-# CHECK: %[[CMP:[0-9]+]] = fcmp oeq float %[[DIFFEXT]], 0.000000e+00
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
-
-@fastmath foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK: %[[DIFF:[0-9]+]] = fsub fast half %[[X]], %[[Y]]
-# CHECK: %[[CMP:[0-9]+]] = fcmp fast oeq half %[[DIFF]], 0xH0000
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll
index 4632c312ae0e3..db80188fc206d 100644
--- a/test/llvmpasses/final-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/final-lower-gc-addrspaces.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -25,11 +23,8 @@ attributes #0 = { allocsize(1) }
 define void @gc_frame_addrspace(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_addrspace
-; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5)
 ; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5)
-; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)**
 ; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr
-; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8*
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
   %pgcstack = call {}*** @julia.get_pgcstack()
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index eb3b68662c2b4..f8e123fdc6aea 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 @tag = external addrspace(10) global {}
@@ -23,21 +21,10 @@ attributes #0 = { allocsize(1) }
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack()
 ; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack()
   %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0
-; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64*
-; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0
-; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
-; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}***
-; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8
-; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0
-; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)***
-; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8
 
 ; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0
 ; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0
@@ -47,22 +34,15 @@ top:
 ; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
   %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
-; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3
 ; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3
   %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
   store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8
   %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b)
-; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
 ; OPAQUE: %frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
   %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
   store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8
-; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
 ; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed)
   call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
-; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
-; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
-; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)**
-; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0
 
 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1
 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
@@ -78,8 +58,7 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; TYPED: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc_instrumented
-; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc_instrumented
+; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc
   %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8, i64 12341234)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
@@ -93,9 +72,7 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %0 = add i64 %size, 8
-; TYPED: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i64 12341234)
-; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, i64 12341234)
+; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %size, i64 12341234)
   %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size, i64 12341234)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll
index 0c37be449d959..d1dfb6aca11dd 100644
--- a/test/llvmpasses/float16.ll
+++ b/test/llvmpasses/float16.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
+; RUN: opt  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
 
 define half @demote_half_test(half %a, half %b) #0 {
 top:
@@ -101,7 +99,7 @@ top:
   ret half %13
 }
 
-define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) {
+define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 {
 top:
 ; CHECK-LABEL: @demote_bfloat_test(
 ; CHECK-NEXT:  top:
@@ -162,5 +160,70 @@ top:
   ret bfloat %13
 }
 
-attributes #0 = { "target-features"="-avx512fp16" }
-attributes #1 = { "target-features"="+avx512fp16" }
+define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 {
+top:
+; CHECK-LABEL: @native_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %0 = fadd bfloat %a, %b
+; CHECK-NEXT:    %1 = fadd bfloat %0, %b
+; CHECK-NEXT:    %2 = fadd bfloat %1, %b
+; CHECK-NEXT:    %3 = fmul bfloat %2, %b
+; CHECK-NEXT:    %4 = fdiv bfloat %3, %b
+; CHECK-NEXT:    %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+; CHECK-NEXT:    %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+; CHECK-NEXT:    %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+; CHECK-NEXT:    %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+; CHECK-NEXT:    %9 = fadd <2 x bfloat> %6, %8
+; CHECK-NEXT:    %10 = extractelement <2 x bfloat> %9, i32 0
+; CHECK-NEXT:    %11 = extractelement <2 x bfloat> %9, i32 1
+; CHECK-NEXT:    %12 = fadd bfloat %10, %11
+; CHECK-NEXT:    %13 = fadd bfloat %12, %4
+; CHECK-NEXT:    ret bfloat %13
+;
+  %0 = fadd bfloat %a, %b
+  %1 = fadd bfloat %0, %b
+  %2 = fadd bfloat %1, %b
+  %3 = fmul bfloat %2, %b
+  %4 = fdiv bfloat %3, %b
+  %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+  %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+  %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+  %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+  %9 = fadd <2 x bfloat> %6, %8
+  %10 = extractelement <2 x bfloat> %9, i32 0
+  %11 = extractelement <2 x bfloat> %9, i32 1
+  %12 = fadd bfloat %10, %11
+  %13 = fadd bfloat %12, %4
+  ret bfloat %13
+}
+
+define i1 @fast_half_test(half %0, half %1) #0 {
+top:
+; CHECK-LABEL: @fast_half_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast half %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq half %2, 0xH0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast half %0, %1
+  %3 = fcmp fast oeq half %2, 0xH0000
+  ret i1 %3
+}
+
+define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 {
+top:
+; CHECK-LABEL: @fast_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast bfloat %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq bfloat %2, 0xR0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast bfloat %0, %1
+  %3 = fcmp fast oeq bfloat %2, 0xR0000
+  ret i1 %3
+}
+
+attributes #0 = { "julia.hasfp16"="false" }
+attributes #1 = { "julia.hasfp16"="true" }
+attributes #2 = { "julia.hasbf16"="false" }
+attributes #3 = { "julia.hasbf16"="true" }
diff --git a/test/llvmpasses/gc-invariant-verifier.ll b/test/llvmpasses/gc-invariant-verifier.ll
new file mode 100644
index 0000000000000..652fabc742aad
--- /dev/null
+++ b/test/llvmpasses/gc-invariant-verifier.ll
@@ -0,0 +1,13 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(GCInvariantVerifier)' -S %s | FileCheck %s
+
+; CHECK-LABEL: @vectorized_addrspacecast
+define ptr addrspace(10) @vectorized_addrspacecast() {
+top:
+  ret ptr addrspace(10) null
+
+vector.ph:
+  %0 = addrspacecast <4 x ptr addrspace(10)> zeroinitializer to <4 x ptr addrspace(11)>
+  unreachable
+}
diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll
index 90d7980e862c6..9f9282cd3c870 100644
--- a/test/llvmpasses/gcroots.ll
+++ b/test/llvmpasses/gcroots.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
@@ -17,13 +15,9 @@ top:
 ; CHECK-LABEL: @simple
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
-; TYPED: call {} addrspace(10)* @jl_box_int64
 ; OPAQUE: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
@@ -31,9 +25,6 @@ top:
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
 
-; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]
-; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
 ; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]]
 ; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]
@@ -49,7 +40,6 @@ define void @leftover_alloca({} addrspace(10)* %a) {
 ; If this pass encounters an alloca, it'll just sink it into the gcframe,
 ; relying on mem2reg to catch simple cases such as this earlier
 ; CHECK-LABEL: @leftover_alloca
-; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe
 ; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -68,12 +58,8 @@ define void @simple_union() {
 ; CHECK-LABEL: @simple_union
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret()
 ; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret()
     %a = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0
-; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-NEXT: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %a, 0
@@ -99,7 +85,6 @@ define void @select_simple(i64 %a, i64 %b) {
 define void @phi_simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_simple
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -113,8 +98,6 @@ blabel:
     br label %common
 common:
     %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ]
-; TYPED:  [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]]
 
 ; OPAQUE:  [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP]]
@@ -126,7 +109,6 @@ declare void @one_arg_decayed(i64 addrspace(12)*)
 
 define void @select_lift(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -135,7 +117,6 @@ define void @select_lift(i64 %a, i64 %b) {
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
     %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)*
     %cmp = icmp eq i64 %a, %b
-; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed
 ; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed
     %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed
     call void @one_arg_decayed(i64 addrspace(12)* %selectb)
@@ -145,7 +126,6 @@ define void @select_lift(i64 %a, i64 %b) {
 define void @phi_lift(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift
-; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
 ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -175,7 +155,6 @@ top:
     br i1 %cmp, label %alabel, label %blabel
 alabel:
     %u = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0
 ; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0
     %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -186,7 +165,6 @@ blabel:
     %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)*
     br label %common
 common:
-; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ]
 ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ]
     %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ]
     call void @one_arg_decayed(i64 addrspace(12)* %phi)
@@ -196,7 +174,6 @@ common:
 define void @live_if_live_out(i64 %a, i64 %b) {
 ; CHECK-LABEL: @live_if_live_out
 top:
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -215,12 +192,10 @@ succ:
 ; safepoint
 define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ret_use
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
     ret {} addrspace(10)* %aboxed
@@ -228,16 +203,11 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 
 define {{} addrspace(10)*, i8} @ret_use_struct() {
 ; CHECK-LABEL: @ret_use_struct
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret()
 ; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret()
     %aunion = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0
-; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0
@@ -271,12 +241,10 @@ top:
 
 define void @global_ref() {
 ; CHECK-LABEL: @global_ref
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1)
-; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)**
 ; OPAQUE: store ptr addrspace(10) %loaded, ptr
     call void @one_arg_boxed({} addrspace(10)* %loaded)
     ret void
@@ -284,13 +252,11 @@ define void @global_ref() {
 
 define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) {
 ; CHECK-LABEL: @no_redundant_rerooting
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
 ; CHECK-NEXT: call void @jl_safepoint()
     call void @jl_safepoint()
@@ -311,13 +277,11 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1)
 
 define void @memcpy_use(i64 %a, i64 *%aptr) {
 ; CHECK-LABEL: @memcpy_use
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)*
@@ -330,23 +294,19 @@ declare void @llvm.julia.gc_preserve_end(token)
 
 define void @gc_preserve(i64 %a) {
 ; CHECK-LABEL: @gc_preserve
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed)
     %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed2
 ; OPAQUE: store ptr addrspace(10) %aboxed2
     call void @jl_safepoint()
     call void @llvm.julia.gc_preserve_end(token %tok)
     %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed3
 ; OPAQUE: store ptr addrspace(10) %aboxed3
     call void @jl_safepoint()
     call void @one_arg_boxed({} addrspace(10)* %aboxed2)
@@ -356,24 +316,11 @@ top:
 
 define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) {
 ; CHECK-LABEL: @gc_preserve_vec
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8
-; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
-; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
-; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1
-; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1
-; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0
-; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1
-; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0
-; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1
-; TYPED-DAG: store {} addrspace(10)* [[V11]]
-; TYPED-DAG: store {} addrspace(10)* [[V12]]
-; TYPED-DAG: store {} addrspace(10)* [[V21]]
-; TYPED-DAG: store {} addrspace(10)* [[V22]]
 
 ; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0
 ; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0
@@ -426,7 +373,6 @@ declare {} addrspace(10) *@alloc()
 
 define {} addrspace(10)* @vec_loadobj() {
 ; CHECK-LABEL: @vec_loadobj
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
   %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
@@ -441,7 +387,6 @@ define {} addrspace(10)* @vec_loadobj() {
 
 define {} addrspace(10)* @vec_gep() {
 ; CHECK-LABEL: @vec_gep
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
   %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
@@ -457,7 +402,6 @@ define {} addrspace(10)* @vec_gep() {
 declare i1 @check_property({} addrspace(10)* %val)
 define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) {
 ; CHECK-LABEL: @loopyness
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -471,8 +415,6 @@ header:
 a:
 ; This needs a store
 ; CHECK-LABEL: a:
-; TYPED:  [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]]
 
 ; OPAQUE:  [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP1]]
@@ -481,8 +423,6 @@ a:
 
 latch:
 ; This as well in case we went the other path
-; TYPED:  [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]]
 
 ; OPAQUE:  [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]]
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP2]]
@@ -496,7 +436,6 @@ exit:
 
 define {} addrspace(10)* @phi_union(i1 %cond) {
 ; CHECK-LABEL: @phi_union
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -522,7 +461,6 @@ join:
 
 define {} addrspace(10)* @select_union(i1 %cond) {
 ; CHECK-LABEL: @select_union
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -539,7 +477,6 @@ top:
 
 define i8 @simple_arrayptr() {
 ; CHECK-LABEL: @simple_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
    %pgcstack = call {}*** @julia.get_pgcstack()
@@ -557,7 +494,6 @@ top:
 
 define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecstoreload
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -572,7 +508,6 @@ top:
 
 define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecphi
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -599,7 +534,6 @@ common:
 
 define i8 @phi_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @phi_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -639,7 +573,6 @@ common:
 
 define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -657,14 +590,12 @@ top:
 
 define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}}
 ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}}
     %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed
     call void @jl_safepoint()
@@ -677,14 +608,12 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 
 define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecvecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}}
 ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}}
     %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed
     call void @jl_safepoint()
@@ -697,7 +626,6 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 
 define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 ; CHECK-LABEL: @vecscalarselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -705,7 +633,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
     %avec = getelementptr i64, i64 addrspace(12)*  %adecayed, <2 x i32> zeroinitializer
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed
 ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed
     %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec
     call void @jl_safepoint()
@@ -718,7 +645,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 
 define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 ; CHECK-LABEL: @scalarvecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -726,7 +652,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) {
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
     %avec = getelementptr i64, i64 addrspace(12)*  %adecayed, <2 x i32> zeroinitializer
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed
 ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed
     %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec
     call void @jl_safepoint()
@@ -739,7 +664,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 
 define i8 @select_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @select_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -767,11 +691,8 @@ top:
 
 define i8 @vector_arrayptrs() {
 ; CHECK-LABEL: @vector_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]]
@@ -793,12 +714,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad
 
 define i8 @masked_arrayptrs() {
 ; CHECK-LABEL: @masked_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
@@ -821,12 +738,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8
 
 define i8 @gather_arrayptrs() {
 ; CHECK-LABEL: @gather_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
@@ -848,12 +761,8 @@ top:
 
 define i8 @gather_arrayptrs_alltrue() {
 ; CHECK-LABEL: @gather_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
@@ -875,11 +784,8 @@ top:
 
 define i8 @lost_select_decayed(i1 %arg1) {
 ; CHECK-LABEL: @lost_select_decayed
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]]
diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll
index 4f28239257ec0..76ce19af96e94 100644
--- a/test/llvmpasses/julia-licm-fail.ll
+++ b/test/llvmpasses/julia-licm-fail.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 ; COM: This file contains functions that should not trigger allocations to be hoisted out of loops
 
@@ -23,10 +21,8 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
 ; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc)
   %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
   br i1 %ret, label %return, label %loop
@@ -49,13 +45,10 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
 ; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11)
   %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
-; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
 ; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast)
   %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
   br i1 %ret, label %return, label %loop
@@ -80,7 +73,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll
index e1684c7577578..4f25a99f7e615 100644
--- a/test/llvmpasses/julia-licm-memoryssa.ll
+++ b/test/llvmpasses/julia-licm-memoryssa.ll
@@ -1,8 +1,6 @@
 ; COM: NewPM-only test, tests that memoryssa is preserved correctly
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -116,8 +114,6 @@ top:
 preheader:
 ; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]])
 
-; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag)
-; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)*
 
 ; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag)
 
diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll
index 99b493cb6316b..37a547c9861b7 100644
--- a/test/llvmpasses/julia-licm-missed.ll
+++ b/test/llvmpasses/julia-licm-missed.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 ; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops
 ; COM: i.e. they are missed optimizations
@@ -27,16 +25,12 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
 ; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11)
   %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
-; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
 ; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11)
   %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
-; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
 ; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8
   store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
   br i1 %ret, label %return, label %loop
@@ -61,13 +55,11 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
   br label %other
 ; CHECK: other:
 other:
-; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ]
 ; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ]
   %phi = phi {} addrspace(10)* [ %alloc, %loop ]
   br i1 %ret, label %return, label %loop
@@ -94,7 +86,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll
index 1b0e7078e32ce..732b62788f13c 100644
--- a/test/llvmpasses/julia-licm.ll
+++ b/test/llvmpasses/julia-licm.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -98,9 +96,6 @@ L3:                                               ; preds = %L3.loopexit, %top
 L4:                                               ; preds = %top
   %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12
   %current_task1 = bitcast {}*** %current_task112 to {}**
-  ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag)
-  ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)*
-  ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false)
 
   ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag)
   ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false)
@@ -110,8 +105,6 @@ L4:                                               ; preds = %top
 
 L22:                                              ; preds = %L4, %L22
   %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
-  ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ]
-  ; TYPED-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
 
   ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
   ; OPAQUE-NEXT %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10)
@@ -133,9 +126,6 @@ top:
   br label %preheader
 ; CHECK: preheader:
 preheader:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)*
-; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false)
 
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
 ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false)
@@ -162,7 +152,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll
index 863e5234caa67..b99fb4f57db20 100644
--- a/test/llvmpasses/julia-simdloop-memoryssa.ll
+++ b/test/llvmpasses/julia-simdloop-memoryssa.ll
@@ -1,8 +1,6 @@
 ; COM: NewPM-only test, tests that memoryssa is preserved correctly
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK
 
 ; CHECK-LABEL: MemorySSA for function: simd_test
 ; CHECK-LABEL: @simd_test(
diff --git a/test/llvmpasses/julia-simdloop.ll b/test/llvmpasses/julia-simdloop.ll
index df96e34979a3d..a8d5ea3342b20 100644
--- a/test/llvmpasses/julia-simdloop.ll
+++ b/test/llvmpasses/julia-simdloop.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s
 
 ; CHECK-LABEL: @simd_test(
 define void @simd_test(double *%a, double *%b) {
diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll
index 77f8e2ac685ce..9c041664a9682 100644
--- a/test/llvmpasses/late-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/late-lower-gc-addrspaces.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -20,39 +18,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
-; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]])
-; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -62,25 +50,15 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: ret {} addrspace(10)* %v
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -95,34 +73,21 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 6dee18da5975f..d294847db8f9d 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -17,39 +15,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
-; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]])
-; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -59,25 +47,15 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: ret {} addrspace(10)* %v
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -92,34 +70,21 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
@@ -196,21 +161,14 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) {
 }
 
 ; CHECK-LABEL: @decayar
-; TYPED:  %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  [[gc_slot_addr_:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
-; TYPED:  store {} addrspace(10)* %l0, {} addrspace(10)** [[gc_slot_addr_:%.*]], align 8
-; TYPED:  [[gc_slot_addr_:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
-; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** [[gc_slot_addr_:%.*]], align 8
-; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1)
-; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-
-; OPAQUE:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
-; OPAQUE:  store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8
-; OPAQUE:  [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
-; OPAQUE: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8
-; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
-; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe)
+
+; CHECK:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+; CHECK:  store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK:  [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+; CHECK: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
+; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
 
 !0 = !{i64 0, i64 23}
 !1 = !{!1}
diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl
index 3e0df7a8885a7..294c657196142 100644
--- a/test/llvmpasses/llvmcall.jl
+++ b/test/llvmpasses/llvmcall.jl
@@ -1,11 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no %s %t
-# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
+# RUN: export JULIA_LLVM_ARGS=""
 
 # RUN: julia --startup-file=no %s %t
 # RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE
@@ -17,7 +12,7 @@ struct Foo
     y::Int32
 end
 
-@generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x))
+@generated foo(x) = :(ccall("extern foo", llvmcall, $x, ($x,), x))
 bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2)
 
 # CHECK: define
@@ -48,7 +43,7 @@ emit(foo, Float16)
 # CHECK: ret [2 x half]
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, NTuple{2, Float16})
+emit(foo, NTuple{2,Float16})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -67,7 +62,7 @@ emit(foo, NTuple{2, Float16})
 # CHECK: ret <2 x half>
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, NTuple{2, VecElement{Float16}})
+emit(foo, NTuple{2,VecElement{Float16}})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -89,7 +84,7 @@ emit(foo, NTuple{2, VecElement{Float16}})
 # OPAQUE: ret ptr addrspace(3)
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, Core.LLVMPtr{Float32, 3})
+emit(foo, Core.LLVMPtr{Float32,3})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -127,7 +122,7 @@ emit(foo, Foo)
 # CHECK: ret <2 x half>
 # CHECK-NOT: define
 # CHECK: }
-emit(bar, NTuple{2, Float16})
+emit(bar, NTuple{2,Float16})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl
index c812f9c05a967..759ff09499deb 100644
--- a/test/llvmpasses/loopinfo.jl
+++ b/test/llvmpasses/loopinfo.jl
@@ -2,7 +2,7 @@
 
 # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s
-# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER
+# RUN: cat %t/module.ll | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER
 # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL
 
@@ -26,11 +26,11 @@ function simdf(X)
     acc = zero(eltype(X))
     @simd for x in X
         acc += x
-# CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
-# LOWER-NOT: llvm.mem.parallel_loop_access
-# LOWER: fadd reassoc contract double
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
-# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double>
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
+        # LOWER-NOT: llvm.mem.parallel_loop_access
+        # LOWER: fadd reassoc contract double
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
+        # FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double>
     end
     acc
 end
@@ -41,10 +41,10 @@ function simdf2(X)
     acc = zero(eltype(X))
     @simd ivdep for x in X
         acc += x
-# CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
-# LOWER: llvm.mem.parallel_loop_access
-# LOWER: fadd reassoc contract double
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
+        # LOWER: llvm.mem.parallel_loop_access
+        # LOWER: fadd reassoc contract double
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
     end
     acc
 end
@@ -58,13 +58,13 @@ end
     for i in 1:N
         iteration(i)
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3)))
-# CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
-# FINAL: br
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: br
     end
 end
 
@@ -79,23 +79,23 @@ end
     for i in 1:10
         for j in J
             1 <= j <= I && continue
-            @show (i,j)
+            @show (i, j)
             iteration(i)
         end
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),)))
-# CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
     end
 end
 
@@ -104,10 +104,10 @@ end
     for i in 1:10
         for j in J
             1 <= j <= I && continue
-            @show (i,j)
+            @show (i, j)
             iteration(i)
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+            # FINAL: call {{(swiftcc )?}}void @j_iteration
+            # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
         end
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),)))
     end
diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll
index 8b85a71705f60..ce3bdc6401b91 100644
--- a/test/llvmpasses/lower-handlers-addrspaces.ll
+++ b/test/llvmpasses/lower-handlers-addrspaces.ll
@@ -1,15 +1,13 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
 
 attributes #1 = { returns_twice }
-declare i32 @julia.except_enter() #1
-declare void @ijl_pop_handler(i32)
+declare {i32, i8*} @julia.except_enter({}*) #1
+declare void @ijl_pop_handler({}*, i32)
 declare i8**** @julia.ptls_states()
 declare i8**** @julia.get_pgcstack()
 
@@ -19,7 +17,8 @@ top:
 ; CHECK: call void @llvm.lifetime.start
 ; CHECK: call void @ijl_enter_handler
 ; CHECK: setjmp
-    %r = call i32 @julia.except_enter()
+    %rb = call {i32, i8*} @julia.except_enter({}* null)
+    %r = extractvalue {i32, i8*} %rb, 0
     %cmp = icmp eq i32 %r, 0
     br i1 %cmp, label %try, label %catch
 try:
@@ -27,7 +26,7 @@ try:
 catch:
     br label %after
 after:
-    call void @ijl_pop_handler(i32 1)
+    call void @ijl_pop_handler({}* null, i32 1)
 ; CHECK: llvm.lifetime.end
     ret void
 }
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
index a250edddcaa81..7f0648a1a8bf5 100644
--- a/test/llvmpasses/lower-handlers.ll
+++ b/test/llvmpasses/lower-handlers.ll
@@ -1,12 +1,10 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
 
 attributes #1 = { returns_twice }
-declare i32 @julia.except_enter() #1
-declare void @ijl_pop_handler(i32)
+declare {i32, i8*} @julia.except_enter({}*) #1
+declare void @ijl_pop_handler({}*, i32)
 declare i8**** @julia.ptls_states()
 declare i8**** @julia.get_pgcstack()
 
@@ -16,15 +14,18 @@ top:
 ; CHECK: call void @llvm.lifetime.start
 ; CHECK: call void @ijl_enter_handler
 ; CHECK: setjmp
-    %r = call i32 @julia.except_enter()
+    %rb = call {i32, i8*} @julia.except_enter({}* null)
+    %r = extractvalue {i32, i8*} %rb, 0
+    %b = extractvalue {i32, i8*} %rb, 1
     %cmp = icmp eq i32 %r, 0
     br i1 %cmp, label %try, label %catch
 try:
+    %lcssa = phi {i32, i8*} [ %rb, %top ]
     br label %after
 catch:
     br label %after
 after:
-    call void @ijl_pop_handler(i32 1)
+    call void @ijl_pop_handler({}* null, i32 1)
 ; CHECK: llvm.lifetime.end
     ret void
 }
diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll
index 3c1c995ce7376..079582305ee72 100644
--- a/test/llvmpasses/muladd.ll
+++ b/test/llvmpasses/muladd.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
 
 
 ; CHECK-LABEL: @fast_muladd1
diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll
index 4e90e3cb9bc6b..849cf57c78aa3 100644
--- a/test/llvmpasses/multiversioning-annotate-only.ll
+++ b/test/llvmpasses/multiversioning-annotate-only.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
 
 ; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning
 ; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars
diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll
index 59a3f64a25c35..00f0db0aa1e91 100644
--- a/test/llvmpasses/multiversioning-clone-only.ll
+++ b/test/llvmpasses/multiversioning-clone-only.ll
@@ -1,29 +1,19 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
 
 ; CHECK: @jl_gvar_base = hidden constant i64 0
 ; CHECK: @jl_gvar_offsets = hidden constant [0 x i32] zeroinitializer
 ; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer
 ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer
-; TYPED: @subtarget_cloned_gv = hidden global i64* null
 ; OPAQUE: @subtarget_cloned_gv = hidden global ptr null
-; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null
 ; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null
 ; CHECK: @jl_fvar_count = hidden constant i64 1
-; TYPED: @jl_fvar_ptrs = hidden global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)] 
-; OPAQUE: @jl_fvar_ptrs = hidden global [1 x ptr] [ptr @subtarget_cloned] 
+; OPAQUE: @jl_fvar_ptrs = hidden global [1 x ptr] [ptr @subtarget_cloned]
 ; CHECK: @jl_clone_slots = hidden constant [5 x i32]
 ; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_clone_slots
 ; CHECK: @jl_clone_idxs = hidden constant [13 x i32]
 ; COM: TODO actually check the clone idxs maybe?
-; TYPED: @jl_clone_ptrs = hidden constant [4 x i64*]
-; TYPED-SAME: @subtarget_cloned.1
-; TYPED-SAME: @subtarget_cloned.2
-; TYPED-SAME: @subtarget_cloned
-; TYPED-SAME: @subtarget_cloned
 ; OPAQUE: @jl_clone_ptrs = hidden constant [4 x ptr] [ptr @subtarget_cloned.1, ptr @subtarget_cloned.2, ptr @subtarget_cloned, ptr @subtarget_cloned]
 
 @jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 8
diff --git a/test/llvmpasses/multiversioning-x86.ll b/test/llvmpasses/multiversioning-x86.ll
index 1bc1b288a5879..1fd0ce2d5f40c 100644
--- a/test/llvmpasses/multiversioning-x86.ll
+++ b/test/llvmpasses/multiversioning-x86.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
 
 
 ; COM: This test checks that multiversioning actually happens from start to finish
@@ -10,18 +8,13 @@
 
 
 
-; TYPED: @jl_gvar_ptrs = global [0 x i64*] zeroinitializer, align 8
 ; OPAQUE: @jl_gvar_ptrs = global [0 x ptr] zeroinitializer, align 8
 ; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 8
 ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8
-; TYPED: @simd_test.reloc_slot = hidden global i32 (<4 x i32>)* null
 ; OPAQUE: @simd_test.reloc_slot = hidden global ptr null
-; TYPED: @jl_fvar_ptrs = hidden global [5 x i64*] [i64* bitcast (i32 (i32)* @boring to i64*), i64* bitcast (float (float, float)* @fastmath_test to i64*), i64* bitcast (i32 (i32)* @loop_test to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test_call to i64*)]
 ; OPAQUE: @jl_fvar_ptrs = hidden global [5 x ptr] [ptr @boring, ptr @fastmath_test, ptr @loop_test, ptr @simd_test, ptr @simd_test_call]
-; TYPED: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)** @simd_test.reloc_slot to i64), i64 ptrtoint ([3 x i32]* @jl_clone_slots to i64)) to i32)]
 ; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_clone_slots to i64)) to i32)]
 ; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4]
-; TYPED: @jl_clone_ptrs = hidden constant [9 x i64*] [i64* bitcast (i32 (i32)* @boring.1 to i64*), i64* bitcast (float (float, float)* @fastmath_test.1 to i64*), i64* bitcast (i32 (i32)* @loop_test.1 to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test.1 to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test_call.1 to i64*), i64* bitcast (float (float, float)* @fastmath_test.2 to i64*), i64* bitcast (i32 (i32)* @loop_test.2 to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test.2 to i64*), i64* bitcast (i32 (<4 x i32>)* @simd_test_call.2 to i64*)]
 ; OPAQUE: @jl_clone_ptrs = hidden constant [9 x ptr] [ptr @boring.1, ptr @fastmath_test.1, ptr @loop_test.1, ptr @simd_test.1, ptr @simd_test_call.1, ptr @fastmath_test.2, ptr @loop_test.2, ptr @simd_test.2, ptr @simd_test_call.2]
 
 
@@ -102,7 +95,6 @@ define noundef i32 @simd_test_call(<4 x i32> noundef %0) {
 ; CHECK: @simd_test{{.*}}#[[SIMD_CLONE2:[0-9]+]]
 
 ; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
-; TYPED: %2 = load i32 (<4 x i32>)*, i32 (<4 x i32>)** @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12
 ; OPAQUE: %2 = load ptr, ptr @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12
 ; CHECK: %3 = call noundef i32 %2(<4 x i32> noundef %0)
 
diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl
index fe692d0fab787..1ab2204044804 100644
--- a/test/llvmpasses/names.jl
+++ b/test/llvmpasses/names.jl
@@ -135,7 +135,8 @@ emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64)
 
 # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f5
 # CHECK-SAME: %"a::A"
-# CHECK: %"a::A.b_ptr.c_ptr.d
+# CHECK: %"a::A.d
+# COM: this text check relies on our LLVM code emission being relatively poor, which is not always the case
 emit(f5, A)
 
 # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f6
diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll
index 6a5909ff5fd40..e0a726176b225 100644
--- a/test/llvmpasses/parsing.ll
+++ b/test/llvmpasses/parsing.ll
@@ -1,6 +1,9 @@
 ; COM: NewPM-only test, tests for ability to parse Julia passes
 
-; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_enable_vector_pipeline>" -S %s -o /dev/null
 
 define void @test() {
     ret void
diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl
index e48a5f7df111f..5dab675f2b547 100644
--- a/test/llvmpasses/pipeline-o0.jl
+++ b/test/llvmpasses/pipeline-o0.jl
@@ -1,14 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
@@ -19,7 +10,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
 # CHECK-LABEL: @julia_simple
 # CHECK-NOT: julia.get_pgcstack
 # CHECK-NOT: julia.gc_alloc_obj
-# CHECK: ijl_gc_pool_alloc
+# CHECK: ijl_gc_small_alloc
 # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes
 function simple()
     Ref(0)
diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl
index 86ab9125f2f27..999e63e2725c4 100644
--- a/test/llvmpasses/pipeline-o2-allocs.jl
+++ b/test/llvmpasses/pipeline-o2-allocs.jl
@@ -1,12 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 
@@ -60,8 +53,7 @@ end
 # CHECK-NOT: julia.gc_preserve_end
 function nopreserve()
     ref = Ref(0)
-    GC.@preserve ref begin
-    end
+    GC.@preserve ref begin end
 end
 
 # COM: this cordons off the attributes/function declarations from the actual
diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl
index 83a4450522c79..584e8855f0f8c 100644
--- a/test/llvmpasses/pipeline-o2-broadcast.jl
+++ b/test/llvmpasses/pipeline-o2-broadcast.jl
@@ -1,12 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
 
diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl
index 3ce2f692fc32e..ceb2fe3bf65d6 100644
--- a/test/llvmpasses/pipeline-o2.jl
+++ b/test/llvmpasses/pipeline-o2.jl
@@ -1,18 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
-# RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
-
-# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
-# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
-
-# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO
-# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
 # RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
 
diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll
index 0c0d81420d9fe..ecb70953026c2 100644
--- a/test/llvmpasses/pipeline-prints.ll
+++ b/test/llvmpasses/pipeline-prints.ll
@@ -1,46 +1,25 @@
 ; COM: This is a newpm-only test, no legacypm command
 ; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
 
 ; ModuleID = 'f'
 source_filename = "f"
@@ -319,12 +298,12 @@ attributes #2 = { inaccessiblemem_or_argmemonly }
 
 ; COM: Loop simplification makes the exit condition obvious
 ; AFTERLOOPSIMPLIFICATION: L35.lr.ph:
-; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw
+; AFTERLOOPSIMPLIFICATION: add nuw nsw
 
 ; COM: Scalar optimization removes the previous add from the preheader
-; AFTERSCALAROPTIMIZATION: L35.preheader:
+; AFTERSCALAROPTIMIZATION: L35.lr.ph:
 ; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw
-; AFTERSCALAROPTIMIZATION-NEXT: br label %L35
+; AFTERSCALAROPTIMIZATION: br label %L35
 
 ; COM: Vectorization does stuff
 ; AFTERVECTORIZATION: vector.body
diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll
index e90d6f97abe14..996b995a58556 100644
--- a/test/llvmpasses/propagate-addrspace-non-zero.ll
+++ b/test/llvmpasses/propagate-addrspace-non-zero.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll
index aa3bce3760540..033fbd6f0386e 100644
--- a/test/llvmpasses/propagate-addrspace.ll
+++ b/test/llvmpasses/propagate-addrspace.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
 define i64 @simple() {
 ; CHECK-LABEL: @simple
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index c92d45300fb81..da32758c1dc5b 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 declare {}*** @julia.ptls_states()
@@ -28,14 +26,12 @@ define void @argument_refinement({} addrspace(10)* %a) {
 ; Check that we reuse the gc slot from the box
 define void @heap_refinement1(i64 %a) {
 ; CHECK-LABEL: @heap_refinement1
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE:   %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)*
@@ -47,14 +43,12 @@ define void @heap_refinement1(i64 %a) {
 ; Check that we don't root the allocated value here, just the derived value
 define void @heap_refinement2(i64 %a) {
 ; CHECK-LABEL: @heap_refinement2
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE:   %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %loaded1
 ; OPAQUE: store ptr addrspace(10) %loaded1
     call void @jl_safepoint()
     %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)*
@@ -65,20 +59,14 @@ define void @heap_refinement2(i64 %a) {
 ; Check that the way we compute rooting is compatible with refinements
 define void @issue22770() {
 ; CHECK-LABEL: @issue22770
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %y = call {} addrspace(10)* @allocate_some_value()
     %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)*
     %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %y,
 ; OPAQUE: store ptr addrspace(10) %y,
     %a = call {} addrspace(10)* @allocate_some_value()
-; TYPED: store {} addrspace(10)* %a
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %x)
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %a)
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %y)
 
 ; OPAQUE: store ptr addrspace(10) %a
 ; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x)
@@ -87,10 +75,8 @@ define void @issue22770() {
     call void @one_arg_boxed({} addrspace(10)* %x)
     call void @one_arg_boxed({} addrspace(10)* %a)
     call void @one_arg_boxed({} addrspace(10)* %y)
-; TYPED: store {} addrspace(10)* %x
 ; OPAQUE: store ptr addrspace(10) %x
     %c = call {} addrspace(10)* @allocate_some_value()
-; TYPED: store {} addrspace(10)* %c
 ; OPAQUE: store ptr addrspace(10) %c
     call void @one_arg_boxed({} addrspace(10)* %x)
     call void @one_arg_boxed({} addrspace(10)* %c)
@@ -121,7 +107,6 @@ L3:
 
 define void @dont_refine_loop({} addrspace(10)* %x) {
 ; CHECK-LABEL: @dont_refine_loop
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -165,7 +150,6 @@ L2:
 
 define void @refine_loop_indirect({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -191,7 +175,6 @@ L2:
 
 define void @refine_loop_indirect2({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect2
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll
index 90043a7d85cf4..fbd84de85a4a3 100644
--- a/test/llvmpasses/remove-addrspaces.ll
+++ b/test/llvmpasses/remove-addrspaces.ll
@@ -1,14 +1,11 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 ; COM: check that package image fptrs work
 @pjlsys_BoundsError_32 = internal global {} addrspace(10)* ({}***, {} addrspace(10)*, [1 x i64] addrspace(11)*)* null
 ; CHECK: @pjlsys_BoundsError_32 = internal global
-; TYPED-SAME: {}* ({}***, {}*, [1 x i64]*)* null
 ; OPAQUE-SAME: ptr null
 
 define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) {
@@ -41,7 +38,6 @@ top:
 define nonnull {} addrspace(10)* @constexpr(i64) {
 ; CHECK-LABEL: @constexpr
 top:
-; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1)
 ; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1)
   %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1)
 ; CHECK-NOT: addrspacecast
@@ -71,22 +67,16 @@ top:
   %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1
 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix.
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %a
 ; OPAQUE-SAME: ptr %a
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %a
 ; OPAQUE-SAME: ptr %a
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %b
 ; OPAQUE-SAME: ptr %b
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %b
 ; OPAQUE-SAME: ptr %b
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %c
 ; OPAQUE-SAME: ptr %c
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %c
 ; OPAQUE-SAME: ptr %c
   store i64 111, i64* %a.car
   store i64 222, i64* %b.car
@@ -116,7 +106,6 @@ exit:
 
 ; COM: check that address spaces in byval types are processed correctly
 define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) {
-; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0)
 ; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0)
   ret void
 }
diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll
index d99c0aa02f85b..511cbb505519b 100644
--- a/test/llvmpasses/returnstwicegc.ll
+++ b/test/llvmpasses/returnstwicegc.ll
@@ -1,8 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=OPAQUE
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
@@ -15,7 +13,6 @@ declare void @one_arg_boxed({} addrspace(10)*)
 define void @try_catch(i64 %a, i64 %b)
 {
 ; Because of the returns_twice function, we need to keep aboxed live everywhere
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %sigframe = alloca [208 x i8], align 16
diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl
index e02aee7206b12..173058df12fb1 100644
--- a/test/llvmpasses/safepoint_stress.jl
+++ b/test/llvmpasses/safepoint_stress.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S - | FileCheck %s
 
 
 println("""
diff --git a/test/loading.jl b/test/loading.jl
index 8ba2cf3026120..8db8405ef2a83 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -167,7 +167,7 @@ end
 
             @test root.uuid == root_uuid
             @test this.uuid == this_uuid
-            @test that == nothing
+            @test that === nothing
 
             write(project_file, """
             name = "Root"
@@ -180,8 +180,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == proj_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
         finally
             copy!(LOAD_PATH, old_load_path)
         end
@@ -213,8 +213,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == root_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
 
             @test Base.get_uuid_name(project_file, this_uuid) == "This"
         finally
@@ -273,8 +273,8 @@ end
         @test joinpath(@__DIR__, normpath(path)) == locate_package(pkg)
         @test Base.compilecache_path(pkg, UInt64(0)) == Base.compilecache_path(pkg, UInt64(0))
     end
-    @test identify_package("Baz") == nothing
-    @test identify_package("Qux") == nothing
+    @test identify_package("Baz") === nothing
+    @test identify_package("Qux") === nothing
     @testset "equivalent package names" begin
          classes = [
             ["Foo"],
@@ -848,7 +848,7 @@ end
         proj = joinpath(tmp, "Project.toml")
         touch(proj)
         touch(joinpath(tmp, "Manifest-v1.5.toml"))
-        @test Base.project_file_manifest_path(proj) == nothing
+        @test Base.project_file_manifest_path(proj) === nothing
         touch(joinpath(tmp, "Manifest.toml"))
         man = basename(Base.project_file_manifest_path(proj))
         @test man == "Manifest.toml"
@@ -1034,6 +1034,16 @@ end
 end
 
 @testset "Extensions" begin
+    test_ext = """
+    function test_ext(parent::Module, ext::Symbol)
+        _ext = Base.get_extension(parent, ext)
+        _ext isa Module || error("expected extension \$ext to be loaded")
+        _pkgdir = pkgdir(_ext)
+        _pkgdir == pkgdir(parent) != nothing || error("unexpected extension \$ext pkgdir path: \$_pkgdir")
+        _pkgversion = pkgversion(_ext)
+        _pkgversion == pkgversion(parent) || error("unexpected extension \$ext version: \$_pkgversion")
+    end
+    """
     depot_path = mktempdir()
     try
         proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl")
@@ -1044,6 +1054,7 @@ end
             cmd = """
             $load_distr
             begin
+                $ew $test_ext
                 $ew push!(empty!(DEPOT_PATH), $(repr(depot_path)))
                 using HasExtensions
                 $ew using HasExtensions
@@ -1051,13 +1062,16 @@ end
                 $ew HasExtensions.ext_loaded && error("ext_loaded set")
                 using HasDepWithExtensions
                 $ew using HasDepWithExtensions
+                $ew test_ext(HasExtensions, :Extension)
                 $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set")
                 $ew HasExtensions.ext_loaded || error("ext_loaded not set")
                 $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set")
                 $ew HasDepWithExtensions.do_something() || error("do_something errored")
                 using ExtDep2
                 $ew using ExtDep2
-                $ew HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set")
+                using ExtDep3
+                $ew using ExtDep3
+                $ew HasExtensions.ext_dep_loaded || error("ext_dep_loaded not set")
             end
             """
             return `$(Base.julia_cmd()) $compile --startup-file=no -e $cmd`
@@ -1100,11 +1114,14 @@ end
 
         test_ext_proj = """
         begin
+            $test_ext
             using HasExtensions
             using ExtDep
-            Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :Extension)
             using ExtDep2
-            Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :ExtensionFolder)
+            using ExtDep3
+            test_ext(HasExtensions, :ExtensionDep)
         end
         """
         for compile in (`--compiled-modules=no`, ``)
@@ -1206,36 +1223,42 @@ end
 empty!(Base.DEPOT_PATH)
 append!(Base.DEPOT_PATH, original_depot_path)
 
+module loaded_pkgid1 end
+module loaded_pkgid2 end
+module loaded_pkgid3 end
+module loaded_pkgid4 end
+
 @testset "loading deadlock detector" begin
     pkid1 = Base.PkgId("pkgid1")
     pkid2 = Base.PkgId("pkgid2")
     pkid3 = Base.PkgId("pkgid3")
     pkid4 = Base.PkgId("pkgid4")
+    build_id = UInt128(0)
     e = Base.Event()
-    @test nothing === @lock Base.require_lock Base.start_loading(pkid4)     # module pkgid4
-    @test nothing === @lock Base.require_lock Base.start_loading(pkid1)     # module pkgid1
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid4, build_id, false)     # module pkgid4
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid1, build_id, false)     # module pkgid1
     t1 = @async begin
-        @test nothing === @lock Base.require_lock Base.start_loading(pkid2) # @async module pkgid2; using pkgid1; end
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid2, build_id, false) # @async module pkgid2; using pkgid1; end
         notify(e)
-        @test "loaded_pkgid1" == @lock Base.require_lock Base.start_loading(pkid1)
-        @lock Base.require_lock Base.end_loading(pkid2, "loaded_pkgid2")
+        @test loaded_pkgid1 == @lock Base.require_lock Base.start_loading(pkid1, build_id, false)
+        @lock Base.require_lock Base.end_loading(pkid2, loaded_pkgid2)
     end
     wait(e)
     reset(e)
     t2 = @async begin
-        @test nothing === @lock Base.require_lock Base.start_loading(pkid3) # @async module pkgid3; using pkgid2; end
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid3, build_id, false) # @async module pkgid3; using pkgid2; end
         notify(e)
-        @test "loaded_pkgid2" == @lock Base.require_lock Base.start_loading(pkid2)
-        @lock Base.require_lock Base.end_loading(pkid3, "loaded_pkgid3")
+        @test loaded_pkgid2 == @lock Base.require_lock Base.start_loading(pkid2, build_id, false)
+        @lock Base.require_lock Base.end_loading(pkid3, loaded_pkgid3)
     end
     wait(e)
     reset(e)
     @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 -> pkgid2 -> pkgid1 -> pkgid3 && pkgid4"),
-        @lock Base.require_lock Base.start_loading(pkid3)).value            # try using pkgid3
+        @lock Base.require_lock Base.start_loading(pkid3, build_id, false)).value            # try using pkgid3
     @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 -> pkgid4 && pkgid1"),
-        @lock Base.require_lock Base.start_loading(pkid4)).value            # try using pkgid4
-    @lock Base.require_lock Base.end_loading(pkid1, "loaded_pkgid1")        # end
-    @lock Base.require_lock Base.end_loading(pkid4, "loaded_pkgid4")        # end
+        @lock Base.require_lock Base.start_loading(pkid4, build_id, false)).value            # try using pkgid4
+    @lock Base.require_lock Base.end_loading(pkid1, loaded_pkgid1)        # end
+    @lock Base.require_lock Base.end_loading(pkid4, loaded_pkgid4)        # end
     wait(t2)
     wait(t1)
 end
@@ -1337,7 +1360,10 @@ end
 
 @testset "relocatable upgrades #51989" begin
     mktempdir() do depot
-        project_path = joinpath(depot, "project")
+        # realpath is needed because Pkg is used for one of the precompile paths below, and Pkg calls realpath on the
+        # project path so the cache file slug will be different if the tempdir is given as a symlink
+        # (which it often is on MacOS) which would break the test.
+        project_path = joinpath(realpath(depot), "project")
         mkpath(project_path)
 
         # Create fake `Foo.jl` package with two files:
@@ -1542,11 +1568,120 @@ end
         end
 
         file = joinpath(depot, "dev", "non-existent.jl")
-        @test_throws SystemError("opening file $(repr(file))") include(file)
+        @test try
+            include(file); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file $(repr(file))"
+            true
+        end
+        touch(file)
+        @test include_dependency(file) === nothing
+        chmod(file, 0x000)
+
+        # same for include_dependency: #52063
+        dir = mktempdir() do dir
+            @test include_dependency(dir) === nothing
+            dir
+        end
+        @test try
+            include_dependency(dir); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file or folder $(repr(dir))"
+            true
+        end
     end
 end
 
 @testset "-m" begin
     rot13proj = joinpath(@__DIR__, "project", "Rot13")
     @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13 --project nowhere ABJURER`) == "--cebwrpg abjurer NOWHERE "
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13.Rot26 --project nowhere ABJURER`) == "--project nowhere ABJURER "
+end
+
+@testset "workspace loading" begin
+   old_load_path = copy(LOAD_PATH)
+   try
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject"))
+       @test Base.get_preferences()["value"] == 1
+       @test Base.get_preferences()["x"] == 1
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "sub"))
+       id = Base.identify_package("Devved")
+       @test isfile(Base.locate_package(id))
+       @test Base.identify_package("Devved2") === nothing
+       id3 = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id3))
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub"))
+       id_pkg = Base.identify_package("PackageThatIsSub")
+       @test Base.identify_package(id_pkg, "Devved") === nothing
+       id_dev2 = Base.identify_package(id_pkg, "Devved2")
+       @test isfile(Base.locate_package(id_dev2))
+       id_mypkg = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id_mypkg))
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.get_preferences()["value"] == 2
+       @test Base.get_preferences()["x"] == 1
+       @test Base.get_preferences()["y"] == 2
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub", "test"))
+       id_pkg = Base.identify_package("PackageThatIsSub")
+       @test isfile(Base.locate_package(id_pkg))
+       @test Base.identify_package(id_pkg, "Devved") === nothing
+       id_dev2 = Base.identify_package(id_pkg, "Devved2")
+       @test isfile(Base.locate_package(id_dev2))
+       id_mypkg = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id_mypkg))
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.get_preferences()["value"] == 3
+       @test Base.get_preferences()["x"] == 1
+       @test Base.get_preferences()["y"] == 2
+       @test Base.get_preferences()["z"] == 3
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "test"))
+       id_mypkg = Base.identify_package("MyPkg")
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.identify_package("Devved2") === nothing
+
+    finally
+       copy!(LOAD_PATH, old_load_path)
+    end
+end
+
+@testset "project path handling" begin
+    old_load_path = copy(LOAD_PATH)
+    try
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "ProjectPath"))
+        id_project = Base.identify_package("ProjectPath")
+        Base.locate_package(id_project)
+        @test Base.locate_package(id_project) == joinpath(@__DIR__, "project", "ProjectPath", "CustomPath.jl")
+
+        id_dep = Base.identify_package("ProjectPathDep")
+        @test Base.locate_package(id_dep) == joinpath(@__DIR__, "project", "ProjectPath", "ProjectPathDep", "CustomPath.jl")
+    finally
+        copy!(LOAD_PATH, old_load_path)
+    end
+end
+
+@testset "extension path computation name collision" begin
+    old_load_path = copy(LOAD_PATH)
+    try
+        empty!(LOAD_PATH)
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_A"))
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_B"))
+        ext_B = Base.PkgId(Base.uuid5(Base.identify_package("ExtNameCollision_B").uuid, "REPLExt"), "REPLExt")
+        @test Base.locate_package(ext_B) == joinpath(@__DIR__, "project",  "Extensions", "ExtNameCollision_B", "ext", "REPLExt.jl")
+    finally
+        copy!(LOAD_PATH, old_load_path)
+    end
 end
diff --git a/test/math.jl b/test/math.jl
index f551bb3a5d4b7..c0a2d8bf8c9f8 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -47,6 +47,20 @@ has_fma = Dict(
         clamp!(x, 1, 3)
         @test x == [1.0, 1.0, 2.0, 3.0, 3.0]
     end
+
+    @test clamp(typemax(UInt64), Int64) === typemax(Int64)
+    @test clamp(typemin(Int), UInt64) === typemin(UInt64)
+    @test clamp(Int16(-1), UInt16) === UInt16(0)
+    @test clamp(-1, 2, UInt(0)) === UInt(2)
+    @test clamp(typemax(UInt16), Int16) === Int16(32767)
+
+    # clamp should not allocate a BigInt for typemax(Int16)
+    x = big(2) ^ 100
+    @test (@allocated clamp(x, Int16)) == 0
+
+    x = clamp(2.0, BigInt)
+    @test x isa BigInt
+    @test x == big(2)
 end
 
 @testset "constants" begin
@@ -871,14 +885,14 @@ end
 end
 
 @testset "isapprox" begin
-  # #22742: updated isapprox semantics
-  @test !isapprox(1.0, 1.0+1e-12, atol=1e-14)
-  @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0)))
-  @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0)))
+    # #22742: updated isapprox semantics
+    @test !isapprox(1.0, 1.0+1e-12, atol=1e-14)
+    @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0)))
+    @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0)))
 
-  # #13132: Use of `norm` kwarg for scalar arguments
-  @test isapprox(1, 1+1.0e-12, norm=abs)
-  @test !isapprox(1, 1+1.0e-12, norm=x->1)
+    # #13132: Use of `norm` kwarg for scalar arguments
+    @test isapprox(1, 1+1.0e-12, norm=abs)
+    @test !isapprox(1, 1+1.0e-12, norm=x->1)
 end
 
 # test AbstractFloat fallback pr22716
@@ -1098,7 +1112,7 @@ pcnfloat(x) = prevfloat(x), x, nextfloat(x)
 import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X
 
 @testset "sinh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test sinh(zero(T)) === zero(T)
         @test sinh(-zero(T)) === -zero(T)
         @test sinh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1106,15 +1120,17 @@ import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X
         @test sinh(T(1000)) === T(Inf)
         @test sinh(-T(1000)) === -T(Inf)
         @test isnan_type(T, sinh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
-            @test sinh(x) ≈ sinh(big(x)) rtol=eps(T)
-            @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
+                @test sinh(x) ≈ sinh(big(x)) rtol=eps(T)
+                @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T)
+            end
         end
     end
 end
 
 @testset "cosh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test cosh(zero(T)) === one(T)
         @test cosh(-zero(T)) === one(T)
         @test cosh(nextfloat(zero(T))) === one(T)
@@ -1122,15 +1138,17 @@ end
         @test cosh(T(1000)) === T(Inf)
         @test cosh(-T(1000)) === T(Inf)
         @test isnan_type(T, cosh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
-            @test cosh(x) ≈ cosh(big(x)) rtol=eps(T)
-            @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
+                @test cosh(x) ≈ cosh(big(x)) rtol=eps(T)
+                @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T)
+            end
         end
     end
 end
 
 @testset "tanh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test tanh(zero(T)) === zero(T)
         @test tanh(-zero(T)) === -zero(T)
         @test tanh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1138,9 +1156,11 @@ end
         @test tanh(T(1000)) === one(T)
         @test tanh(-T(1000)) === -one(T)
         @test isnan_type(T, tanh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)]))
-            @test tanh(x) ≈ tanh(big(x)) rtol=eps(T)
-            @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)]))
+                @test tanh(x) ≈ tanh(big(x)) rtol=eps(T)
+                @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T)
+            end
         end
     end
     @test tanh(18.0) ≈ tanh(big(18.0)) rtol=eps(Float64)
@@ -1148,7 +1168,7 @@ end
 end
 
 @testset "asinh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test asinh(zero(T)) === zero(T)
         @test asinh(-zero(T)) === -zero(T)
         @test asinh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1162,7 +1182,7 @@ end
 end
 
 @testset "acosh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test_throws DomainError acosh(T(0.1))
         @test acosh(one(T)) === zero(T)
         @test isnan_type(T, acosh(T(NaN)))
@@ -1173,7 +1193,7 @@ end
 end
 
 @testset "atanh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test_throws DomainError atanh(T(1.1))
         @test atanh(zero(T)) === zero(T)
         @test atanh(-zero(T)) === -zero(T)
@@ -1459,6 +1479,25 @@ end
     # two cases where we have observed > 1 ULP in the past
     @test 0.0013653274095082324^-97.60372292227069 == 4.088393948750035e279
     @test 8.758520413376658e-5^70.55863059215994 == 5.052076767078296e-287
+
+    # issue #53881
+    c53881 = 2.2844135865398217e222 # check correctness within 2 ULPs
+    @test prevfloat(1.0) ^ -Int64(2)^62 ≈ c53881 atol=2eps(c53881)
+    @test 2.0 ^ typemin(Int) == 0.0
+    @test (-1.0) ^ typemin(Int) == 1.0
+    Z = Int64(2)
+    E = prevfloat(1.0)
+    @test E ^ (-Z^54) ≈ 7.38905609893065
+    @test E ^ (-Z^62) ≈ 2.2844135865231613e222
+    @test E ^ (-Z^63) == Inf
+    @test abs(E ^ (Z^62-1) * E ^ (-Z^62+1) - 1) <= eps(1.0)
+    n, x = -1065564664, 0.9999997040311492
+    @test abs(x^n - Float64(big(x)^n)) / eps(x^n) == 0 # ULPs
+    @test E ^ (big(2)^100 + 1) == 0
+    @test E ^ 6705320061009595392 == nextfloat(0.0)
+    n = Int64(1024 / log2(E))
+    @test E^n == Inf
+    @test E^float(n) == Inf
 end
 
 # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding.
@@ -1556,13 +1595,19 @@ end
         @testset let T = T
             for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot,
                         asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1]
-                @testset let f = f
-                    @test Base.infer_return_type(f, (T,)) != Union{}
-                    @test Core.Compiler.is_foldable(Base.infer_effects(f, (T,)))
+                @testset let f = f,
+                             rt = Base.infer_return_type(f, (T,)),
+                             effects = Base.infer_effects(f, (T,))
+                    @test rt != Union{}
+                    @test Core.Compiler.is_foldable(effects)
                 end
             end
-            @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,Int)))
-            @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,T)))
+            @testset let effects = Base.infer_effects(^, (T,Int))
+                @test Core.Compiler.is_foldable(effects)
+            end
+            @testset let effects = Base.infer_effects(^, (T,T))
+                @test Core.Compiler.is_foldable(effects)
+            end
         end
     end
 end;
diff --git a/test/misc.jl b/test/misc.jl
index e870c7f491c13..66b70956935cd 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -132,7 +132,7 @@ end
 # Lockable{T, L<:AbstractLock}
 using Base: Lockable
 let
-    @test_broken Base.isexported(Base, :Lockable)
+    @test Base.isexported(Base, :Lockable)
     lockable = Lockable(Dict("foo" => "hello"), ReentrantLock())
     # note field access is non-public
     @test lockable.value["foo"] == "hello"
@@ -159,6 +159,16 @@ let
     @test @lock(lockable2, lockable2[]["foo"]) == "hello"
 end
 
+@testset "`show` for ReentrantLock" begin
+    l = ReentrantLock()
+    @test repr(l) == "ReentrantLock()"
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+    @lock l begin
+        @test startswith(repr("text/plain", l), "ReentrantLock() (locked by current Task (")
+    end
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+end
+
 for l in (Threads.SpinLock(), ReentrantLock())
     @test get_finalizers_inhibited() == 0
     @test lock(get_finalizers_inhibited, l) == 1
@@ -251,12 +261,14 @@ let c = Ref(0),
     @test c[] == 100
 end
 
-@test_throws ErrorException("deadlock detected: cannot wait on current task") wait(current_task())
+@test_throws ConcurrencyViolationError("deadlock detected: cannot wait on current task") wait(current_task())
+
+@test_throws ConcurrencyViolationError("Cannot yield to currently running task!") yield(current_task())
 
 # issue #41347
 let t = @async 1
     wait(t)
-    @test_throws ErrorException yield(t)
+    @test_throws ConcurrencyViolationError yield(t)
 end
 
 let t = @async error(42)
@@ -319,25 +331,34 @@ v11801, t11801 = @timed sin(1)
 @test names(@__MODULE__, all = true) == names_before_timing
 
 redirect_stdout(devnull) do # suppress time prints
+
 # Accepted @time argument formats
 @test @time true
 @test @time "message" true
+@test @time 1 true
 let msg = "message"
     @test @time msg true
 end
 let foo() = "message"
     @test @time foo() true
 end
+let foo() = 1
+    @test @time foo() true
+end
 
 # Accepted @timev argument formats
 @test @timev true
 @test @timev "message" true
+@test @timev 1 true
 let msg = "message"
     @test @timev msg true
 end
 let foo() = "message"
     @test @timev foo() true
 end
+let foo() = 1
+    @test @timev foo() true
+end
 
 # @showtime
 @test @showtime true
@@ -571,6 +592,24 @@ end
 # issue #44780
 @test summarysize(BigInt(2)^1000) > summarysize(BigInt(2))
 
+# issue #53061
+mutable struct S53061
+    x::Union{Float64, Tuple{Float64, Float64}}
+    y::Union{Float64, Tuple{Float64, Float64}}
+end
+let s = S53061[S53061(rand(), (rand(),rand())) for _ in 1:10^4]
+    @test allequal(summarysize(s) for i in 1:10)
+end
+struct Z53061
+    x::S53061
+    y::Int64
+end
+let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4]
+    @test allequal(summarysize(z) for i in 1:10)
+    # broken on i868 linux. issue #54895
+    @test abs(summarysize(z) - 640000)/640000 <= 0.01 broken = Sys.WORD_SIZE == 32 && Sys.islinux()
+end
+
 ## test conversion from UTF-8 to UTF-16 (for Windows APIs)
 
 # empty arrays
@@ -1522,9 +1561,29 @@ end
 @testset "Base docstrings" begin
     undoc = Docs.undocumented_names(Base)
     @test_broken isempty(undoc)
-    @test undoc == [:BufferStream, :CanonicalIndexError, :CapturedException, :Filesystem, :IOServer, :InvalidStateException, :Order, :PipeEndpoint, :Sort, :TTY]
+    @test undoc == [:BufferStream, :CanonicalIndexError, :CapturedException, :Filesystem, :IOServer, :InvalidStateException, :Order, :PipeEndpoint, :ScopedValues, :Sort, :TTY]
 end
 
 @testset "Base.Libc docstrings" begin
     @test isempty(Docs.undocumented_names(Libc))
 end
+
+@testset "Silenced missed transformations" begin
+    # Ensure the WarnMissedTransformationsPass is not on by default
+    src = """
+        @noinline iteration(i) = (@show(i); return nothing)
+        @eval function loop_unroll_full_fail(N)
+            for i in 1:N
+              iteration(i)
+              \$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"), 1)))
+          end
+       end
+       loop_unroll_full_fail(3)
+    """
+    out_err = mktemp() do _, f
+        run(`$(Base.julia_cmd()) -e "$src"`, devnull, devnull, f)
+        seekstart(f)
+        read(f, String)
+    end
+    @test !occursin("loop not unrolled", out_err)
+end
diff --git a/test/mpfr.jl b/test/mpfr.jl
index a0dd15d97f70c..9a9698ba72c2c 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -1046,3 +1046,45 @@ end
         @test Float16(bf) == Float16(2.0e-7)
     end
 end
+
+# PR #54284
+import Base.MPFR: clear_flags, had_underflow, had_overflow, had_divbyzero,
+    had_nan, had_inexact_exception, had_range_exception
+
+function all_flags_54284()
+    (
+        had_underflow(),
+        had_overflow(),
+        had_divbyzero(),
+        had_nan(),
+        had_inexact_exception(),
+        had_range_exception(),
+    )
+end
+@testset "MPFR flags" begin
+    let x, a = floatmin(BigFloat), b = floatmax(BigFloat), c = zero(BigFloat)
+        clear_flags()
+        @test !any(all_flags_54284())
+
+        x = a - a # normal
+        @test all_flags_54284() == (false, false, false, false, false, false)
+        x = 1 / c # had_divbyzero
+        @test all_flags_54284() == (false, false, true, false, false, false)
+        clear_flags()
+        x = nextfloat(a) - a # underflow
+        @test all_flags_54284() == (true, false, false, false, true, false)
+        clear_flags()
+        x = 1 / a # overflow
+        @test all_flags_54284() == (false, true, false, false, true, false)
+        clear_flags()
+        x = c / c # nan
+        @test all_flags_54284() == (false, false, false, true, false, false)
+        clear_flags()
+        x = prevfloat(BigFloat(1.0)) * 100 # inexact
+        @test all_flags_54284() == (false, false, false, false, true, false)
+        clear_flags()
+        try convert(Int, b); catch; end # range exception
+        @test all_flags_54284() == (false, false, false, false, false, true)
+        clear_flags()
+    end
+end
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index 9c16d24b0c575..b8dba5c06422e 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base: delete
+
 @test_throws TypeError NamedTuple{1,Tuple{}}
 @test_throws TypeError NamedTuple{(),1}
 @test_throws TypeError NamedTuple{(:a,1),Tuple{Int}}
@@ -28,13 +30,13 @@
 @test (x=4, y=5, z=6)[()] == NamedTuple()
 @test (x=4, y=5, z=6)[:] == (x=4, y=5, z=6)
 @test NamedTuple()[()] == NamedTuple()
-@test_throws ErrorException (x=4, y=5, z=6).a
+@test_throws FieldError (x=4, y=5, z=6).a
 @test_throws BoundsError (a=2,)[0]
 @test_throws BoundsError (a=2,)[2]
-@test_throws ErrorException (x=4, y=5, z=6)[(:a,)]
-@test_throws ErrorException (x=4, y=5, z=6)[(:x, :a)]
-@test_throws ErrorException (x=4, y=5, z=6)[[:a]]
-@test_throws ErrorException (x=4, y=5, z=6)[[:x, :a]]
+@test_throws FieldError (x=4, y=5, z=6)[(:a,)]
+@test_throws FieldError (x=4, y=5, z=6)[(:x, :a)]
+@test_throws FieldError (x=4, y=5, z=6)[[:a]]
+@test_throws FieldError (x=4, y=5, z=6)[[:x, :a]]
 @test_throws ErrorException (x=4, y=5, z=6)[(:x, :x)]
 
 @test length(NamedTuple()) == 0
@@ -255,7 +257,7 @@ function abstr_nt_22194_2()
     a = NamedTuple[(a=1,), (b=2,)]
     return a[1].b
 end
-@test_throws ErrorException abstr_nt_22194_2()
+@test_throws FieldError abstr_nt_22194_2()
 @test Base.return_types(abstr_nt_22194_2, ()) == Any[Any]
 
 mutable struct HasAbstractNamedTuples
@@ -282,6 +284,11 @@ end
 abstr_nt_22194_3()
 @test Base.return_types(abstr_nt_22194_3, ()) == Any[Any]
 
+@test delete((a=1,), :a) == NamedTuple()
+@test delete((a=1, b=2), :a) == (b=2,)
+@test delete((a=1, b=2, c=3), :b) == (a=1, c=3)
+@test delete((a=1, b=2, c=3), :z) == (a=1, b=2, c=3)
+
 @test Base.structdiff((a=1, b=2), (b=3,)) == (a=1,)
 @test Base.structdiff((a=1, b=2, z=20), (b=3,)) == (a=1, z=20)
 @test Base.structdiff((a=1, b=2, z=20), (b=3, q=20, z=1)) == (a=1,)
@@ -398,14 +405,14 @@ for f in (Base.merge, Base.structdiff)
         fallback_func(a::NamedTuple, b::NamedTuple) = @invoke f(a::NamedTuple, b::NamedTuple)
         @testset let eff = Base.infer_effects(fallback_func)
             @test Core.Compiler.is_foldable(eff)
-            @test eff.nonoverlayed
+            @test Core.Compiler.is_nonoverlayed(eff)
         end
         @test only(Base.return_types(fallback_func)) == NamedTuple
         # test if `max_methods = 4` setting works as expected
         general_func(a::NamedTuple, b::NamedTuple) = f(a, b)
         @testset let eff = Base.infer_effects(general_func)
             @test Core.Compiler.is_foldable(eff)
-            @test eff.nonoverlayed
+            @test Core.Compiler.is_nonoverlayed(eff)
         end
         @test only(Base.return_types(general_func)) == NamedTuple
     end
@@ -437,3 +444,18 @@ let c = (a=1, b=2),
     d = (b=3, c=(d=1,))
     @test @inferred(mergewith51009((x,y)->y, c, d)) === (a = 1, b = 3, c = (d = 1,))
 end
+
+@test_throws ErrorException NamedTuple{(), Union{}}
+for NT in (NamedTuple{(:a, :b), Union{}}, NamedTuple{(:a, :b), T} where T<:Union{})
+    @test fieldtype(NT, 1) == Union{}
+    @test fieldtype(NT, :b) == Union{}
+    @test_throws FieldError fieldtype(NT, :c)
+    @test_throws BoundsError fieldtype(NT, 0)
+    @test_throws BoundsError fieldtype(NT, 3)
+    @test Base.return_types((Type{NT},)) do NT; fieldtype(NT, :a); end == Any[Type{Union{}}]
+    @test fieldtype(NamedTuple{<:Any, Union{}}, 1) == Union{}
+end
+let NT = NamedTuple{<:Any, Union{}}
+    @test fieldtype(NT, 100) == Union{}
+    @test only(Base.return_types((Type{NT},)) do NT; fieldtype(NT, 100); end) >: Type{Union{}}
+end
diff --git a/test/numbers.jl b/test/numbers.jl
index 47d7e87753b11..fc3dc2c06bb7c 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -262,7 +262,7 @@ end
 
 # GMP allocation overflow should not cause crash
 if Base.GMP.ALLOC_OVERFLOW_FUNCTION[] && sizeof(Int) > 4
-  @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong))
+    @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong))
 end
 
 # exponentiating with a negative base
@@ -1114,10 +1114,30 @@ end
 end
 
 @testset "Irrational zero and one" begin
-    @test one(pi) === true
-    @test zero(pi) === false
-    @test one(typeof(pi)) === true
-    @test zero(typeof(pi)) === false
+    for i in (π, ℯ, γ, catalan)
+        @test one(i) === true
+        @test zero(i) === false
+        @test one(typeof(i)) === true
+        @test zero(typeof(i)) === false
+    end
+end
+
+@testset "Irrational iszero, isfinite, isinteger, and isone" begin
+    for i in (π, ℯ, γ, catalan)
+        @test !iszero(i)
+        @test !isone(i)
+        @test !isinteger(i)
+        @test isfinite(i)
+    end
+end
+
+@testset "Irrational promote_type" begin
+    for T in (Float16, Float32, Float64)
+        for i in (π, ℯ, γ, catalan)
+            @test T(2.0) * i ≈ T(2.0) * T(i)
+            @test T(2.0) * i isa T
+        end
+    end
 end
 
 @testset "Irrationals compared with Irrationals" begin
@@ -1138,6 +1158,8 @@ end
 end
 
 @testset "Irrationals compared with Rationals and Floats" begin
+    @test pi != Float64(pi)
+    @test Float64(pi) != pi
     @test Float64(pi,RoundDown) < pi
     @test Float64(pi,RoundUp) > pi
     @test !(Float64(pi,RoundDown) > pi)
@@ -1156,6 +1178,7 @@ end
     @test nextfloat(big(pi)) > pi
     @test !(prevfloat(big(pi)) > pi)
     @test !(nextfloat(big(pi)) < pi)
+    @test big(typeof(pi)) == BigFloat
 
     @test 2646693125139304345//842468587426513207 < pi
     @test !(2646693125139304345//842468587426513207 > pi)
@@ -2776,6 +2799,20 @@ Base.literal_pow(::typeof(^), ::PR20530, ::Val{p}) where {p} = 2
     @test [2,4,8].^-2 == [0.25, 0.0625, 0.015625]
     @test [2, 4, 8].^-2 .* 4 == [1.0, 0.25, 0.0625] # nested literal_pow
     @test ℯ^-2 == exp(-2) ≈ inv(ℯ^2) ≈ (ℯ^-1)^2 ≈ sqrt(ℯ^-4)
+
+    if Int === Int32
+        p = 2147483647
+        @test x^p == 1
+        @test x^2147483647 == 2
+        @test (@fastmath x^p) == 1
+        @test (@fastmath x^2147483647) == 2
+    elseif Int === Int64
+        p = 9223372036854775807
+        @test x^p == 1
+        @test x^9223372036854775807 == 2
+        @test (@fastmath x^p) == 1
+        @test (@fastmath x^9223372036854775807) == 2
+    end
 end
 module M20889 # do we get the expected behavior without importing Base.^?
     using Test
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 9d6a8b08c0b1f..fb5855dfbaa0d 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -383,6 +383,18 @@ v2 = copy(v)
 @test v2[end-1] == 2
 @test v2[end] == 1
 
+# push!(v::AbstractVector, x...)
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 3)) === v2
+@test v2[axes(v,1)] == v
+@test v2[end] == 3
+@test v2[begin] == v[begin] == v[-2]
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 5, 6)) == v2
+@test v2[axes(v,1)] == v
+@test v2[end-1] == 5
+@test v2[end] == 6
+
 # append! from array
 v2 = copy(v)
 @test append!(v2, [2, 1]) === v2
@@ -399,6 +411,23 @@ v2 = copy(v)
 @test v2[axes(v, 1)] == v
 @test v2[lastindex(v)+1:end] == [2, 1]
 
+# append!(::AbstractVector, ...)
+# append! from array
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, [2, 1]::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from HasLength iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1])::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from SizeUnknown iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1] if true)::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+
 # other functions
 v = OffsetArray(v0, (-3,))
 @test lastindex(v) == 1
@@ -865,7 +894,23 @@ end
     @test CartesianIndices(A) == CartesianIndices(B)
 end
 
+@testset "overflowing show" begin
+    A = OffsetArray(repeat([1], 1), typemax(Int)-1)
+    b = IOBuffer(maxsize=10)
+    show(b, A)
+    @test String(take!(b)) == "[1]"
+    show(b, (A, A))
+    @test String(take!(b)) == "([1], [1])"
+end
+
 @testset "indexing views (#53249)" begin
     v = view([1,2,3,4], :)
     @test v[Base.IdentityUnitRange(2:3)] == OffsetArray(2:3, 2:3)
 end
+
+@testset "mapreduce with OffsetRanges" begin
+    r = 5:100
+    a = OffsetArray(r, 2)
+    b = sum(a, dims=1)
+    @test b[begin] == sum(r)
+end
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
index e9f1601019a59..79c456de127bd 100644
--- a/test/opaque_closure.jl
+++ b/test/opaque_closure.jl
@@ -10,7 +10,7 @@ const lno = LineNumberNode(1, :none)
 
 let ci = @code_lowered const_int()
     @eval function oc_trivial()
-        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
@@ -19,7 +19,7 @@ end
 
 let ci = @code_lowered const_int()
     @eval function oc_simple_inf()
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
@@ -33,7 +33,7 @@ end
 (a::OcClos2Int)() = getfield(a, 1) + getfield(a, 2)
 let ci = @code_lowered OcClos2Int(1, 2)();
     @eval function oc_trivial_clos()
-        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             1, 2))
     end
@@ -42,7 +42,7 @@ end
 
 let ci = @code_lowered OcClos2Int(1, 2)();
     @eval function oc_self_call_clos()
-        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             1, 2))()
     end
@@ -59,7 +59,7 @@ end
 (a::OcClos1Any)() = getfield(a, 1)
 let ci = @code_lowered OcClos1Any(1)()
     @eval function oc_pass_clos(x)
-        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :x))
     end
@@ -69,7 +69,7 @@ end
 
 let ci = @code_lowered OcClos1Any(1)()
     @eval function oc_infer_pass_clos(x)
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :x))
     end
@@ -81,7 +81,7 @@ end
 
 let ci = @code_lowered identity(1)
     @eval function oc_infer_pass_id()
-        $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 1, false, lno, ci)))
     end
 end
@@ -103,7 +103,7 @@ end
 
 let ci = @code_lowered OcOpt([1 2])()
     @eval function oc_opt_ndims(A)
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :A))
     end
@@ -151,26 +151,33 @@ end # module test_world_age
 
 function maybe_vararg(isva::Bool)
     T = isva ? Vararg{Int} : Int
-    @opaque Tuple{T} (x...)->x
+    @opaque Tuple{T}->_ (x...)->x
 end
 @test maybe_vararg(false)(1) == (1,)
 @test_throws MethodError maybe_vararg(false)(1,2,3)
 @test maybe_vararg(true)(1) == (1,)
 @test maybe_vararg(true)(1,2,3) == (1,2,3)
-@test (@opaque Tuple{Int, Int} (a, b, x...)->x)(1,2) === ()
-@test (@opaque Tuple{Int, Int} (a, x...)->x)(1,2) === (2,)
-@test (@opaque Tuple{Int, Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4)
+@test (@opaque Tuple{Int, Int}->_ (a, b, x...)->x)(1,2) === ()
+@test (@opaque Tuple{Int, Int}->Tuple{} (a, b, x...)->x)(1,2) === ()
+@test (@opaque _->Tuple{Vararg{Int}} (a, b, x...)->x)(1,2) === ()
+@test (@opaque Tuple{Int, Int}->_ (a, x...)->x)(1,2) === (2,)
+@test (@opaque Tuple{Int, Int}->Tuple{Int} (a, x...)->x)(1,2) === (2,)
+@test (@opaque _->Tuple{Vararg{Int}} (a, x...)->x)(1,2) === (2,)
+@test (@opaque Tuple{Int, Vararg{Int}}->_ (a, x...)->x)(1,2,3,4) === (2,3,4)
+@test (@opaque Tuple{Int, Vararg{Int}}->Tuple{Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4)
 @test (@opaque (a::Int, x::Int...)->x)(1,2,3) === (2,3)
+@test (@opaque _->Tuple{Vararg{Int}} (a::Int, x::Int...)->x)(1,2,3) === (2,3)
+@test (@opaque _->_ (a::Int, x::Int...)->x)(1,2,3) === (2,3)
 
-@test_throws ErrorException (@opaque Tuple{Vararg{Int}} x->x)
-@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}} x->x)
-@test_throws ErrorException (@opaque Tuple{Int, Int} x->x)
-@test_throws ErrorException (@opaque Tuple{Any} (x,y)->x)
-@test_throws ErrorException (@opaque Tuple{Vararg{Int}} (x,y...)->x)
-@test_throws ErrorException (@opaque Tuple{Int} (x,y,z...)->x)
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Int}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Any}->_ (x,y)->x)
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ (x,y...)->x)
+@test_throws ErrorException (@opaque Tuple{Int}->_ (x,y,z...)->x)
 
 # cannot specify types both on arguments and separately
-@test_throws ErrorException @eval @opaque Tuple{Any} (x::Int)->x
+@test_throws ErrorException @eval @opaque Tuple{Any}->_ (x::Int)->x
 
 # Vargarg in complied mode
 mk_va_opaque() = @opaque (x...)->x
@@ -178,17 +185,19 @@ mk_va_opaque() = @opaque (x...)->x
 @test mk_va_opaque()(1,2) == (1,2)
 
 # OpaqueClosure show method
-@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)::Any->◌"
+@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)->◌::Any"
 
 # Opaque closure in CodeInfo returned from generated functions
 let ci = @code_lowered const_int()
     global function mk_ocg(world::UInt, source, args...)
         @nospecialize
-        cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1]
         cig.slotnames = Symbol[Symbol("#self#")]
         cig.slottypes = Any[Any]
         cig.slotflags = UInt8[0x00]
+        cig.nargs = 1
+        cig.isva = false
         return cig
     end
 end
@@ -239,12 +248,21 @@ let foo::Int = 42
 end
 
 let oc = @opaque a->sin(a)
-    @test length(code_typed(oc, (Int,))) == 1
+    let opt = code_typed(oc, (Int,))
+        @test length(opt) == 1
+        @test opt[1][2] === Float64
+    end
+    let unopt = code_typed(oc, (Int,); optimize=false)
+        @test length(unopt) == 1
+    end
 end
 
 # constructing an opaque closure from IRCode
 let src = first(only(code_typed(+, (Int, Int))))
     ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes)
+    ir.argtypes[1] = Tuple{}
+    @test ir.debuginfo.def === nothing
+    ir.debuginfo.def = Symbol(@__FILE__)
     @test OpaqueClosure(src; sig=Tuple{Int, Int}, rettype=Int, nargs=2)(40, 2) == 42
     oc = OpaqueClosure(ir)
     @test oc(40, 2) == 42
@@ -253,9 +271,12 @@ let src = first(only(code_typed(+, (Int, Int))))
     @test OpaqueClosure(ir)(40, 2) == 42 # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
 end
 let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
     @test OpaqueClosure(ir)(42) == sin(42)
     @test OpaqueClosure(ir)(42) == sin(42) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
+    @test length(code_typed(OpaqueClosure(ir))) == 1
     ir = first(only(Base.code_ircode(sin, (Float64,))))
+    ir.argtypes[1] = Tuple{}
     @test OpaqueClosure(ir)(42.) == sin(42.)
     @test OpaqueClosure(ir)(42.) == sin(42.) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
 end
@@ -264,11 +285,14 @@ end
 let src = code_typed((Int,Int)) do x, y...
         return (x, y)
     end |> only |> first
+    src.slottypes[1] = Tuple{}
     let oc = OpaqueClosure(src; rettype=Tuple{Int, Tuple{Int}}, sig=Tuple{Int, Int}, nargs=2, isva=true)
         @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
     end
     ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes)
+    @test ir.debuginfo.def === nothing
+    ir.debuginfo.def = Symbol(@__FILE__)
     let oc = OpaqueClosure(ir; isva=true)
         @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
@@ -281,7 +305,7 @@ eval_oc_spec(oc) = oc()
 for f in (const_int, const_int_barrier)
     ci = code_lowered(f, Tuple{})[1]
     for compiled in (true, false)
-        oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64,
+        oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
         oc_mismatch = let ci = code_lowered(f, Tuple{})[1]
             if compiled
@@ -302,6 +326,7 @@ f_oc_throws() = error("oops")
 @noinline function make_oc_and_collect_bt()
     did_gc = Ref{Bool}(false)
     bt = let ir = first(only(Base.code_ircode(f_oc_throws, ())))
+        ir.argtypes[1] = Tuple
         sentinel = Ref{Any}(nothing)
         oc = OpaqueClosure(ir, sentinel)
         finalizer(sentinel) do x
@@ -338,6 +363,7 @@ ccall_op_arg_restrict2_bad_args() = op_arg_restrict2((1.,), 2)
 let ir = Base.code_ircode((Int,Int)) do x, y
         @noinline x * y
     end |> only |> first
+    ir.argtypes[1] = Tuple{}
     oc = Core.OpaqueClosure(ir)
     io = IOBuffer()
     code_llvm(io, oc, Tuple{Int,Int})
@@ -349,3 +375,15 @@ end
 foopaque() = Base.Experimental.@opaque(@noinline x::Int->println(x))(1)
 
 code_llvm(devnull,foopaque,()) #shouldn't crash
+
+let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    @test (Base.show_method(IOBuffer(), oc.source::Method); true)
+end
+
+let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir; do_compile=false)
+    @test oc(1) == sin(1)
+end
diff --git a/test/operators.jl b/test/operators.jl
index 95006235692a0..d97db15def80f 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -328,9 +328,21 @@ end
     @test lt5(4) && !lt5(5)
 end
 
+@testset "in tuples" begin
+    @test ∈(5, (1,5,10,11))
+    @test ∉(0, (1,5,10,11))
+    @test ∈(5, (1,"hi","hey",5.0))
+    @test ∉(0, (1,"hi","hey",5.0))
+    @test ∈(5, (5,))
+    @test ∉(0, (5,))
+    @test ∉(5, ())
+end
+
 @testset "ni" begin
     @test ∋([1,5,10,11], 5)
     @test !∋([1,10,11], 5)
+    @test ∋((1,5,10,11), 5)
+    @test ∌((1,10,11), 5)
     @test ∋(5)([5,1])
     @test !∋(42)([0,1,100])
     @test ∌(0)(1:10)
@@ -367,7 +379,8 @@ end
     @test B46327() <= B46327()
 end
 
-@testset "concrete eval `x in itr::Tuple`" begin
+@testset "inference for `x in itr::Tuple`" begin
+    # concrete evaluation
     @test Core.Compiler.is_foldable(Base.infer_effects(in, (Int,Tuple{Int,Int,Int})))
     @test Core.Compiler.is_foldable(Base.infer_effects(in, (Char,Tuple{Char,Char,Char})))
     for i = (1,2,3)
@@ -377,10 +390,23 @@ end
             end |> only == Val{true}
         end
     end
-    @test Base.return_types() do
+    @test Base.infer_return_type() do
         Val(4 in (1,2,3))
-    end |> only == Val{false}
-    @test Base.return_types() do
+    end == Val{false}
+    @test Base.infer_return_type() do
         Val('1' in ('1','2','3'))
-    end |> only == Val{true}
+    end == Val{true}
+
+    # constant propagation
+    @test Base.infer_return_type((Int,Int)) do x, y
+        Val(1 in (x,2,y))
+    end >: Val{true}
+    @test Base.infer_return_type((Int,Int)) do x, y
+        Val(2 in (x,2,y))
+    end == Val{true}
+
+    # should use the loop implementation given large tuples to avoid inference blowup
+    let t = ntuple(x->'A', 10000);
+        @test Base.infer_return_type(in, (Char,typeof(t))) == Bool
+    end
 end
diff --git a/test/precompile.jl b/test/precompile.jl
index c06145cba8416..bc738e557bb51 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -1,12 +1,13 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-original_depot_path = copy(Base.DEPOT_PATH)
-original_load_path = copy(Base.LOAD_PATH)
-
 using Test, Distributed, Random, Logging
 using REPL # doc lookup function
 
+include("precompile_utils.jl")
+
 Foo_module = :Foo4b3a94a1a081a8cb
+foo_incl_dep = :foo4b3a94a1a081a8cb
+bar_incl_dep = :bar4b3a94a1a081a8cb
 Foo2_module = :F2oo4b3a94a1a081a8cb
 FooBase_module = :FooBase4b3a94a1a081a8cb
 @eval module ConflictingBindings
@@ -16,35 +17,9 @@ FooBase_module = :FooBase4b3a94a1a081a8cb
 end
 using .ConflictingBindings
 
-function precompile_test_harness(@nospecialize(f), testset::String)
-    @testset "$testset" begin
-        precompile_test_harness(f, true)
-    end
-end
-function precompile_test_harness(@nospecialize(f), separate::Bool)
-    load_path = mktempdir()
-    load_cache_path = separate ? mktempdir() : load_path
-    try
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-        f(load_path)
-    finally
-        try
-            rm(load_path, force=true, recursive=true)
-        catch err
-            @show err
-        end
-        if separate
-            try
-                rm(load_cache_path, force=true, recursive=true)
-            catch err
-                @show err
-            end
-        end
-        filter!((≠)(load_path), LOAD_PATH)
-        separate && filter!((≠)(load_cache_path), DEPOT_PATH)
-    end
-    nothing
+@testset "object_build_id" begin
+    @test Base.object_build_id([1]) === nothing
+    @test Base.object_build_id(Base) == Base.module_build_id(Base)
 end
 
 # method root provenance
@@ -102,6 +77,8 @@ precompile_test_harness(false) do dir
     Foo_file = joinpath(dir, "$Foo_module.jl")
     Foo2_file = joinpath(dir, "$Foo2_module.jl")
     FooBase_file = joinpath(dir, "$FooBase_module.jl")
+    foo_file = joinpath(dir, "$foo_incl_dep.jl")
+    bar_file = joinpath(dir, "$bar_incl_dep.jl")
 
     write(FooBase_file,
           """
@@ -150,11 +127,11 @@ precompile_test_harness(false) do dir
 
               # test that docs get reconnected
               @doc "foo function" foo(x) = x + 1
-              include_dependency("foo.jl")
-              include_dependency("foo.jl")
+              include_dependency("$foo_incl_dep.jl")
+              include_dependency("$foo_incl_dep.jl")
               module Bar
                   public bar
-                  include_dependency("bar.jl")
+                  include_dependency("$bar_incl_dep.jl")
               end
               @doc "Bar module" Bar # this needs to define the META dictionary via eval
               @eval Bar @doc "bar function" bar(x) = x + 2
@@ -245,9 +222,9 @@ precompile_test_harness(false) do dir
               gnc() = overridenc(1.0)
               Test.@test 1 < gnc() < 5 # compile this
 
-              const abigfloat_f() = big"12.34"
+              abigfloat_f() = big"12.34"
               const abigfloat_x = big"43.21"
-              const abigint_f() = big"123"
+              abigint_f() = big"123"
               const abigint_x = big"124"
 
               # issue #51111
@@ -297,6 +274,8 @@ precompile_test_harness(false) do dir
               oid_mat_int = objectid(a_mat_int)
           end
           """)
+    # Issue #52063
+    touch(foo_file); touch(bar_file)
     # Issue #12623
     @test __precompile__(false) === nothing
 
@@ -382,6 +361,9 @@ precompile_test_harness(false) do dir
         @test objectid(Foo.a_vec_int) === Foo.oid_vec_int
         @test objectid(Foo.a_mat_int) === Foo.oid_mat_int
         @test Foo.oid_vec_int !== Foo.oid_mat_int
+        @test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int)
+        @test Base.object_build_id(Foo) == Base.module_build_id(Foo)
+        @test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo)
     end
 
     @eval begin function ccallable_test()
@@ -436,8 +418,7 @@ precompile_test_harness(false) do dir
         modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
         discard_module = mod_fl_mt -> mod_fl_mt.filename
         @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ]
-        # foo.jl and bar.jl are never written to disk, so they are not relocatable
-        @test map(x -> x.filename, deps) == [ Foo_file, joinpath("@depot", "foo.jl"), joinpath("@depot", "bar.jl") ]
+        @test map(x -> x.filename, deps) == [ Foo_file, joinpath("@depot", foo_file), joinpath("@depot", bar_file) ]
         @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)),
                             Base.PkgId(Foo) => Base.PkgId(Foo2),
                             Base.PkgId(Foo) => Base.PkgId(Test),
@@ -446,7 +427,7 @@ precompile_test_harness(false) do dir
         @test !isempty(srctxt) && srctxt == read(Foo_file, String)
         @test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt")
         # dependencies declared with `include_dependency` should not be stored
-        @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl"))
+        @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, foo_file))
 
         modules, deps1 = Base.cache_dependencies(cachefile)
         modules_ok = merge(
@@ -469,7 +450,9 @@ precompile_test_harness(false) do dir
             # and their dependencies
             Dict(Base.PkgId(Base.root_module(Base, :SHA)) => Base.module_build_id(Base.root_module(Base, :SHA))),
             Dict(Base.PkgId(Base.root_module(Base, :Markdown)) => Base.module_build_id(Base.root_module(Base, :Markdown))),
+            Dict(Base.PkgId(Base.root_module(Base, :JuliaSyntaxHighlighting)) => Base.module_build_id(Base.root_module(Base, :JuliaSyntaxHighlighting))),
             Dict(Base.PkgId(Base.root_module(Base, :StyledStrings)) => Base.module_build_id(Base.root_module(Base, :StyledStrings))),
+
             # and their dependencies
             Dict(Base.PkgId(Base.root_module(Base, :Base64)) => Base.module_build_id(Base.root_module(Base, :Base64))),
         )
@@ -614,6 +597,10 @@ precompile_test_harness(false) do dir
     @test Base.invokelatest(Baz.baz) === 1
     @test Baz === UseBaz.Baz
 
+    # should not throw if the cachefile does not exist
+    @test !isfile("DoesNotExist.ji")
+    @test Base.stale_cachefile("", "DoesNotExist.ji") === true
+
     # Issue #12720
     FooBar1_file = joinpath(dir, "FooBar1.jl")
     write(FooBar1_file,
@@ -634,13 +621,13 @@ precompile_test_harness(false) do dir
     empty_prefs_hash = Base.get_preferences_hash(nothing, String[])
     @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash)
     @test isfile(joinpath(cachedir, "FooBar.ji"))
-    Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing}
+    Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String, UInt128} : Tuple{<:Vector, Nothing, UInt128}
     @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
     @test !isdefined(Main, :FooBar)
     @test !isdefined(Main, :FooBar1)
 
     relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl")
-    @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String} : Bool) # `..` is not a symlink on Windows
+    @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String, UInt128} : Bool) # `..` is not a symlink on Windows
     mkdir(joinpath(dir, "subfolder"))
     @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
 
@@ -673,7 +660,7 @@ precompile_test_harness(false) do dir
           error("break me")
           end
           """)
-    @test_warn r"LoadError: break me\nStacktrace:\n \[1\] [\e01m\[]*error" try
+    @test_warn r"LoadError: break me\nStacktrace:\n[ ]*\[1\] [\e01m\[]*error" try
             Base.require(Main, :FooBar2)
             error("the \"break me\" test failed")
         catch exc
@@ -806,7 +793,7 @@ precompile_test_harness("code caching") do dir
     mi = minternal.specializations::Core.MethodInstance
     @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}}
     ci = mi.cache
-    @test ci.relocatability == 1
+    @test ci.relocatability == 0
     @test ci.inferred !== nothing
     # ...and that we can add "untracked" roots & non-relocatable CodeInstances to them too
     Base.invokelatest() do
@@ -1045,7 +1032,6 @@ precompile_test_harness("code caching") do dir
         @test mi.specTypes.parameters[end] === Integer ? !hv : hv
     end
 
-    setglobal!(Main, :inval, invalidations)
     idxs = findall(==("verify_methods"), invalidations)
     idxsbits = filter(idxs) do i
         mi = invalidations[i-1]
@@ -1355,6 +1341,25 @@ precompile_test_harness("package_callbacks") do dir
     finally
         pop!(Base.package_callbacks)
     end
+    Test5_module = :Teste4095a85
+    write(joinpath(dir, "$(Test5_module).jl"),
+    """
+    module $(Test5_module)
+    end
+    """)
+    Base.compilecache(Base.PkgId("$(Test5_module)"))
+    cnt = 0
+    push!(Base.package_callbacks, _->(cnt += 1))
+    try
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @test cnt == 1
+    finally
+        pop!(Base.package_callbacks)
+    end
 end
 
 # Issue #19960
@@ -1523,6 +1528,7 @@ precompile_test_harness("Issue #26028") do load_path
         module Foo26028
         module Bar26028
             x = 0
+            y = 0
         end
         function __init__()
             include(joinpath(@__DIR__, "Baz26028.jl"))
@@ -1532,7 +1538,10 @@ precompile_test_harness("Issue #26028") do load_path
     write(joinpath(load_path, "Baz26028.jl"),
         """
         module Baz26028
-        import Foo26028.Bar26028.x
+        using Test
+        @test_throws(ConcurrencyViolationError("deadlock detected in loading Foo26028 -> Foo26028"),
+                     @eval import Foo26028.Bar26028.x)
+        import ..Foo26028.Bar26028.y
         end
         """)
     Base.compilecache(Base.PkgId("Foo26028"))
@@ -1702,7 +1711,8 @@ precompile_test_harness("issue #46296") do load_path
 
         mi = first(Base.specializations(first(methods(identity))))
         ci = Core.CodeInstance(mi, nothing, Any, Any, nothing, nothing, zero(Int32), typemin(UInt),
-                               typemax(UInt), zero(UInt32), zero(UInt32), nothing, 0x00)
+                               typemax(UInt), zero(UInt32), nothing, 0x00,
+                               Core.DebugInfo(mi))
 
         __init__() = @assert ci isa Core.CodeInstance
 
@@ -1712,150 +1722,10 @@ precompile_test_harness("issue #46296") do load_path
     (@eval (using CodeInstancePrecompile))
 end
 
-let newinterp_path = abspath("compiler/newinterp.jl")
-    precompile_test_harness("AbstractInterpreter caching") do load_path
-        write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule
-            basic_callee(x) = x
-            basic_caller(x) = basic_callee(x)
-        end) |> string)
-
-        write(joinpath(load_path, "CustomAbstractInterpreterCaching.jl"), :(module CustomAbstractInterpreterCaching
-            import SimpleModule: basic_caller, basic_callee
-
-            module Custom
-                include("$($newinterp_path)")
-                @newinterp PrecompileInterpreter
-            end
-
-            Base.return_types((Float64,)) do x
-                basic_caller(x)
-            end
-            Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
-                basic_caller(x)
-            end
-            Base.return_types((Vector{Float64},)) do x
-                sum(x)
-            end
-            Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
-                sum(x)
-            end
-        end) |> string)
-        Base.compilecache(Base.PkgId("CustomAbstractInterpreterCaching"))
-        @eval let
-            using CustomAbstractInterpreterCaching
-            cache_owner = Core.Compiler.cache_owner(
-                CustomAbstractInterpreterCaching.Custom.PrecompileInterpreter())
-            let m = only(methods(CustomAbstractInterpreterCaching.basic_callee))
-                mi = only(Base.specializations(m))
-                ci = mi.cache
-                @test isdefined(ci, :next)
-                @test ci.owner === nothing
-                @test ci.max_world == typemax(UInt)
-                ci = ci.next
-                @test !isdefined(ci, :next)
-                @test ci.owner === cache_owner
-                @test ci.max_world == typemax(UInt)
-            end
-            let m = only(methods(sum, (Vector{Float64},)))
-                found = false
-                for mi in Base.specializations(m)
-                    if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
-                        ci = mi.cache
-                        @test isdefined(ci, :next)
-                        @test ci.owner === cache_owner
-                        @test ci.max_world == typemax(UInt)
-                        ci = ci.next
-                        @test !isdefined(ci, :next)
-                        @test ci.owner === nothing
-                        @test ci.max_world == typemax(UInt)
-                        found = true
-                        break
-                    end
-                end
-                @test found
-            end
-        end
-
-        write(joinpath(load_path, "CustomAbstractInterpreterCaching2.jl"), :(module CustomAbstractInterpreterCaching2
-            import SimpleModule: basic_caller, basic_callee
-
-            module Custom
-                const CC = Core.Compiler
-                include("$($newinterp_path)")
-                @newinterp PrecompileInterpreter
-                struct CustomData
-                    inferred
-                    CustomData(@nospecialize inferred) = new(inferred)
-                end
-                function CC.transform_result_for_cache(interp::PrecompileInterpreter,
-                        mi::Core.MethodInstance, valid_worlds::CC.WorldRange, result::CC.InferenceResult)
-                    inferred_result = @invoke CC.transform_result_for_cache(interp::CC.AbstractInterpreter,
-                        mi::Core.MethodInstance, valid_worlds::CC.WorldRange, result::CC.InferenceResult)
-                    return CustomData(inferred_result)
-                end
-                function CC.src_inlining_policy(interp::PrecompileInterpreter, @nospecialize(src),
-                                            @nospecialize(info::CC.CallInfo), stmt_flag::UInt32)
-                    if src isa CustomData
-                        src = src.inferred
-                    end
-                    return @invoke CC.src_inlining_policy(interp::CC.AbstractInterpreter, src::Any,
-                                                          info::CC.CallInfo, stmt_flag::UInt32)
-                end
-                CC.retrieve_ir_for_inlining(cached_result::Core.CodeInstance, src::CustomData) =
-                    CC.retrieve_ir_for_inlining(cached_result, src.inferred)
-                CC.retrieve_ir_for_inlining(mi::Core.MethodInstance, src::CustomData, preserve_local_sources::Bool) =
-                    CC.retrieve_ir_for_inlining(mi, src.inferred, preserve_local_sources)
-            end
-
-            Base.return_types((Float64,)) do x
-                basic_caller(x)
-            end
-            Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
-                basic_caller(x)
-            end
-            Base.return_types((Vector{Float64},)) do x
-                sum(x)
-            end
-            Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
-                sum(x)
-            end
-        end) |> string)
-        Base.compilecache(Base.PkgId("CustomAbstractInterpreterCaching2"))
-        @eval let
-            using CustomAbstractInterpreterCaching2
-            cache_owner = Core.Compiler.cache_owner(
-                CustomAbstractInterpreterCaching2.Custom.PrecompileInterpreter())
-            let m = only(methods(CustomAbstractInterpreterCaching2.basic_callee))
-                mi = only(Base.specializations(m))
-                ci = mi.cache
-                @test isdefined(ci, :next)
-                @test ci.owner === nothing
-                @test ci.max_world == typemax(UInt)
-                ci = ci.next
-                @test !isdefined(ci, :next)
-                @test ci.owner === cache_owner
-                @test ci.max_world == typemax(UInt)
-            end
-            let m = only(methods(sum, (Vector{Float64},)))
-                found = false
-                for mi = Base.specializations(m)
-                    if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
-                        ci = mi.cache
-                        @test isdefined(ci, :next)
-                        @test ci.owner === cache_owner
-                        @test ci.max_world == typemax(UInt)
-                        ci = ci.next
-                        @test !isdefined(ci, :next)
-                        @test ci.owner === nothing
-                        @test ci.max_world == typemax(UInt)
-                        found = true
-                        break
-                    end
-                end
-                @test found
-            end
-        end
-    end
+@testset "Precompile external abstract interpreter" begin
+    dir = @__DIR__
+    @test success(pipeline(Cmd(`$(Base.julia_cmd()) precompile_absint1.jl`; dir); stdout, stderr))
+    @test success(pipeline(Cmd(`$(Base.julia_cmd()) precompile_absint2.jl`; dir); stdout, stderr))
 end
 
 precompile_test_harness("Recursive types") do load_path
@@ -2050,21 +1920,21 @@ precompile_test_harness("Issue #50538") do load_path
             ex
         end
         const newtype = try
-            Core.set_binding_type!(Base, :newglobal)
+            Core.eval(Base, :(global newglobal::Any))
         catch ex
             ex isa ErrorException || rethrow()
             ex
         end
-        global undefglobal
+        global undefglobal::Any
         end
         """)
     ji, ofile = Base.compilecache(Base.PkgId("I50538"))
     @eval using I50538
     @test I50538.newglobal.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent."
-    @test I50538.newtype.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent."
+    @test I50538.newtype.msg == "Evaluation into the closed module `Base` breaks incremental compilation because the side effects will not be permanent. This is likely due to some other module mutating `Base` with `eval` during precompilation - don't do this."
     @test_throws(ErrorException("cannot set type for global I50538.undefglobal. It already has a value or is already set to a different type."),
-                 Core.set_binding_type!(I50538, :undefglobal, Int))
-    Core.set_binding_type!(I50538, :undefglobal, Any)
+                 Core.eval(I50538, :(global undefglobal::Int)))
+    Core.eval(I50538, :(global undefglobal::Any))
     @test Core.get_binding_type(I50538, :undefglobal) === Any
     @test !isdefined(I50538, :undefglobal)
 end
@@ -2097,7 +1967,130 @@ precompile_test_harness("Test flags") do load_path
     @test !Base.isprecompiled(id, ;flags=current_flags)
 end
 
-empty!(Base.DEPOT_PATH)
-append!(Base.DEPOT_PATH, original_depot_path)
-empty!(Base.LOAD_PATH)
-append!(Base.LOAD_PATH, original_load_path)
+if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64)
+    @testset "Multiversioning" begin # This test isn't the most robust because it relies on being in CI,
+        pkg = Base.identify_package("Test")  # but we need better target reflection to make a better one.
+        cachefiles = Base.find_all_in_cache_path(pkg)
+        pkgpath = Base.locate_package(pkg)
+        idx = findfirst(cachefiles) do cf
+            Base.stale_cachefile(pkgpath, cf) !== true
+        end
+        targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7])
+        @test length(targets) > 1
+    end
+end
+
+precompile_test_harness("No backedge precompile") do load_path
+    # Test that the system doesn't accidentally forget to revalidate a method without backedges
+    write(joinpath(load_path, "NoBackEdges.jl"),
+          """
+          module NoBackEdges
+          using Core.Intrinsics: add_int
+          f(a::Int, b::Int) = add_int(a, b)
+          precompile(f, (Int, Int))
+          end
+          """)
+    ji, ofile = Base.compilecache(Base.PkgId("NoBackEdges"))
+    @eval using NoBackEdges
+    @test first(methods(NoBackEdges.f)).specializations.cache.max_world === typemax(UInt)
+end
+
+# Test precompilation of generated functions that return opaque closures
+# (with constprop marker set to false).
+precompile_test_harness("Generated Opaque") do load_path
+    write(joinpath(load_path, "GeneratedOpaque.jl"),
+        """
+        module GeneratedOpaque
+        using Base.Experimental: @opaque
+        using InteractiveUtils
+        const_int_barrier() = Base.inferencebarrier(1)::typeof(1)
+        const lno = LineNumberNode(1, :none)
+
+        const ci = @code_lowered const_int_barrier()
+        @generated function oc_re_generated_no_partial()
+            Expr(:new_opaque_closure, Tuple{}, Any, Any, false,
+                Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
+        end
+        @assert oc_re_generated_no_partial()() === 1
+        @generated function oc_re_generated_no_partial_macro()
+            AT = nothing
+            RT = nothing
+            allow_partial = false # makes this legal to generate during pre-compile
+            return Expr(:opaque_closure, AT, RT, RT, allow_partial, :(()->const_int_barrier()))
+        end
+        @assert oc_re_generated_no_partial_macro()() === 1
+        end
+        """)
+    Base.compilecache(Base.PkgId("GeneratedOpaque"))
+    @eval using GeneratedOpaque
+    let oc = invokelatest(GeneratedOpaque.oc_re_generated_no_partial)
+        @test oc.source.specializations.cache.max_world === typemax(UInt)
+        @test oc() === 1
+    end
+end
+
+precompile_test_harness("Issue #52063") do load_path
+    fname = joinpath(load_path, "i_do_not_exist.jl")
+    @test try
+        include_dependency(fname); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(fname))"
+        true
+    end
+    touch(fname)
+    @test include_dependency(fname) === nothing
+    chmod(fname, 0x000)
+    @test try
+        include_dependency(fname); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(fname))"
+        true
+    end broken=Sys.iswindows()
+    dir = mktempdir() do dir
+        @test include_dependency(dir) === nothing
+        chmod(dir, 0x000)
+        @test try
+             include_dependency(dir); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file or folder $(repr(dir))"
+            true
+        end broken=Sys.iswindows()
+        dir
+    end
+    @test try
+        include_dependency(dir); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(dir))"
+        true
+    end
+end
+
+precompile_test_harness("Binding Unique") do load_path
+    write(joinpath(load_path, "UniqueBinding1.jl"),
+        """
+        module UniqueBinding1
+            export x
+            global x = 1
+        end
+        """)
+    write(joinpath(load_path, "UniqueBinding2.jl"),
+        """
+        module UniqueBinding2
+            using UniqueBinding1
+            const thebinding = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true)
+            const thebinding2 = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), @__MODULE__, :thebinding, true)
+        end
+        """)
+
+    @eval using UniqueBinding1
+    @eval using UniqueBinding2
+
+    @test UniqueBinding2.thebinding === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true)
+    @test UniqueBinding2.thebinding2 === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding2, :thebinding, true)
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_absint1.jl b/test/precompile_absint1.jl
new file mode 100644
index 0000000000000..7bc0382ffda85
--- /dev/null
+++ b/test/precompile_absint1.jl
@@ -0,0 +1,81 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("precompile_utils.jl")
+
+precompile_test_harness() do load_path
+    write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule
+        basic_callee(x) = x
+        basic_caller(x) = basic_callee(x)
+    end) |> string)
+
+    newinterp_path = abspath("compiler/newinterp.jl")
+    write(joinpath(load_path, "TestAbsIntPrecompile1.jl"), :(module TestAbsIntPrecompile1
+        import SimpleModule: basic_caller, basic_callee
+
+        module Custom
+            include("$($newinterp_path)")
+            @newinterp PrecompileInterpreter
+        end
+
+        Base.return_types((Float64,)) do x
+            basic_caller(x)
+        end
+        Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
+            basic_caller(x)
+        end
+        Base.return_types((Vector{Float64},)) do x
+            sum(x)
+        end
+        Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
+            sum(x)
+        end
+    end) |> string)
+    Base.compilecache(Base.PkgId("TestAbsIntPrecompile1"))
+
+    @eval let
+        using TestAbsIntPrecompile1
+        cache_owner = Core.Compiler.cache_owner(
+            TestAbsIntPrecompile1.Custom.PrecompileInterpreter())
+        let m = only(methods(TestAbsIntPrecompile1.basic_callee))
+            mi = only(Base.specializations(m))
+            ci = mi.cache
+            @test isdefined(ci, :next)
+            @test ci.owner === nothing
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                Base.object_build_id(ci)
+            ci = ci.next
+            @test !isdefined(ci, :next)
+            @test ci.owner === cache_owner
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                Base.object_build_id(ci)
+        end
+        let m = only(methods(sum, (Vector{Float64},)))
+            found = false
+            for mi in Base.specializations(m)
+                if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
+                    ci = mi.cache
+                    @test isdefined(ci, :next)
+                    @test ci.owner === cache_owner
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                        Base.object_build_id(ci)
+                    ci = ci.next
+                    @test !isdefined(ci, :next)
+                    @test ci.owner === nothing
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                        Base.object_build_id(ci)
+                    found = true
+                    break
+                end
+            end
+            @test found
+        end
+    end
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_absint2.jl b/test/precompile_absint2.jl
new file mode 100644
index 0000000000000..066dcbaece4c4
--- /dev/null
+++ b/test/precompile_absint2.jl
@@ -0,0 +1,104 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("precompile_utils.jl")
+
+precompile_test_harness() do load_path
+    write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule
+        basic_callee(x) = x
+        basic_caller(x) = basic_callee(x)
+    end) |> string)
+
+    newinterp_path = abspath("compiler/newinterp.jl")
+    write(joinpath(load_path, "TestAbsIntPrecompile2.jl"), :(module TestAbsIntPrecompile2
+        import SimpleModule: basic_caller, basic_callee
+
+        module Custom
+            const CC = Core.Compiler
+            include("$($newinterp_path)")
+            @newinterp PrecompileInterpreter
+            struct CustomData
+                inferred
+                CustomData(@nospecialize inferred) = new(inferred)
+            end
+            function CC.transform_result_for_cache(interp::PrecompileInterpreter,
+                    mi::Core.MethodInstance, valid_worlds::CC.WorldRange, result::CC.InferenceResult)
+                inferred_result = @invoke CC.transform_result_for_cache(interp::CC.AbstractInterpreter,
+                    mi::Core.MethodInstance, valid_worlds::CC.WorldRange, result::CC.InferenceResult)
+                return CustomData(inferred_result)
+            end
+            function CC.src_inlining_policy(interp::PrecompileInterpreter, @nospecialize(src),
+                                            @nospecialize(info::CC.CallInfo), stmt_flag::UInt32)
+                if src isa CustomData
+                    src = src.inferred
+                end
+                return @invoke CC.src_inlining_policy(interp::CC.AbstractInterpreter, src::Any,
+                                                      info::CC.CallInfo, stmt_flag::UInt32)
+            end
+            CC.retrieve_ir_for_inlining(cached_result::Core.CodeInstance, src::CustomData) =
+                CC.retrieve_ir_for_inlining(cached_result, src.inferred)
+            CC.retrieve_ir_for_inlining(mi::Core.MethodInstance, src::CustomData, preserve_local_sources::Bool) =
+                CC.retrieve_ir_for_inlining(mi, src.inferred, preserve_local_sources)
+        end
+
+        Base.return_types((Float64,)) do x
+            basic_caller(x)
+        end
+        Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
+            basic_caller(x)
+        end
+        Base.return_types((Vector{Float64},)) do x
+            sum(x)
+        end
+        Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
+            sum(x)
+        end
+    end) |> string)
+    Base.compilecache(Base.PkgId("TestAbsIntPrecompile2"))
+
+    @eval let
+        using TestAbsIntPrecompile2
+        cache_owner = Core.Compiler.cache_owner(
+            TestAbsIntPrecompile2.Custom.PrecompileInterpreter())
+        let m = only(methods(TestAbsIntPrecompile2.basic_callee))
+            mi = only(Base.specializations(m))
+            ci = mi.cache
+            @test isdefined(ci, :next)
+            @test ci.owner === nothing
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                Base.object_build_id(ci)
+            ci = ci.next
+            @test !isdefined(ci, :next)
+            @test ci.owner === cache_owner
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                Base.object_build_id(ci)
+        end
+        let m = only(methods(sum, (Vector{Float64},)))
+            found = false
+            for mi = Base.specializations(m)
+                if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
+                    ci = mi.cache
+                    @test isdefined(ci, :next)
+                    @test ci.owner === cache_owner
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                        Base.object_build_id(ci)
+                    ci = ci.next
+                    @test !isdefined(ci, :next)
+                    @test ci.owner === nothing
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                        Base.object_build_id(ci)
+                    found = true
+                    break
+                end
+            end
+            @test found
+        end
+    end
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_utils.jl b/test/precompile_utils.jl
new file mode 100644
index 0000000000000..55eba353f2ada
--- /dev/null
+++ b/test/precompile_utils.jl
@@ -0,0 +1,41 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function precompile_test_harness(@nospecialize(f), testset::String)
+    @testset "$testset" precompile_test_harness(f, true)
+end
+function precompile_test_harness(@nospecialize(f), separate::Bool=true)
+    load_path = mktempdir()
+    load_cache_path = separate ? mktempdir() : load_path
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, load_cache_path)
+        f(load_path)
+    finally
+        try
+            rm(load_path, force=true, recursive=true)
+        catch err
+            @show err
+        end
+        if separate
+            try
+                rm(load_cache_path, force=true, recursive=true)
+            catch err
+                @show err
+            end
+        end
+        filter!((≠)(load_path), LOAD_PATH)
+        separate && filter!((≠)(load_cache_path), DEPOT_PATH)
+    end
+    return nothing
+end
+
+let original_depot_path = copy(Base.DEPOT_PATH)
+    original_load_path = copy(Base.LOAD_PATH)
+
+    global function finish_precompile_test!()
+        empty!(Base.DEPOT_PATH)
+        append!(Base.DEPOT_PATH, original_depot_path)
+        empty!(Base.LOAD_PATH)
+        append!(Base.LOAD_PATH, original_load_path)
+    end
+end
diff --git a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
index 8ac961fa1a9a9..004ef7892c173 100644
--- a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
+++ b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
@@ -1,8 +1,8 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.9.0-beta4"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef"
+project_hash = "a4c480cfa7da9610333d5c42623bf746bd286c5f"
 
 [[deps.ExtDep]]
 deps = ["SomePackage"]
@@ -18,10 +18,12 @@ version = "0.1.0"
     [deps.HasExtensions.extensions]
     Extension = "ExtDep"
     ExtensionFolder = ["ExtDep", "ExtDep2"]
+    LinearAlgebraExt = "LinearAlgebra"
 
     [deps.HasExtensions.weakdeps]
     ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
     ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+    LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [[deps.SomePackage]]
 path = "../SomePackage"
diff --git a/test/project/Extensions/ExtDep3.jl/Project.toml b/test/project/Extensions/ExtDep3.jl/Project.toml
new file mode 100644
index 0000000000000..690b2f1cffff4
--- /dev/null
+++ b/test/project/Extensions/ExtDep3.jl/Project.toml
@@ -0,0 +1,4 @@
+name = "ExtDep3"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
+authors = ["Kristoffer <kcarlsson89@gmail.com>"]
diff --git a/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl
new file mode 100644
index 0000000000000..96a0b472d06c5
--- /dev/null
+++ b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl
@@ -0,0 +1,5 @@
+module ExtDep3
+
+greet() = print("Hello World!")
+
+end # module ExtDep3
diff --git a/test/project/Extensions/ExtNameCollision_A/Project.toml b/test/project/Extensions/ExtNameCollision_A/Project.toml
new file mode 100644
index 0000000000000..f4cc37786f508
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_A/Project.toml
@@ -0,0 +1,9 @@
+name = "ExtNameCollision_A"
+uuid = "9f48de98-8f56-4937-aa32-2a5530882eaa"
+version = "0.1.0"
+
+[weakdeps]
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[extensions]
+REPLExt = "REPL"
diff --git a/base/docs/temp.cpp b/test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl
similarity index 100%
rename from base/docs/temp.cpp
rename to test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl
diff --git a/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl
new file mode 100644
index 0000000000000..2f47a862dd9c5
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl
@@ -0,0 +1,5 @@
+module ExtNameCollision_A
+
+greet() = print("Hello World!")
+
+end # module ExtNameCollision_A
diff --git a/test/project/Extensions/ExtNameCollision_B/Project.toml b/test/project/Extensions/ExtNameCollision_B/Project.toml
new file mode 100644
index 0000000000000..ac52d64a82a7c
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_B/Project.toml
@@ -0,0 +1,9 @@
+name = "ExtNameCollision_B"
+uuid = "597d654f-44d8-4443-9b1e-1f2f4b45906f"
+version = "0.1.0"
+
+[weakdeps]
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[extensions]
+REPLExt = "REPL"
diff --git a/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl b/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl
new file mode 100644
index 0000000000000..e7665982a79b3
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl
@@ -0,0 +1,5 @@
+module ExtNameCollision_B
+
+greet() = print("Hello World!")
+
+end # module ExtNameCollision_B
diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
index 15f068f250ce3..5706aba59d1e0 100644
--- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
+++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
@@ -1,8 +1,8 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.10.0"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "d523b3401f72a1ed34b7b43749fd2655c6b78542"
+project_hash = "4e196b07f2ee7adc48ac9d528d42b3cf3737c7a0"
 
 [[deps.ExtDep]]
 deps = ["SomePackage"]
@@ -15,13 +15,20 @@ path = "../ExtDep2"
 uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
 version = "0.1.0"
 
+[[deps.ExtDep3]]
+path = "../ExtDep3.jl"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
+
 [[deps.HasExtensions]]
+deps = ["ExtDep3"]
 path = "../HasExtensions.jl"
 uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
 version = "0.1.0"
 
     [deps.HasExtensions.extensions]
     Extension = "ExtDep"
+    ExtensionDep = "ExtDep3"
     ExtensionFolder = ["ExtDep", "ExtDep2"]
     LinearAlgebraExt = "LinearAlgebra"
 
diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
index 8f308a9fbee72..aa4956caada74 100644
--- a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
+++ b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
@@ -5,4 +5,5 @@ version = "0.1.0"
 [deps]
 ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1"
 HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
diff --git a/test/project/Extensions/HasExtensions.jl/Manifest.toml b/test/project/Extensions/HasExtensions.jl/Manifest.toml
index 55f7958701a75..429c6598fc4f4 100644
--- a/test/project/Extensions/HasExtensions.jl/Manifest.toml
+++ b/test/project/Extensions/HasExtensions.jl/Manifest.toml
@@ -1,7 +1,10 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.10.0-DEV"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "c87947f1f1f070eea848950c304d668a112dec3d"
+project_hash = "c0bb526b75939a74a6195ee4819e598918a22ad7"
 
-[deps]
+[[deps.ExtDep3]]
+path = "../ExtDep3.jl"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml
index a5f9bb1e42d29..fe21a1423f543 100644
--- a/test/project/Extensions/HasExtensions.jl/Project.toml
+++ b/test/project/Extensions/HasExtensions.jl/Project.toml
@@ -2,6 +2,9 @@ name = "HasExtensions"
 uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
 version = "0.1.0"
 
+[deps]
+ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1"
+
 [weakdeps]
 ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
@@ -9,5 +12,6 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [extensions]
 Extension = "ExtDep"
+ExtensionDep = "ExtDep3"
 ExtensionFolder = ["ExtDep", "ExtDep2"]
 LinearAlgebraExt = "LinearAlgebra"
diff --git a/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl
new file mode 100644
index 0000000000000..e2710d4d89bbb
--- /dev/null
+++ b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl
@@ -0,0 +1,9 @@
+module ExtensionDep
+
+using HasExtensions, ExtDep3
+
+function __init__()
+    HasExtensions.ext_dep_loaded = true
+end
+
+end
diff --git a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
index dbfaeec4f8812..9d9785f87f790 100644
--- a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
+++ b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
@@ -6,5 +6,6 @@ foo(::HasExtensionsStruct) = 1
 
 ext_loaded = false
 ext_folder_loaded = false
+ext_dep_loaded = false
 
 end # module
diff --git a/test/project/ProjectPath/CustomPath.jl b/test/project/ProjectPath/CustomPath.jl
new file mode 100644
index 0000000000000..8fe764fa066dc
--- /dev/null
+++ b/test/project/ProjectPath/CustomPath.jl
@@ -0,0 +1,5 @@
+module ProjectPath
+
+greet() = print("Hello World!")
+
+end # module ProjectPath
diff --git a/test/project/ProjectPath/Manifest.toml b/test/project/ProjectPath/Manifest.toml
new file mode 100644
index 0000000000000..123e7f575062a
--- /dev/null
+++ b/test/project/ProjectPath/Manifest.toml
@@ -0,0 +1,18 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "51ade905d618e4aa369bc869841376219cc36cb1"
+
+[[deps.ProjectPath]]
+deps = ["ProjectPathDep"]
+path = "."
+entryfile = "CustomPath.jl"
+uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b"
+version = "0.1.0"
+
+[[deps.ProjectPathDep]]
+path = "ProjectPathDep"
+entryfile = "CustomPath.jl"
+uuid = "f18633fc-8799-43ff-aa06-99ed830dc572"
+version = "0.1.0"
diff --git a/test/project/ProjectPath/Project.toml b/test/project/ProjectPath/Project.toml
new file mode 100644
index 0000000000000..a434f78e9c211
--- /dev/null
+++ b/test/project/ProjectPath/Project.toml
@@ -0,0 +1,7 @@
+name = "ProjectPath"
+uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b"
+entryfile = "CustomPath.jl"
+version = "0.1.0"
+
+[deps]
+ProjectPathDep = "f18633fc-8799-43ff-aa06-99ed830dc572"
diff --git a/test/project/ProjectPath/ProjectPathDep/CustomPath.jl b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl
new file mode 100644
index 0000000000000..adbe508f0c7f9
--- /dev/null
+++ b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl
@@ -0,0 +1,5 @@
+module ProjectPathDep
+
+greet() = print("Hello World!")
+
+end # module ProjectPathDep
diff --git a/test/project/ProjectPath/ProjectPathDep/Project.toml b/test/project/ProjectPath/ProjectPathDep/Project.toml
new file mode 100644
index 0000000000000..c69e54e8c9390
--- /dev/null
+++ b/test/project/ProjectPath/ProjectPathDep/Project.toml
@@ -0,0 +1,4 @@
+name = "ProjectPathDep"
+uuid = "f18633fc-8799-43ff-aa06-99ed830dc572"
+version = "0.1.0"
+entryfile = "CustomPath.jl"
diff --git a/test/project/Rot13/src/Rot13.jl b/test/project/Rot13/src/Rot13.jl
index 0672799d61f24..66f077812d878 100644
--- a/test/project/Rot13/src/Rot13.jl
+++ b/test/project/Rot13/src/Rot13.jl
@@ -7,9 +7,22 @@ end
 
 rot13(str::AbstractString) = map(rot13, str)
 
-function (@main)(ARGS)
-    foreach(arg -> print(rot13(arg), " "), ARGS)
+function (@main)(args)
+    foreach(arg -> print(rot13(arg), " "), args)
     return 0
 end
 
+module Rot26 # LOL
+
+import ..rot13
+
+rot26(str::AbstractString) = map(rot13 ∘ rot13, str)
+
+function (@main)(args)
+    foreach(arg -> print(rot26(arg), " "), args)
+    return 0
+end
+
+end
+
 end # module Rot13
diff --git a/test/project/SubProject/Devved/Project.toml b/test/project/SubProject/Devved/Project.toml
new file mode 100644
index 0000000000000..63088a132cb77
--- /dev/null
+++ b/test/project/SubProject/Devved/Project.toml
@@ -0,0 +1,3 @@
+name = "Devved"
+uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+version = "0.1.0"
diff --git a/test/project/SubProject/Devved/src/Devved.jl b/test/project/SubProject/Devved/src/Devved.jl
new file mode 100644
index 0000000000000..f3eb267409ece
--- /dev/null
+++ b/test/project/SubProject/Devved/src/Devved.jl
@@ -0,0 +1,5 @@
+module Devved
+
+greet() = print("Hello World!")
+
+end # module Devved
diff --git a/test/project/SubProject/Devved2/Project.toml b/test/project/SubProject/Devved2/Project.toml
new file mode 100644
index 0000000000000..c761630566116
--- /dev/null
+++ b/test/project/SubProject/Devved2/Project.toml
@@ -0,0 +1,3 @@
+name = "Devved2"
+uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+version = "0.1.0"
diff --git a/test/project/SubProject/Devved2/src/Devved2.jl b/test/project/SubProject/Devved2/src/Devved2.jl
new file mode 100644
index 0000000000000..9bd5df2793671
--- /dev/null
+++ b/test/project/SubProject/Devved2/src/Devved2.jl
@@ -0,0 +1,5 @@
+module Devved2
+
+greet() = print("Hello World!")
+
+end # module Devved2
diff --git a/test/project/SubProject/Manifest.toml b/test/project/SubProject/Manifest.toml
new file mode 100644
index 0000000000000..5d791a74652d4
--- /dev/null
+++ b/test/project/SubProject/Manifest.toml
@@ -0,0 +1,68 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "620b9377bc807ff657e6618c8ccc24887eb40285"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.Devved]]
+path = "Devved"
+uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+version = "0.1.0"
+
+[[deps.Devved2]]
+path = "Devved2"
+uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+version = "0.1.0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.Logging]]
+deps = ["StyledStrings"]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.MyPkg]]
+deps = ["Devved", "Devved2"]
+path = "."
+uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+version = "0.0.0"
+
+[[deps.PackageThatIsSub]]
+deps = ["Devved2", "MyPkg"]
+path = "PackageThatIsSub"
+uuid = "1efb588c-9412-4e40-90a4-710420bd84aa"
+version = "0.1.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
diff --git a/test/project/SubProject/PackageThatIsSub/Project.toml b/test/project/SubProject/PackageThatIsSub/Project.toml
new file mode 100644
index 0000000000000..e41dd998c5a1c
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/Project.toml
@@ -0,0 +1,14 @@
+name = "PackageThatIsSub"
+uuid = "1efb588c-9412-4e40-90a4-710420bd84aa"
+version = "0.1.0"
+
+[workspace]
+projects = ["test"]
+
+[deps]
+Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+
+[preferences]
+value = 2
+y = 2
diff --git a/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl
new file mode 100644
index 0000000000000..7f9ea94ccb156
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl
@@ -0,0 +1,5 @@
+module PackageThatIsSub
+
+greet() = print("Hello World!")
+
+end # module PackageThatIsSub
diff --git a/test/project/SubProject/PackageThatIsSub/test/Project.toml b/test/project/SubProject/PackageThatIsSub/test/Project.toml
new file mode 100644
index 0000000000000..dc8186e2b735e
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/test/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+PackageThatIsSub = "1efb588c-9412-4e40-90a4-710420bd84aa"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[preferences]
+value = 3
+z = 3
diff --git a/test/project/SubProject/Project.toml b/test/project/SubProject/Project.toml
new file mode 100644
index 0000000000000..dcb84d865ac85
--- /dev/null
+++ b/test/project/SubProject/Project.toml
@@ -0,0 +1,13 @@
+name = "MyPkg"
+uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+
+[workspace]
+projects = ["sub", "PackageThatIsSub", "test"]
+
+[deps]
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+
+[preferences]
+value = 1
+x = 1
diff --git a/test/project/SubProject/src/MyPkg.jl b/test/project/SubProject/src/MyPkg.jl
new file mode 100644
index 0000000000000..6d84954645d55
--- /dev/null
+++ b/test/project/SubProject/src/MyPkg.jl
@@ -0,0 +1,3 @@
+module MyPkg
+
+end
diff --git a/test/project/SubProject/sub/Project.toml b/test/project/SubProject/sub/Project.toml
new file mode 100644
index 0000000000000..50aa238e91d57
--- /dev/null
+++ b/test/project/SubProject/sub/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
diff --git a/test/project/SubProject/test/Project.toml b/test/project/SubProject/test/Project.toml
new file mode 100644
index 0000000000000..b64312e4b1ee2
--- /dev/null
+++ b/test/project/SubProject/test/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
diff --git a/test/ranges.jl b/test/ranges.jl
index 4660a96dfc16a..86cd1c3f2345c 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -292,15 +292,10 @@ end
 
     rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T)))
 
-    rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo)
-
     # For this test the `BigFloat` mantissa needs to be just a bit
     # larger than the `Float64` mantissa
     setprecision(BigFloat, 70) do
         n = 10
-        @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n
-            rand_twiceprecision_is_ok(T)
-        end
         @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n
             twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat))
         end
@@ -314,6 +309,13 @@ end
             twiceprecision_is_normalized(Base.TwicePrecision{Float64}(rand_twiceprecision(Float32)))
         end
     end
+
+    @testset "displaying a complex range (#52713)" begin
+        r = 1.0*(1:5) .+ im
+        @test startswith(repr(r), repr(first(r)))
+        @test endswith(repr(r), repr(last(r)))
+        @test occursin(repr(step(r)), repr(r))
+    end
 end
 @testset "ranges" begin
     @test size(10:1:0) == (0,)
@@ -430,17 +432,17 @@ end
     @testset "findfirst" begin
         @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1
         @test findfirst(isequal(3), Base.OneTo(10)) == 3
-        @test findfirst(==(0), Base.OneTo(10)) == nothing
-        @test findfirst(==(11), Base.OneTo(10)) == nothing
+        @test findfirst(==(0), Base.OneTo(10)) === nothing
+        @test findfirst(==(11), Base.OneTo(10)) === nothing
         @test findfirst(==(4), Int16(3):Int16(7)) === Int(2)
-        @test findfirst(==(2), Int16(3):Int16(7)) == nothing
-        @test findfirst(isequal(8), 3:7) == nothing
+        @test findfirst(==(2), Int16(3):Int16(7)) === nothing
+        @test findfirst(isequal(8), 3:7) === nothing
         @test findfirst(isequal(7), 1:2:10) == 4
         @test findfirst(==(7), 1:2:10) == 4
-        @test findfirst(==(10), 1:2:10) == nothing
-        @test findfirst(==(11), 1:2:10) == nothing
+        @test findfirst(==(10), 1:2:10) === nothing
+        @test findfirst(==(11), 1:2:10) === nothing
         @test findfirst(==(-7), 1:-1:-10) == 9
-        @test findfirst(==(2),1:-1:2) == nothing
+        @test findfirst(==(2),1:-1:2) === nothing
     end
     @testset "reverse" begin
         @test reverse(reverse(1:10)) == 1:10
@@ -2376,13 +2378,46 @@ end
     @test 0.2 * (-2:2:2) == [-0.4, 0, 0.4]
 end
 
-@testset "Indexing OneTo with IdentityUnitRange" begin
-    for endpt in Any[10, big(10), UInt(10)]
-        r = Base.OneTo(endpt)
-        inds = Base.IdentityUnitRange(3:5)
-        rs = r[inds]
-        @test rs === inds
-        @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+@testset "IdentityUnitRange indexing" begin
+    @testset "Indexing into an IdentityUnitRange" begin
+        @testset for r in Any[-1:20, Base.OneTo(20)]
+            ri = Base.IdentityUnitRange(r)
+            @test_throws "invalid index" ri[true]
+            @testset for s in Any[Base.OneTo(6), Base.OneTo{BigInt}(6), 3:6, big(3):big(6), 3:2:7]
+                @test mapreduce(==, &, ri[s], ri[s[begin]]:step(s):ri[s[end]])
+                @test axes(ri[s]) == axes(s)
+                @test eltype(ri[s]) == eltype(ri)
+            end
+        end
+        @testset "Bool indices" begin
+            r = 1:1
+            @test Base.IdentityUnitRange(r)[true:true] == r[true:true]
+            @test Base.IdentityUnitRange(r)[true:true:true] == r[true:true:true]
+            @test_throws BoundsError Base.IdentityUnitRange(1:2)[true:true]
+            @test_throws BoundsError Base.IdentityUnitRange(1:2)[true:true:true]
+        end
+    end
+    @testset "Indexing with IdentityUnitRange" begin
+        @testset "OneTo" begin
+            @testset for endpt in Any[10, big(12), UInt(11)]
+                r = Base.OneTo(endpt)
+                inds = Base.IdentityUnitRange(3:5)
+                rs = r[inds]
+                @test rs == inds
+                @test axes(rs) == axes(inds)
+                @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+            end
+        end
+        @testset "IdentityUnitRange" begin
+            @testset for r in Any[Base.IdentityUnitRange(1:4), Base.IdentityUnitRange(Base.OneTo(4)), Base.Slice(1:4), Base.Slice(Base.OneTo(4))]
+                @testset for s in Any[Base.IdentityUnitRange(3:3), Base.IdentityUnitRange(Base.OneTo(2)), Base.Slice(3:3), Base.Slice(Base.OneTo(2))]
+                    rs = r[s]
+                    @test rs == s
+                    @test axes(rs) == axes(s)
+                end
+                @test_throws BoundsError r[Base.IdentityUnitRange(first(r):last(r) + 1)]
+            end
+        end
     end
 end
 
@@ -2686,3 +2721,16 @@ end
     @test Base._log_twice64_unchecked(NaN).lo isa Float64
     @test Base._log_twice64_unchecked(Inf).lo isa Float64
 end
+
+@testset "OneTo promotion" begin
+    struct MyUnitRange{T} <: AbstractUnitRange{T}
+        range::UnitRange{T}
+    end
+    Base.first(r::MyUnitRange) = first(r.range)
+    Base.last(r::MyUnitRange) = last(r.range)
+    Base.size(r::MyUnitRange) = size(r.range)
+    Base.length(r::MyUnitRange) = length(r.range)
+    Base.getindex(r::MyUnitRange, i::Int) = getindex(r.range, i)
+    @test promote(MyUnitRange(2:3), Base.OneTo(3)) == (2:3, 1:3)
+    @test promote(MyUnitRange(UnitRange(3.0, 4.0)), Base.OneTo(3)) == (3.0:4.0, 1.0:3.0)
+end
diff --git a/test/rational.jl b/test/rational.jl
index e310875a52a83..ac469c83cf544 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -670,7 +670,7 @@ end
         @test gcdx(T(1)//T(1), T(1)//T(0)) === (T(1)//T(0), T(0), T(1))
         @test gcdx(T(1)//T(0), T(1)//T(0)) === (T(1)//T(0), T(1), T(1))
         @test gcdx(T(1)//T(0), T(0)//T(1)) === (T(1)//T(0), T(1), T(0))
-        @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(1), T(0))
+        @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(0), T(0))
 
         if T <: Signed
             @test gcdx(T(-1)//T(0), T(1)//T(2)) === (T(1)//T(0), T(1), T(0))
@@ -816,3 +816,20 @@ end
     @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im
     @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im)
 end
+
+@testset "complex numerator, denominator" begin
+    z = complex(3*3, 2*3*5)
+    @test z === numerator(z) === numerator(z // 2) === numerator(z // 5)
+    @test complex(3, 2*5) === numerator(z // 3)
+    @test isone(denominator(z))
+    @test 2 === denominator(z // 2)
+    @test 1 === denominator(z // 3)
+    @test 5 === denominator(z // 5)
+    for den ∈ 1:10
+        q = z // den
+        @test q === (numerator(q)//denominator(q))
+    end
+    @testset "do not overflow silently" begin
+        @test_throws OverflowError numerator(Int8(1)//Int8(31) + Int8(8)im//Int8(3))
+    end
+end
diff --git a/test/read.jl b/test/read.jl
index 283381668c28a..34224c146864e 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -170,6 +170,10 @@ for (name, f) in l
         local t, s, m, kept
         @test readuntil(io(t), s) == m
         @test readuntil(io(t), s, keep=true) == kept
+        if isone(length(s))
+            @test readuntil(io(t), first(s)) == m
+            @test readuntil(io(t), first(s), keep=true) == kept
+        end
         @test readuntil(io(t), SubString(s, firstindex(s))) == m
         @test readuntil(io(t), SubString(s, firstindex(s)), keep=true) == kept
         @test readuntil(io(t), GenericString(s)) == m
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 77ed94664539f..6a6f20214058c 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -576,8 +576,8 @@ end
 @testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt]
     result = sum(T[1 2 3; 4 5 6; 7 8 9], dims=2)
     @test result == hcat([6, 15, 24])
-    @test eltype(result) === (T <: Base.SmallSigned ? Int :
-                              T <: Base.SmallUnsigned ? UInt :
+    @test eltype(result) === (T <: Base.BitSignedSmall ? Int :
+                              T <: Base.BitUnsignedSmall ? UInt :
                               T)
 end
 
@@ -587,6 +587,30 @@ end
     @test B[argmin(B, dims=[2, 3])] == @inferred(minimum(B, dims=[2, 3]))
 end
 
+@testset "careful with @inbounds" begin
+    Base.@propagate_inbounds f(x) = x == 2 ? x[-10000] : x
+    Base.@propagate_inbounds op(x,y) = x[-10000] + y[-10000]
+    for (arr, dims) in (([1,1,2], 1), ([1 1 2], 2), ([ones(Int,256);2], 1))
+        @test_throws BoundsError mapreduce(f, +, arr)
+        @test_throws BoundsError mapreduce(f, +, arr; dims)
+        @test_throws BoundsError mapreduce(f, +, arr; dims, init=0)
+        @test_throws BoundsError mapreduce(identity, op, arr)
+        try
+            #=@test_throws BoundsError=# mapreduce(identity, op, arr; dims)
+        catch ex
+            @test_broken ex isa BoundsError
+        end
+        @test_throws BoundsError mapreduce(identity, op, arr; dims, init=0)
+
+        @test_throws BoundsError findmin(f, arr)
+        @test_throws BoundsError findmin(f, arr; dims)
+
+        @test_throws BoundsError mapreduce(f, max, arr)
+        @test_throws BoundsError mapreduce(f, max, arr; dims)
+        @test_throws BoundsError mapreduce(f, max, arr; dims, init=0)
+    end
+end
+
 @testset "in-place reductions with mismatched dimensionalities" begin
     B = reshape(1:24, 4, 3, 2)
     for R in (fill(0, 4), fill(0, 4, 1), fill(0, 4, 1, 1))
diff --git a/test/reflection.jl b/test/reflection.jl
index 4a738dd3e58ee..634390e0680d1 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -125,11 +125,18 @@ not_const = 1
 # For curmod_*
 include("testenv.jl")
 
+module TestMod36529
+    x36529 = 0
+    y36529 = 1
+    export y36529
+end
+
 module TestMod7648
 using Test
 import Base.convert
 import ..curmod_name, ..curmod
-export a9475, foo9475, c7648, foo7648, foo7648_nomethods, Foo7648
+using ..TestMod36529: x36529   # doesn't import TestMod36529 or y36529, even though it's exported
+export a9475, c7648, f9475, foo7648, foo7648_nomethods, Foo7648
 
 const c7648 = 8
 d7648 = 9
@@ -142,10 +149,11 @@ module TestModSub9475
     using Test
     using ..TestMod7648
     import ..curmod_name
-    export a9475, foo9475
+    export a9475, f9475, f54609
     a9475 = 5
     b9475 = 7
-    foo9475(x) = x
+    f9475(x) = x
+    f54609(x) = x
     let
         @test Base.binding_module(@__MODULE__, :a9475) == @__MODULE__
         @test Base.binding_module(@__MODULE__, :c7648) == TestMod7648
@@ -169,18 +177,104 @@ let
     @test Base.binding_module(TestMod7648, :d7648) == TestMod7648
     @test Base.binding_module(TestMod7648, :a9475) == TestMod7648.TestModSub9475
     @test Base.binding_module(TestMod7648.TestModSub9475, :b9475) == TestMod7648.TestModSub9475
-    @test Set(names(TestMod7648))==Set([:TestMod7648, :a9475, :foo9475, :c7648, :foo7648, :foo7648_nomethods, :Foo7648])
-    @test Set(names(TestMod7648, all = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648,
-                                                :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"),
-                                                :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include")])
-    @test Set(names(TestMod7648, all = true, imported = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648,
-                                                      :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"),
-                                                      :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include"),
-                                                      :convert, :curmod_name, :curmod])
+    defaultset = Set(Symbol[:Foo7648, :TestMod7648, :a9475, :c7648, :f9475, :foo7648, :foo7648_nomethods])
+    allset = defaultset ∪ Set(Symbol[
+        Symbol("#eval"), Symbol("#foo7648"), Symbol("#foo7648_nomethods"), Symbol("#include"),
+        :TestModSub9475, :d7648, :eval, :f7648, :include])
+    imported = Set(Symbol[:convert, :curmod_name, :curmod])
+    usings_from_Test = Set(Symbol[
+        Symbol("@inferred"), Symbol("@test"), Symbol("@test_broken"), Symbol("@test_deprecated"),
+        Symbol("@test_logs"), Symbol("@test_nowarn"), Symbol("@test_skip"), Symbol("@test_throws"),
+        Symbol("@test_warn"), Symbol("@testset"), :GenericArray, :GenericDict, :GenericOrder,
+        :GenericSet, :GenericString, :LogRecord, :Test, :TestLogger, :TestSetException,
+        :detect_ambiguities, :detect_unbound_args])
+    usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself
+    usings = Set(Symbol[:x36529, :TestModSub9475, :f54609]) ∪ usings_from_Test ∪ usings_from_Base
+    @test Set(names(TestMod7648)) == defaultset
+    @test Set(names(TestMod7648, all=true)) == allset
+    @test Set(names(TestMod7648, all=true, imported=true)) == allset ∪ imported
+    @test Set(names(TestMod7648, usings=true)) == defaultset ∪ usings
+    @test Set(names(TestMod7648, all=true, usings=true)) == allset ∪ usings
     @test isconst(TestMod7648, :c7648)
     @test !isconst(TestMod7648, :d7648)
 end
 
+# tests for `names(...; usings=true)`
+
+baremodule Test54609Simple
+module Inner
+export exported
+global exported::Int = 1
+global unexported::Int = 0
+end
+using Base: @assume_effects
+using .Inner
+end
+let usings = names(Test54609Simple; usings=true)
+    @test Symbol("@assume_effects") ∈ usings
+    @test :Base ∉ usings
+    @test :exported ∈ usings
+    @test :unexported ∉ usings
+end # baremodule Test54609Simple
+
+baremodule _Test54609Complex
+export exported_new
+using Base: @deprecate_binding
+global exported_new = nothing
+@deprecate_binding exported_old exported_new
+end # baremodule _Test54609Complex
+baremodule Test54609Complex
+using .._Test54609Complex
+end # baremodule Test54609Complex
+let usings = names(Test54609Complex; usings=true)
+    @test :exported_new ∈ usings
+    @test :exported_old ∉ usings
+    @test :_Test54609Complex ∈ usings # should include the `using`ed module itself
+    usings_all = names(Test54609Complex; usings=true, all=true)
+    @test :exported_new ∈ usings_all
+    @test :exported_old ∈ usings_all # deprecated names should be included with `all=true`
+end
+
+module TestMod54609
+module M1
+    const m1_x = 1
+    export m1_x
+end
+module M2
+    const m2_x = 1
+    export m2_x
+end
+module A
+    module B
+        f(x) = 1
+        secret = 1
+        module Inner2 end
+    end
+    module C
+        x = 1
+        y = 2
+        export y
+    end
+    using .B: f
+    using .C
+    using ..M1
+    import ..M2
+end
+end # module TestMod54609
+let defaultset = Set((:A,))
+    imported = Set((:M2,))
+    usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself
+    usings = Set((:A, :f, :C, :y, :M1, :m1_x)) ∪ usings_from_Base
+    allset = Set((:A, :B, :C, :eval, :include, Symbol("#eval"), Symbol("#include")))
+    @test Set(names(TestMod54609.A)) == defaultset
+    @test Set(names(TestMod54609.A, imported=true)) == defaultset ∪ imported
+    @test Set(names(TestMod54609.A, usings=true)) == defaultset ∪ usings
+    @test Set(names(TestMod54609.A, all=true)) == allset
+    @test Set(names(TestMod54609.A, all=true, usings=true)) == allset ∪ usings
+    @test Set(names(TestMod54609.A, imported=true, usings=true)) == defaultset ∪ imported ∪ usings
+    @test Set(names(TestMod54609.A, all=true, imported=true, usings=true)) == allset ∪ imported ∪ usings
+end
+
 let
     using .TestMod7648
     @test Base.binding_module(@__MODULE__, :a9475) == TestMod7648.TestModSub9475
@@ -189,10 +283,10 @@ let
     @test parentmodule(foo7648, (Any,)) == TestMod7648
     @test parentmodule(foo7648) == TestMod7648
     @test parentmodule(foo7648_nomethods) == TestMod7648
-    @test parentmodule(foo9475, (Any,)) == TestMod7648.TestModSub9475
-    @test parentmodule(foo9475) == TestMod7648.TestModSub9475
+    @test parentmodule(f9475, (Any,)) == TestMod7648.TestModSub9475
+    @test parentmodule(f9475) == TestMod7648.TestModSub9475
     @test parentmodule(Foo7648) == TestMod7648
-    @test parentmodule(first(methods(foo9475))) == TestMod7648.TestModSub9475
+    @test parentmodule(first(methods(f9475))) == TestMod7648.TestModSub9475
     @test parentmodule(first(methods(foo7648))) == TestMod7648
     @test nameof(Foo7648) === :Foo7648
     @test basename(functionloc(foo7648, (Any,))[1]) == "reflection.jl"
@@ -592,7 +686,7 @@ let
     @test @inferred wrapperT(ReflectionExample{T, Int64} where T) == ReflectionExample
     @test @inferred wrapperT(ReflectionExample) == ReflectionExample
     @test @inferred wrapperT(Union{ReflectionExample{Union{},1},ReflectionExample{Float64,1}}) == ReflectionExample
-    @test_throws(ErrorException("typename does not apply to unions whose components have different typenames"),
+    @test_throws(Core.TypeNameError(Union{Int, Float64}),
                  Base.typename(Union{Int, Float64}))
 end
 
@@ -1185,14 +1279,22 @@ end
 # marking a symbol as public should not "unexport" it
 # https://github.com/JuliaLang/julia/issues/52812
 module Mod52812
+using Test
 export a, b
-public a
+@test_throws ErrorException eval(Expr(:public, :a))
+public c
+@test_throws ErrorException eval(Expr(:export, :c))
+export b
+public c
 end
 
 @test Base.isexported(Mod52812, :a)
 @test Base.isexported(Mod52812, :b)
 @test Base.ispublic(Mod52812, :a)
 @test Base.ispublic(Mod52812, :b)
+@test Base.ispublic(Mod52812, :c) && !Base.isexported(Mod52812, :c)
 
 @test Base.infer_return_type(code_lowered, (Any,)) == Vector{Core.CodeInfo}
 @test Base.infer_return_type(code_lowered, (Any,Any)) == Vector{Core.CodeInfo}
+
+@test methods(Union{}) == Any[m.method for m in Base._methods_by_ftype(Tuple{Core.TypeofBottom, Vararg}, 1, Base.get_world_counter())] # issue #55187
diff --git a/test/regex.jl b/test/regex.jl
index e5f1428527512..51802125a3467 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -101,15 +101,34 @@
         @test haskey(m, 3)
         @test !haskey(m, 44)
         @test (m[1], m[2], m[3]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (var"1"="x", var"2"="y", var"3"="z")
+        @test Dict(m) == Dict([1=>"x", 2=>"y", 3=>"z"])
         @test sprint(show, m) == "RegexMatch(\"xyz\", 1=\"x\", 2=\"y\", 3=\"z\")"
     end
 
     # Named subpatterns
+    let m = match(r"(?<a>.)(?<c>.)(?<b>.)", "xyz")
+        @test haskey(m, :a)
+        @test haskey(m, "b")
+        @test !haskey(m, "foo")
+        @test (m[:a], m[:c], m["b"]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (a="x", c="y", b="z")
+        @test Dict(m) == Dict(["a"=>"x", "c"=>"y", "b"=>"z"])
+        @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", c=\"y\", b=\"z\")"
+        @test keys(m) == ["a", "c", "b"]
+    end
+
+    # Named and unnamed subpatterns
     let m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
         @test haskey(m, :a)
         @test haskey(m, "b")
         @test !haskey(m, "foo")
         @test (m[:a], m[2], m["b"]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (a="x", var"2"="y", b="z")
+        @test Dict(m) == Dict(["a"=>"x", 2=>"y", "b"=>"z"])
         @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
         @test keys(m) == ["a", 2, "b"]
     end
@@ -194,7 +213,7 @@
 
         r = r"" * raw"a\Eb|c"
         @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
-        @test match(r, raw"c") == nothing
+        @test match(r, raw"c") === nothing
 
         # error for really incompatible options
         @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
index 05a40895b7934..e6381329e4ec6 100644
--- a/test/reinterpretarray.jl
+++ b/test/reinterpretarray.jl
@@ -40,9 +40,8 @@ end
 @test_throws ArgumentError("cannot reinterpret `Vector{Int32}` as `Int32`, type `Vector{Int32}` is not a bits type") reinterpret(Int32, Av)
 @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int64` array to `Int32` which is of a different size") reinterpret(Int32, reshape([Int64(0)]))
 @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int32` array to `Int64` which is of a different size") reinterpret(Int64, reshape([Int32(0)]))
-@test_throws ArgumentError("""cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`.
-                              The resulting array would have non-integral first dimension.
-                              """) reinterpret(Tuple{Int,Int}, [1,2,3,4,5])
+@test_throws ArgumentError("cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`."*
+                              " The resulting array would have a non-integral first dimension.") reinterpret(Tuple{Int,Int}, [1,2,3,4,5])
 
 @test_throws ArgumentError("`reinterpret(reshape, Complex{Int64}, a)` where `eltype(a)` is Int64 requires that `axes(a, 1)` (got Base.OneTo(4)) be equal to 1:2 (from the ratio of element sizes)") reinterpret(reshape, Complex{Int64}, A)
 @test_throws ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got 24) and `sizeof(eltype(a))` (got 16) be an integer multiple of the other") reinterpret(reshape, NTuple{3, Int64}, B)
@@ -588,3 +587,23 @@ end
 
     @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4)))
 end
+
+let R = reinterpret(Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im])
+    @test !isassigned(R, 0)
+    @test isassigned(R, 1)
+    @test isassigned(R, 4)
+    @test isassigned(R, Int8(2), Int16(1), Int32(1), Int64(1))
+    @test !isassigned(R, 1, 2)
+    @test !isassigned(R, 5)
+    @test Array(R)::Vector{Float32} == [1.0f0, 2.0f0, 4.0f0, 3.0f0]
+end
+
+let R = reinterpret(reshape, Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im])
+    @test !isassigned(R, 0)
+    @test isassigned(R, 1)
+    @test isassigned(R, 4)
+    @test isassigned(R, Int8(2), Int16(2), Int32(1), Int64(1))
+    @test !isassigned(R, 1, 1, 2)
+    @test !isassigned(R, 5)
+    @test Array(R)::Matrix{Float32} == [1.0f0 4.0f0; 2.0f0 3.0f0]
+end
diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl
index b2a539ac330d3..039d422c35e25 100644
--- a/test/relocatedepot.jl
+++ b/test/relocatedepot.jl
@@ -17,10 +17,12 @@ function test_harness(@nospecialize(fn); empty_load_path=true, empty_depot_path=
     end
 end
 
-# We test relocation with three dummy pkgs:
-# - RelocationTestPkg1 - no include_dependency
-# - RelocationTestPkg2 - with include_dependency tracked by `mtime`
-# - RelocationTestPkg3 - with include_dependency tracked by content
+# We test relocation with these dummy pkgs:
+# - RelocationTestPkg1 - pkg with no include_dependency
+# - RelocationTestPkg2 - pkg with include_dependency tracked by `mtime`
+# - RelocationTestPkg3 - pkg with include_dependency tracked by content
+# - RelocationTestPkg4 - pkg with no dependencies; will be compiled such that the pkgimage is
+#                        not relocatable, but no repeated recompilation happens upon loading
 
 if !test_relocated_depot
 
@@ -78,8 +80,10 @@ if !test_relocated_depot
             cachefiles = Base.find_all_in_cache_path(pkg)
             rm.(cachefiles, force=true)
             @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
             Base.require(pkg)
             @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == true
         end
     end
 
@@ -93,10 +97,12 @@ if !test_relocated_depot
             rm.(cachefiles, force=true)
             rm(joinpath(@__DIR__, pkgname, "src", "foodir"), force=true, recursive=true)
             @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
             touch(joinpath(@__DIR__, pkgname, "src", "foo.txt"))
             mkdir(joinpath(@__DIR__, pkgname, "src", "foodir"))
             Base.require(pkg)
             @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
         end
     end
 
@@ -110,10 +116,33 @@ if !test_relocated_depot
             rm.(cachefiles, force=true)
             rm(joinpath(@__DIR__, pkgname, "src", "bardir"), force=true, recursive=true)
             @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
             touch(joinpath(@__DIR__, pkgname, "src", "bar.txt"))
             mkdir(joinpath(@__DIR__, pkgname, "src", "bardir"))
             Base.require(pkg)
             @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == true
+        end
+    end
+
+    @testset "precompile RelocationTestPkg4" begin
+        # test for #52346 and https://github.com/JuliaLang/julia/issues/53859#issuecomment-2027352004
+        # If a pkgimage is not relocatable, no repeated precompilation should occur.
+        pkgname = "RelocationTestPkg4"
+        test_harness(empty_depot_path=false) do
+            push!(LOAD_PATH, @__DIR__)
+            # skip this dir to make the pkgimage not relocatable
+            filter!(DEPOT_PATH) do depot
+                !startswith(@__DIR__, depot)
+            end
+            pkg = Base.identify_package(pkgname)
+            cachefiles = Base.find_all_in_cache_path(pkg)
+            rm.(cachefiles, force=true)
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == false
         end
     end
 
@@ -202,6 +231,7 @@ else
             # stdlib should be already precompiled
             pkg = Base.identify_package("DelimitedFiles")
             @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
         end
     end
 
@@ -213,6 +243,7 @@ else
             push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
             pkg = Base.identify_package(pkgname)
             @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
         end
     end
 
@@ -224,10 +255,13 @@ else
             push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
             pkg = Base.identify_package(pkgname)
             @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency
+            @test Base.isrelocatable(pkg) == false # because not precompiled
             Base.require(pkg)
             @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
             touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg2", "src", "foodir", "foofoo"))
             @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
         end
     end
 
@@ -235,12 +269,27 @@ else
         pkgname = "RelocationTestPkg3"
         test_harness() do
             push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot"))
-            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot"))
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
             push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
             pkg = Base.identify_package(pkgname)
             @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
             touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg3", "src", "bardir", "barbar"))
             @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+        end
+    end
+
+    @testset "load RelocationTestPkg4 from test/relocatedepot" begin
+        pkgname = "RelocationTestPkg4"
+        test_harness() do
+            push!(LOAD_PATH, @__DIR__, "relocatedepot")
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
+            pkg = Base.identify_package(pkgname)
+            # precompiled but not relocatable
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == false
         end
     end
 
diff --git a/test/rounding.jl b/test/rounding.jl
index 045c834e63013..76b15ec1d9118 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -458,3 +458,15 @@ end
     @test_throws InexactError round(Int128, -Inf16)
     # More comprehensive testing is present in test/floatfuncs.jl
 end
+
+@testset "floor(<:AbstractFloat, large_number) (#52355)" begin
+    @test floor(Float32, 0xffff_ffff) == prevfloat(2f0^32) <= 0xffff_ffff
+    @test trunc(Float16, typemax(UInt128)) == floatmax(Float16)
+    @test round(Float16, typemax(UInt128)) == Inf16
+    for i in [-BigInt(floatmax(Float64)), -BigInt(floatmax(Float64))*100, BigInt(floatmax(Float64)), BigInt(floatmax(Float64))*100]
+        f = ceil(Float64, i)
+        @test f >= i
+        @test isinteger(f) || isinf(f)
+        @test prevfloat(f) < i
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7c6fa8480d98e..c46472ac93fa8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,7 +11,7 @@ include("choosetests.jl")
 include("testenv.jl")
 include("buildkitetestjson.jl")
 
-using .BuildKiteTestJSON
+using .BuildkiteTestJSON
 
 (; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
 tests = unique(tests)
diff --git a/test/scopedvalues.jl b/test/scopedvalues.jl
index 39de22bc2bee5..61b10c557c455 100644
--- a/test/scopedvalues.jl
+++ b/test/scopedvalues.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-import Base: ScopedValues
+
+using Base.ScopedValues
+
+include("compiler/irutils.jl")
 
 @testset "errors" begin
     @test ScopedValue{Float64}(1)[] == 1.0
@@ -52,6 +55,16 @@ emptyf() = nothing
 @testset "conversion" begin
     with(emptyf, sval_float=>2)
     @test_throws MethodError with(emptyf, sval_float=>"hello")
+    a = ScopedValue(1)
+    with(a => 2.0) do
+        @test a[] == 2
+        @test a[] isa Int
+    end
+    a = ScopedValue(1.0)
+    with(a => 2) do
+        @test a[] == 2.0
+        @test a[] isa Float64
+    end
 end
 
 import Base.Threads: @spawn
@@ -67,13 +80,13 @@ import Base.Threads: @spawn
 end
 
 @testset "show" begin
-    @test sprint(show, ScopedValue{Int}()) == "ScopedValue{$Int}(undefined)"
-    @test sprint(show, sval) == "ScopedValue{$Int}(1)"
+    @test sprint(show, ScopedValue{Int}()) == "Base.ScopedValues.ScopedValue{$Int}(undefined)"
+    @test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(1)"
     @test sprint(show, Core.current_scope()) == "nothing"
     with(sval => 2.0) do
-        @test sprint(show, sval) == "ScopedValue{$Int}(2)"
+        @test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(2)"
         objid = sprint(show, Base.objectid(sval))
-        @test sprint(show, Core.current_scope()) == "Base.ScopedValues.Scope(ScopedValue{$Int}@$objid => 2)"
+        @test sprint(show, Core.current_scope()) == "Base.ScopedValues.Scope(Base.ScopedValues.ScopedValue{$Int}@$objid => 2)"
     end
 end
 
@@ -150,3 +163,9 @@ end
 let code = code_typed(with_macro_slot_cross)[1][1].code
     @test !any(x->isa(x, Core.PhiCNode), code)
 end
+
+# inline constant scoped values
+const inlineable_const_sv = ScopedValue(1)
+@test fully_eliminated(; retval=(inlineable_const_sv => 1)) do
+    inlineable_const_sv => 1
+end
diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl
index 29e28ded8da72..703552570745c 100644
--- a/test/secretbuffer.jl
+++ b/test/secretbuffer.jl
@@ -1,5 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt
+
 using Base: SecretBuffer, SecretBuffer!, shred!, isshredded
 using Test, Random
 
@@ -170,4 +174,29 @@ using Test, Random
         @test read(s5) == read(s6) == codeunits(str)
         shred!(s5); shred!(s6)
     end
+
+    if !Sys.iswindows()
+        @testset "getpass" begin
+            v1, s1 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I"); (read(s), Base.shred!(s))), ["LPAwVZM8D4I: " => "too many secrets\n"])
+            s2 = SecretBuffer("too many secrets")
+            @test s1 isa SecretBuffer
+            @test isshredded(s1)
+            @test v1 == read(s2) == codeunits("too many secrets")
+            shred!(s1); shred!(s2)
+
+            v3, s3 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=false); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> " => "frperg\n"])
+            s4 = SecretBuffer("frperg")
+            @test s3 isa SecretBuffer
+            @test isshredded(s3)
+            @test v3 == read(s4) == codeunits("frperg")
+            shred!(s3); shred!(s4)
+
+            v5, s5 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=true); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> : " => "frperg\n"])
+            s6 = SecretBuffer("frperg")
+            @test s5 isa SecretBuffer
+            @test isshredded(s5)
+            @test v5 == read(s6) == codeunits("frperg")
+            shred!(s5); shred!(s6)
+        end
+    end
 end
diff --git a/test/show.jl b/test/show.jl
index 8d6b32a3fd663..d9c3585b7c1df 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -755,6 +755,69 @@ end
 
 @test startswith(sprint(show, typeof(x->x), context = :module=>@__MODULE__), "var\"")
 
+# PR 53719
+module M53719
+    f = x -> x + 1
+    function foo(x)
+        function bar(y)
+            function baz(z)
+                return x + y + z
+            end
+            return baz
+        end
+        return bar
+    end
+    function foo2(x)
+        function bar2(y)
+            return z -> x + y + z
+        end
+        return bar2
+    end
+    lambda1 = (x)->begin
+        function foo(y)
+            return x + y
+        end
+        return foo
+    end
+    lambda2 = (x)->begin
+        y -> x + y
+    end
+end
+
+@testset "PR 53719 function names" begin
+    # M53719.f should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.f, context = :module=>M53719))
+    # M53719.foo(1) should be printed as var"#bar"
+    @test occursin(r"var\"#bar", sprint(show, M53719.foo(1), context = :module=>M53719))
+    # M53719.foo(1)(2) should be printed as var"#baz"
+    @test occursin(r"var\"#baz", sprint(show, M53719.foo(1)(2), context = :module=>M53719))
+    # M53719.foo2(1) should be printed as var"#bar2"
+    @test occursin(r"var\"#bar2", sprint(show, M53719.foo2(1), context = :module=>M53719))
+    # M53719.foo2(1)(2) should be printed as var"#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+", sprint(show, M53719.foo2(1)(2), context = :module=>M53719))
+    # M53719.lambda1(1) should be printed as var"#foo"
+    @test occursin(r"var\"#foo", sprint(show, M53719.lambda1(1), context = :module=>M53719))
+    # M53719.lambda2(1) should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.lambda2(1), context = :module=>M53719))
+end
+
+@testset "PR 53719 function types" begin
+    # typeof(M53719.f) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.f), context = :module=>M53719))
+    #typeof(M53719.foo(1)) should be printed as var"#bar#foo##[0-9]+"
+    @test occursin(r"var\"#bar#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)), context = :module=>M53719))
+    #typeof(M53719.foo(1)(2)) should be printed as var"#baz#foo##[0-9]+"
+    @test occursin(r"var\"#baz#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)(2)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)) should be printed as var"#bar2#foo2##[0-9]+"
+    @test occursin(r"var\"#bar2#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)(2)) should be printed as var"#foo2##[0-9]+#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)(2)), context = :module=>M53719))
+    #typeof(M53719.lambda1(1)) should be printed as var"#foo#[0-9]+"
+    @test occursin(r"var\"#foo#[0-9]+", sprint(show, typeof(M53719.lambda1(1)), context = :module=>M53719))
+    #typeof(M53719.lambda2(1)) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.lambda2(1)), context = :module=>M53719))
+end
+
 #test methodshow.jl functions
 @test Base.inbase(Base)
 @test !Base.inbase(LinearAlgebra)
@@ -1291,7 +1354,7 @@ end
               repr == "Union{String, $(curmod_prefix)M30442.T}"
     end
     let repr = sprint(dump, Ptr{UInt8}(UInt(1)))
-        @test repr == "Ptr{UInt8} @$(Base.repr(UInt(1)))\n"
+        @test repr == "Ptr{UInt8}($(Base.repr(UInt(1))))\n"
     end
     let repr = sprint(dump, Core.svec())
         @test repr == "empty SimpleVector\n"
@@ -1381,6 +1444,7 @@ test_repr("(:).a")
 @test repr(@NamedTuple{kw::@NamedTuple{kw2::Int64}}) == "@NamedTuple{kw::@NamedTuple{kw2::Int64}}"
 @test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}"
 @test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}"
+@test repr(@NamedTuple{var"#"::Int64}) == "@NamedTuple{var\"#\"::Int64}"
 
 # Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax)
 @test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}"
@@ -2075,7 +2139,7 @@ eval(Meta._parse_string("""function my_fun28173(x)
 end""", "a"^80, 1, 1, :statement)[1]) # use parse to control the line numbers
 let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
-    fill!(src.codelocs, 0) # IRCode printing is only capable of printing partial line info
+    src.debuginfo = Core.DebugInfo(src.debuginfo.def) # IRCode printing defaults to incomplete line info printing, so turn it off completely for CodeInfo too
     let source_slotnames = String["my_fun28173", "x"],
         repr_ir = split(repr(ir, context = :SOURCE_SLOTNAMES=>source_slotnames), '\n'),
         repr_ir = "CodeInfo(\n" * join((l[4:end] for l in repr_ir), "\n") * ")" # remove line numbers
@@ -2682,3 +2746,30 @@ end
 using .Issue49382
 (::Type{Issue49382.Type49382})() = 1
 @test sprint(show, methods(Issue49382.Type49382)) isa String
+
+# Showing of bad SlotNumber in Expr(:toplevel)
+let lowered = Meta.lower(Main, Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(x = 1)), :(y = 1))))
+    ci = lowered.args[1]
+    @assert isa(ci, Core.CodeInfo)
+    @test !isempty(ci.slotnames)
+    @assert ci.code[1].head === :toplevel
+    ci.code[1].args[1] = :($(Core.SlotNumber(1)) = 1)
+    # Check that this gets printed as `_1 = 1` not `y = 1`
+    @test contains(sprint(show, ci), "_1 = 1")
+end
+
+# Pointers should be reprable
+@test is_juliarepr(pointer([1]))
+@test is_juliarepr(Ptr{Vector{Complex{Float16}}}(UInt(0xdeadbeef)))
+
+# Toplevel MethodInstance with undef :uninferred
+let topmi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
+    topmi.specTypes = Tuple{}
+    topmi.def = Main
+    @test contains(repr(topmi), "Toplevel MethodInstance")
+end
+
+@testset "show(<do-block expr>) no trailing whitespace" begin
+    do_expr1 = :(foo() do; bar(); end)
+    @test !contains(sprint(show, do_expr1), " \n")
+end
diff --git a/test/some.jl b/test/some.jl
index e49fc586a3a6e..89f699d8306c3 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -44,11 +44,37 @@
 
 ##  == and isequal nothing
 
-@test Some(1) != nothing
-@test Some(nothing) != nothing
+@test Some(1) !== nothing
+@test Some(nothing) !== nothing
 @test !isequal(Some(1), nothing)
 @test !isequal(Some(nothing), nothing)
 
+# Some with something else is false
+@test !=(Some(nothing), nothing)
+@test !=(nothing, Some(nothing))
+
+# Two `Some`s forward to their wrapped things
+@test ==(Some([0x1]), Some([1]))
+
+# propagate wrapped missings
+@test !=(Some(1), Some(missing)) isa Missing
+@test !=(Some(missing), Some(1)) isa Missing
+@test ==(Some(missing), Some(missing)) isa Missing
+
+# Make sure to still propagate non-wrapped Missing
+@test ==(Some(1), missing) isa Missing
+@test ==(missing, Some(1)) isa Missing
+
+@test isequal(Some([0x1]), Some([1]))
+@test !isequal(missing, Some(missing))
+@test !isequal(Some(missing), missing)
+@test isequal(Some(missing), Some(missing))
+
+# hashing implications
+@test hash(Some(0x1)) != hash(0x1)
+@test hash(Some(0x1)) == hash(Some(1))
+@test hash((Some(1),)) != hash((1, Some))
+
 @testset "something" begin
     @test_throws ArgumentError something()
     @test something(1) === 1
diff --git a/test/sorting.jl b/test/sorting.jl
index d1875c9727a29..2714197f58823 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -12,7 +12,7 @@ using .Main.OffsetArrays
 @testset "Base.Sort docstrings" begin
     undoc = Docs.undocumented_names(Base.Sort)
     @test_broken isempty(undoc)
-    @test undoc == [:Algorithm, :SMALL_ALGORITHM, :SMALL_THRESHOLD, :Sort]
+    @test undoc == [:Algorithm, :SMALL_THRESHOLD, :Sort]
 end
 
 @testset "Order" begin
diff --git a/test/spawn.jl b/test/spawn.jl
index 831eac493d4aa..c1802ba1f74da 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -573,7 +573,7 @@ end
 @test Cmd(`foo`, env=["A=true"]).env      == ["A=true"]
 @test Cmd(`foo`, env=("A"=>true,)).env    == ["A=true"]
 @test Cmd(`foo`, env=["A"=>true]).env     == ["A=true"]
-@test Cmd(`foo`, env=nothing).env         == nothing
+@test Cmd(`foo`, env=nothing).env         === nothing
 
 # test for interpolation of Cmd
 let c = setenv(`x`, "A"=>true)
diff --git a/test/specificity.jl b/test/specificity.jl
index 816a59f63e193..13688036c2047 100644
--- a/test/specificity.jl
+++ b/test/specificity.jl
@@ -316,3 +316,14 @@ end
 @test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}})
+
+# requires assertions enabled
+let root = NTuple
+    N = root.var
+    T = root.body.var
+    x1 = root.body.body
+    x2 = Dict{T,Tuple{N}}
+    A = UnionAll(N, UnionAll(T, Tuple{Union{x1, x2}}))
+    B = Tuple{Union{UnionAll(N, UnionAll(T, x1)), UnionAll(N, UnionAll(T, x2))}}
+    @ccall jl_type_morespecific_no_subtype(A::Any, B::Any)::Cint
+end
diff --git a/test/stacktraces.jl b/test/stacktraces.jl
index 69b3b71d03167..bc86479dbab4b 100644
--- a/test/stacktraces.jl
+++ b/test/stacktraces.jl
@@ -102,11 +102,9 @@ for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false
 end
 end
 
-let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo,
-    li = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()),
-    sf
-
-    setfield!(li, :uninferred, src, :monotonic)
+let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo
+    li = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ())
+    @atomic li.cache = ccall(:jl_new_codeinst_for_uninferred, Ref{Core.CodeInstance}, (Any, Any), li, src)
     li.specTypes = Tuple{}
     li.def = @__MODULE__
     sf = StackFrame(:a, :b, 3, li, false, false, 0)
diff --git a/test/staged.jl b/test/staged.jl
index 76d02c1938e4d..aec4a3bf135d3 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable
+using Test
+
 using Random
 using InteractiveUtils: code_llvm, code_native
 
@@ -200,7 +203,7 @@ let gf_err2
     @test_throws Expected gf_err2(code_typed)
     @test_throws Expected gf_err2(code_llvm)
     @test_throws Expected gf_err2(code_native)
-    @test gf_err_ref[] == 88
+    @test gf_err_ref[] < 1000
 end
 
 # issue #15043
@@ -337,13 +340,15 @@ let world = Base.get_world_counter()
     match = Base._which(Tuple{typeof(sin), Int}; world)
     mi = Core.Compiler.specialize_method(match)
     lwr = Core.Compiler.retrieve_code_info(mi, world)
-    @test all(lin->lin.method === :sin, lwr.linetable)
+    nstmts = length(lwr.code)
+    di = Core.DebugInfo(Core.Compiler.DebugInfoStream(mi, lwr.debuginfo, nstmts), nstmts)
+    lwr.debuginfo = di
     @eval function sin_generated(a)
         $(Expr(:meta, :generated, Returns(lwr)))
         $(Expr(:meta, :generated_only))
     end
     src = only(code_lowered(sin_generated, (Int,)))
-    @test all(lin->lin.method === :sin, src.linetable)
+    @test src.debuginfo === di
     @test sin_generated(42) == sin(42)
 end
 
@@ -362,7 +367,7 @@ let
     nstmts = length(src.code)
     nslots = 1
     src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
+    src.debuginfo = Core.DebugInfo(:f_unreachable_generated)
     src.ssaflags = fill(Int32(0), nstmts)
     src.slotflags = fill(0, nslots)
     src.slottypes = Any[Any]
@@ -375,3 +380,61 @@ let
     ir, _ = Base.code_ircode(f_unreachable, ()) |> only
     @test length(ir.cfg.blocks) == 1
 end
+
+# Test that `Core.CachedGenerator` works as expected
+struct Generator54916 <: Core.CachedGenerator end
+function (::Generator54916)(world::UInt, source::LineNumberNode, args...)
+    stub = Core.GeneratedFunctionStub(identity, Core.svec(:doit54916, :func, :arg), Core.svec())
+    return stub(world, source, :(func(arg)))
+end
+@eval function doit54916(func, arg)
+    $(Expr(:meta, :generated, Generator54916()))
+    $(Expr(:meta, :generated_only))
+end
+@test doit54916(sin, 1) == sin(1)
+let mi = only(methods(doit54916)).specializations
+    ci = mi.cache::Core.CodeInstance
+    found = false
+    while true
+        if ci.owner === :uninferred && ci.inferred isa Core.CodeInfo
+            found = true
+            break
+        end
+        isdefined(ci, :next) || break
+        ci = ci.next
+    end
+    @test found
+end
+
+# Test that writing a bad cassette-style pass gives the expected error (#49715)
+function generator49715(world, source, self, f, tt)
+    tt = tt.parameters[1]
+    sig = Tuple{f, tt.parameters...}
+    mi = Base._which(sig; world)
+    error("oh no")
+    stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec())
+    stub(world, source, :(nothing))
+end
+@eval function doit49715(f, tt)
+    $(Expr(:meta, :generated, generator49715))
+    $(Expr(:meta, :generated_only))
+end
+@test_throws "oh no" doit49715(sin, Tuple{Int})
+
+# Test that the CodeInfo returned from generated function need not match the generator.
+function overdubbee54341(a, b)
+    a + b
+end
+const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1]
+function overdub_generator54341(world::UInt, source::LineNumberNode, args...)
+    if length(args) != 2
+        :(error("Wrong number of arguments"))
+    else
+        return copy(overdubee_codeinfo54341)
+    end
+end
+@eval function overdub54341(args...)
+    $(Expr(:meta, :generated, overdub_generator54341))
+    $(Expr(:meta, :generated_only))
+end
+@test overdub54341(1, 2) == 3
diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl
index fda583bf7f778..ee53c3d5846eb 100644
--- a/test/strings/annotated.jl
+++ b/test/strings/annotated.jl
@@ -5,13 +5,24 @@
     @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[])
     @test length(str) == 11
     @test ncodeunits(str) == 11
+    @test codeunits(str) == codeunits("some string")
+    @test codeunit(str) == UInt8
+    @test codeunit(str, 1) == codeunit("some string", 1)
+    @test firstindex(str) == firstindex("some string")
+    @test convert(Base.AnnotatedString, str) === str
     @test eltype(str) == Base.AnnotatedChar{eltype(str.string)}
     @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[])
     @test str[1:4] isa SubString{typeof(str)}
     @test str[1:4] == Base.AnnotatedString("some")
+    big_byte_str = Base.AnnotatedString("आख")
+    @test_throws StringIndexError big_byte_str[5]
     @test "a" * str == Base.AnnotatedString("asome string")
     @test str * "a" == Base.AnnotatedString("some stringa")
     @test str * str == Base.AnnotatedString("some stringsome string")
+    @test cmp(str, "some stringy thingy") == -1
+    @test cmp("some stringy thingy", str) == 1
+    @test str[3:4] == SubString("me")
+    @test SubString("me") == str[3:4]
     Base.annotate!(str, 1:4, :thing => 0x01)
     Base.annotate!(str, 6:11, :other => 0x02)
     Base.annotate!(str, 1:11, :all => 0x03)
@@ -21,13 +32,15 @@
     #  └───┰─────┘
     #     :all
     @test str[3:4] == SubString(str, 3, 4)
+    @test str[3:4] != SubString("me")
+    @test SubString("me") != str[3:4]
     @test Base.AnnotatedString(str[3:4]) ==
         Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)])
     @test Base.AnnotatedString(str[3:6]) ==
-        Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (1:4, :all => 0x03), (4:4, :other => 0x02)])
-    @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])
+        Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (4:4, :other => 0x02), (1:4, :all => 0x03)])
+    @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all => 0x03)])
     @test str != Base.AnnotatedString("some string")
-    @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (6:6, :other => 0x02), (11:11, :all => 0x03)])
+    @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (1:11, :all => 0x03), (6:6, :other => 0x02)])
     @test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (6:11, :other => 0x12)])
     @test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])
     @test Base.AnnotatedString([Base.AnnotatedChar('a', [:a => 1]), Base.AnnotatedChar('b', [:b => 2])]) ==
@@ -51,18 +64,20 @@
     # @test collect(Base.eachstyle(str)) ==
     #     [("some", [:thing => 0x01, :all => 0x03]),
     #     (" string", [:all => 0x03, :other => 0x02])]
-    @test ==(Base.annotatedstring_optimize!(
-        Base.AnnotatedString("abc", [(1:1, :val => 1),
-                             (2:2, :val => 2),
-                             (2:2, :val => 1),
-                             (3:3, :val => 2)])),
-             Base.AnnotatedString("abc", [(1:2, :val => 1),
-                                  (2:3, :val => 2)]))
+    @test chopprefix(sprint(show, str), "Base.") ==
+        "AnnotatedString{String}(\"some string\", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all => 0x03)])"
+    @test eval(Meta.parse(repr(str))) == str
+    @test sprint(show, MIME("text/plain"), str) == "\"some string\""
 end
 
 @testset "AnnotatedChar" begin
     chr = Base.AnnotatedChar('c')
+    @test Base.AnnotatedChar(UInt32('c')) == chr
+    @test convert(Base.AnnotatedChar, chr) === chr
     @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[])
+    @test uppercase(chr) == Base.AnnotatedChar('C')
+    @test titlecase(chr) == Base.AnnotatedChar('C')
+    @test lowercase(Base.AnnotatedChar('C')) == chr
     str = Base.AnnotatedString("hmm", [(1:1, :attr => "h0h0"),
                                (1:2, :attr => "h0m1"),
                                (2:3, :attr => "m1m2")])
@@ -100,6 +115,8 @@ end
                      [(1:4, :label => 5),
                       (5:5, :label => 2),
                       (6:9, :label => 5)])
+    @test join((String(str1), str1), ' ') ==
+        Base.AnnotatedString("test test", [(6:9, :label => 5)])
     @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label => 5)])
     @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label => "oomph"),
                                                        (6:7, :label => "oomph")])
@@ -108,6 +125,33 @@ end
     @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")])
 end
 
+@testset "Unicode" begin
+    for words in (["ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE", "Сodeunıts"],
+                  ["Сodeunıts", "ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE"])
+        ann_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i => i)])
+                     for (i, w) in enumerate(words)]
+        ann_str = join(ann_words, '-')
+        for transform in (lowercase, uppercase, titlecase)
+            t_words = map(transform, words)
+            ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i => i)])
+                        for (i, w) in enumerate(t_words)]
+            ann_t_str = join(ann_t_words, '-')
+            t_ann_str = transform(ann_str)
+            @test String(ann_t_str) == String(t_ann_str)
+            @test Base.annotations(ann_t_str) == Base.annotations(t_ann_str)
+        end
+        for transform in (uppercasefirst, lowercasefirst)
+            t_words = vcat(transform(first(words)), words[2:end])
+            ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i => i)])
+                        for (i, w) in enumerate(t_words)]
+            ann_t_str = join(ann_t_words, '-')
+            t_ann_str = transform(ann_str)
+            @test String(ann_t_str) == String(t_ann_str)
+            @test Base.annotations(ann_t_str) == Base.annotations(t_ann_str)
+        end
+    end
+end
+
 @testset "AnnotatedIOBuffer" begin
     aio = Base.AnnotatedIOBuffer()
     # Append-only writing
@@ -118,8 +162,8 @@ end
     # Check `annotate!`, including region sorting
     @test truncate(aio, 0).io.size == 0
     @test write(aio, "hello world") == ncodeunits("hello world")
-    @test Base.annotate!(aio, 7:11, :tag => 2) === aio
     @test Base.annotate!(aio, 1:5, :tag => 1) === aio
+    @test Base.annotate!(aio, 7:11, :tag => 2) === aio
     @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)]
     # Reading
     @test read(seekstart(deepcopy(aio.io)), String) == "hello world"
@@ -147,24 +191,55 @@ end
     @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged
     @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5
     @test read(seekstart(aio), String) == "hey-o alice"
-    @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced
+    @test Base.annotations(aio) == [(7:11, :tag => 2), (1:5, :hey => 'o')] # First annotation should have been entirely replaced
     @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie')
     @test read(seekstart(aio), String) == "hey-o abbie"
-    @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (1:5, :hey => 'o'), (8:10, :hey => 'a')]
     @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
     @test read(seekstart(aio), String) == "aby-o abbie"
-    @test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:5, :hey => 'o'), (8:10, :hey => 'a')]
     @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2
     @test read(seekstart(aio), String) == "abyss abbie"
-    @test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
+    @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:3, :hey => 'o'), (8:10, :hey => 'a')]
     # Writing one buffer to another
     newaio = Base.AnnotatedIOBuffer()
     @test write(newaio, seekstart(aio)) == 11
     @test read(seekstart(newaio), String) == "abyss abbie"
     @test Base.annotations(newaio) == Base.annotations(aio)
     @test write(seek(newaio, 5), seek(aio, 5)) == 6
-    @test Base.annotations(newaio) == Base.annotations(aio)
+    @test sort(Base.annotations(newaio)) == sort(Base.annotations(aio))
     @test write(newaio, seek(aio, 5)) == 6
     @test read(seekstart(newaio), String) == "abyss abbie abbie"
-    @test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])
+    @test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]))
+    # The `_insert_annotations!` cautious-merging optimisation
+    aio = Base.AnnotatedIOBuffer()
+    @test write(aio, Base.AnnotatedChar('a', [:a => 1, :b => 2])) == 1
+    @test Base.annotations(aio) == [(1:1, :a => 1), (1:1, :b => 2)]
+    @test write(aio, Base.AnnotatedChar('b', [:a => 1, :b => 2])) == 1
+    @test Base.annotations(aio) == [(1:2, :a => 1), (1:2, :b => 2)]
+    let aio2 = copy(aio) # A different start makes merging too risky to do.
+        @test write(aio2, Base.AnnotatedChar('c', [:a => 0, :b => 2])) == 1
+        @test Base.annotations(aio2) == [(1:2, :a => 1), (1:2, :b => 2), (3:3, :a => 0), (3:3, :b => 2)]
+    end
+    let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though.
+        @test write(aio2, Base.AnnotatedChar('c', [:b => 2])) == 1
+        @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2)]
+    end
+    let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over.
+        @test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1
+        @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)]
+    end
+    let aio2 = Base.AnnotatedIOBuffer()
+        @test write(aio2, Base.AnnotatedChar('a', [:b => 1])) == 1
+        @test write(aio2, Base.AnnotatedChar('b', [:a => 1, :b => 1])) == 1
+        @test read(seekstart(aio2), Base.AnnotatedString) ==
+            Base.AnnotatedString("ab", [(1:1, :b => 1), (2:2, :a => 1), (2:2, :b => 1)])
+    end
+    # Working through an IOContext
+    aio = Base.AnnotatedIOBuffer()
+    wrapio = IOContext(aio)
+    @test write(wrapio, Base.AnnotatedString("hey", [(1:3, :x => 1)])) == 3
+    @test write(wrapio, Base.AnnotatedChar('a', [:y => 2])) == 1
+    @test read(seekstart(aio), Base.AnnotatedString) ==
+        Base.AnnotatedString("heya", [(1:3, :x => 1), (4:4, :y => 2)])
 end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 87d812c5bf201..874607f3c1b20 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -1235,6 +1235,8 @@ end
         @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
     end
     @test_throws ArgumentError Symbol("a\0a")
+
+    @test Base._string_n_override == Core.Compiler.encode_effects_override(Base.compute_assumed_settings((:total, :(!:consistent))))
 end
 
 @testset "Ensure UTF-8 DFA can never leave invalid state" begin
@@ -1388,3 +1390,22 @@ end
         end
     end
 end
+
+@testset "transcode" begin
+    # string starting with an ASCII character
+    str_1 = "zβγ"
+    # string starting with a 2 byte UTF-8 character
+    str_2 = "αβγ"
+    # string starting with a 3 byte UTF-8 character
+    str_3 = "आख"
+    # string starting with a 4 byte UTF-8 character
+    str_4 = "𒃵𒃰"
+    @testset for str in (str_1, str_2, str_3, str_4)
+        @test transcode(String, str) === str
+        @test transcode(String, transcode(UInt16, str)) == str
+        @test transcode(String, transcode(UInt16, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(Int32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt8, transcode(UInt16, str))) == str
+    end
+end
diff --git a/test/strings/io.jl b/test/strings/io.jl
index f1fe0c24e8aea..209844580b3cd 100644
--- a/test/strings/io.jl
+++ b/test/strings/io.jl
@@ -165,6 +165,11 @@
     @test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\"
     @test Base.escape_raw_string(raw"some\"string") == "some\\\"string"
     @test Base.escape_raw_string(raw"some`string", '`') == "some\\`string"
+
+    # ascii and fullhex flags:
+    @test escape_string("\u00e4\u00f6\u00fc") == "\u00e4\u00f6\u00fc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true) == "\\ue4\\uf6\\ufc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true, fullhex=true) == "\\u00e4\\u00f6\\u00fc"
 end
 @testset "join()" begin
     @test join([]) == join([],",") == ""
@@ -339,3 +344,8 @@ end
 @testset "`string` return types" begin
     @test all(T -> T <: AbstractString, Base.return_types(string))
 end
+
+@testset "type stable `join` (#55389)" begin
+    itr = ("foo" for _ in 1:100)
+    @test Base.return_types(join, (typeof(itr),))[] == String
+end
diff --git a/test/strings/search.jl b/test/strings/search.jl
index d328168bfa466..c43327fe2971b 100644
--- a/test/strings/search.jl
+++ b/test/strings/search.jl
@@ -4,389 +4,409 @@
 astr = "Hello, world.\n"
 u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
 
-# I think these should give error on 4 also, and "" is not treated
-# consistently with SubString("",1,1), nor with Char[]
-for ind in (0, 5)
-    @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
-    @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
-end
+@testset "BoundsError for findnext/findprev" begin
+    # I think these should give error on 4 also, and "" is not treated
+    # consistently with SubString("",1,1), nor with Char[]
+    for ind in (0, 5)
+        @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
+        @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
+    end
 
-# Note: the commented out test will be enabled after fixes to make
-# sure that findnext/findprev are consistent
-# no matter what type of AbstractString the second argument is
-@test_throws BoundsError findnext(isequal('a'), "foo", 0)
-@test_throws BoundsError findnext(in(Char[]), "foo", 5)
-# @test_throws BoundsError findprev(in(Char[]), "foo", 0)
-@test_throws BoundsError findprev(in(Char[]), "foo", 5)
+    # Note: the commented out test will be enabled after fixes to make
+    # sure that findnext/findprev are consistent
+    # no matter what type of AbstractString the second argument is
+    @test_throws BoundsError findnext(isequal('a'), "foo", 0)
+    @test_throws BoundsError findnext(in(Char[]), "foo", 5)
+    # @test_throws BoundsError findprev(in(Char[]), "foo", 0)
+    @test_throws BoundsError findprev(in(Char[]), "foo", 5)
 
-# @test_throws ErrorException in("foobar","bar")
-@test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+    # @test_throws ErrorException in("foobar","bar")
+    @test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+end
 
-# ascii forward search
-for str in [astr, GenericString(astr)]
+@testset "ascii forward search $(typeof(str))" for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('x'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∀'), str) == nothing
+    @test findfirst(isequal('x'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∀'), str) === nothing
     @test findfirst(isequal('H'), str) == 1
     @test findfirst(isequal('l'), str) == 3
     @test findnext(isequal('l'), str, 4) == 4
     @test findnext(isequal('l'), str, 5) == 11
-    @test findnext(isequal('l'), str, 12) == nothing
+    @test findnext(isequal('l'), str, 12) === nothing
     @test findfirst(isequal(','), str) == 6
-    @test findnext(isequal(','), str, 7) == nothing
+    @test findnext(isequal(','), str, 7) === nothing
     @test findfirst(isequal('\n'), str) == 14
-    @test findnext(isequal('\n'), str, 15) == nothing
+    @test findnext(isequal('\n'), str, 15) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
-end
 
-for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext('z', str, 0)
     @test_throws BoundsError findnext('∀', str, 0)
-    @test findfirst('x', str) == nothing
-    @test findfirst('\0', str) == nothing
-    @test findfirst('\u80', str) == nothing
-    @test findfirst('∀', str) == nothing
+    @test findfirst('x', str) === nothing
+    @test findfirst('\0', str) === nothing
+    @test findfirst('\u80', str) === nothing
+    @test findfirst('∀', str) === nothing
     @test findfirst('H', str) == 1
     @test findfirst('l', str) == 3
     @test findfirst('e', str) == 2
-    @test findfirst('u', str) == nothing
+    @test findfirst('u', str) === nothing
     @test findnext('l', str, 4) == 4
     @test findnext('l', str, 5) == 11
-    @test findnext('l', str, 12) == nothing
+    @test findnext('l', str, 12) === nothing
     @test findfirst(',', str) == 6
-    @test findnext(',', str, 7) == nothing
+    @test findnext(',', str, 7) === nothing
     @test findfirst('\n', str) == 14
-    @test findnext('\n', str, 15) == nothing
+    @test findnext('\n', str, 15) === nothing
     @test_throws BoundsError findnext('ε', str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext('a', str, nextind(str,lastindex(str))+1)
 end
 
-# ascii backward search
-for str in [astr]
-    @test findlast(isequal('x'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∀'), str) == nothing
+@testset "ascii backward search" begin
+    str = astr
+    @test findlast(isequal('x'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∀'), str) === nothing
     @test findlast(isequal('H'), str) == 1
-    @test findprev(isequal('H'), str, 0) == nothing
+    @test findprev(isequal('H'), str, 0) === nothing
     @test findlast(isequal('l'), str) == 11
     @test findprev(isequal('l'), str, 5) == 4
     @test findprev(isequal('l'), str, 4) == 4
     @test findprev(isequal('l'), str, 3) == 3
-    @test findprev(isequal('l'), str, 2) == nothing
+    @test findprev(isequal('l'), str, 2) === nothing
     @test findlast(isequal(','), str) == 6
-    @test findprev(isequal(','), str, 5) == nothing
+    @test findprev(isequal(','), str, 5) === nothing
     @test findlast(isequal('\n'), str) == 14
-end
 
-for str in [astr]
-    @test findlast('x', str) == nothing
-    @test findlast('\0', str) == nothing
-    @test findlast('\u80', str) == nothing
-    @test findlast('∀', str) == nothing
+    @test findlast('x', str) === nothing
+    @test findlast('\0', str) === nothing
+    @test findlast('\u80', str) === nothing
+    @test findlast('∀', str) === nothing
     @test findlast('H', str) == 1
-    @test findprev('H', str, 0) == nothing
+    @test findprev('H', str, 0) === nothing
     @test findlast('l', str) == 11
     @test findprev('l', str, 5) == 4
     @test findprev('l', str, 4) == 4
     @test findprev('l', str, 3) == 3
-    @test findprev('l', str, 2) == nothing
+    @test findprev('l', str, 2) === nothing
     @test findlast(',', str) == 6
-    @test findprev(',', str, 5) == nothing
-    @test findlast(str, "") == nothing
-    @test findlast(str^2, str) == nothing
+    @test findprev(',', str, 5) === nothing
+    @test findlast(str, "") === nothing
+    @test findlast(str^2, str) === nothing
     @test findlast('\n', str) == 14
 end
 
-# utf-8 forward search
-for str in (u8str, GenericString(u8str))
+@testset "utf-8 forward search $(typeof(str))" for str in (u8str, GenericString(u8str))
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('z'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∄'), str) == nothing
+    @test findfirst(isequal('z'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∄'), str) === nothing
     @test findfirst(isequal('∀'), str) == 1
     @test_throws StringIndexError findnext(isequal('∀'), str, 2)
-    @test findnext(isequal('∀'), str, 4) == nothing
+    @test findnext(isequal('∀'), str, 4) === nothing
     @test findfirst(isequal('∃'), str) == 13
     @test_throws StringIndexError findnext(isequal('∃'), str, 15)
-    @test findnext(isequal('∃'), str, 16) == nothing
+    @test findnext(isequal('∃'), str, 16) === nothing
     @test findfirst(isequal('x'), str) == 26
     @test findnext(isequal('x'), str, 27) == 43
-    @test findnext(isequal('x'), str, 44) == nothing
+    @test findnext(isequal('x'), str, 44) === nothing
     @test findfirst(isequal('δ'), str) == 17
     @test_throws StringIndexError findnext(isequal('δ'), str, 18)
     @test findnext(isequal('δ'), str, nextind(str,17)) == 33
-    @test findnext(isequal('δ'), str, nextind(str,33)) == nothing
+    @test findnext(isequal('δ'), str, nextind(str,33)) === nothing
     @test findfirst(isequal('ε'), str) == 5
     @test findnext(isequal('ε'), str, nextind(str,5)) == 54
-    @test findnext(isequal('ε'), str, nextind(str,54)) == nothing
-    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) == nothing
-    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) == nothing
+    @test findnext(isequal('ε'), str, nextind(str,54)) === nothing
+    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) === nothing
+    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
 end
 
-# utf-8 backward search
-for str in [u8str]
-    @test findlast(isequal('z'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∄'), str) == nothing
+@testset "utf-8 backward search" begin
+    str = u8str
+    @test findlast(isequal('z'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∄'), str) === nothing
     @test findlast(isequal('∀'), str) == 1
-    @test findprev(isequal('∀'), str, 0) == nothing
+    @test findprev(isequal('∀'), str, 0) === nothing
     @test findlast(isequal('∃'), str) == 13
     @test findprev(isequal('∃'), str, 14) == 13
     @test findprev(isequal('∃'), str, 13) == 13
-    @test findprev(isequal('∃'), str, 12) == nothing
+    @test findprev(isequal('∃'), str, 12) === nothing
     @test findlast(isequal('x'), str) == 43
     @test findprev(isequal('x'), str, 42) == 26
-    @test findprev(isequal('x'), str, 25) == nothing
+    @test findprev(isequal('x'), str, 25) === nothing
     @test findlast(isequal('δ'), str) == 33
     @test findprev(isequal('δ'), str, 32) == 17
-    @test findprev(isequal('δ'), str, 16) == nothing
+    @test findprev(isequal('δ'), str, 16) === nothing
     @test findlast(isequal('ε'), str) == 54
     @test findprev(isequal('ε'), str, 53) == 5
-    @test findprev(isequal('ε'), str, 4) == nothing
+    @test findprev(isequal('ε'), str, 4) === nothing
+end
+
+@testset "string forward search with a single-char string" begin
+    @test findfirst("x", astr) === nothing
+    @test findfirst("H", astr) == 1:1
+    @test findnext("H", astr, 2) === nothing
+    @test findfirst("l", astr) == 3:3
+    @test findnext("l", astr, 4) == 4:4
+    @test findnext("l", astr, 5) == 11:11
+    @test findnext("l", astr, 12) === nothing
+    @test findfirst("\n", astr) == 14:14
+    @test findnext("\n", astr, 15) === nothing
+
+    @test findfirst("z", u8str) === nothing
+    @test findfirst("∄", u8str) === nothing
+    @test findfirst("∀", u8str) == 1:1
+    @test findnext("∀", u8str, 4) === nothing
+    @test findfirst("∃", u8str) == 13:13
+    @test findnext("∃", u8str, 16) === nothing
+    @test findfirst("x", u8str) == 26:26
+    @test findnext("x", u8str, 27) == 43:43
+    @test findnext("x", u8str, 44) === nothing
+    @test findfirst("ε", u8str) == 5:5
+    @test findnext("ε", u8str, 7) == 54:54
+    @test findnext("ε", u8str, 56) === nothing
 end
 
-# string forward search with a single-char string
-@test findfirst("x", astr) == nothing
-@test findfirst("H", astr) == 1:1
-@test findnext("H", astr, 2) == nothing
-@test findfirst("l", astr) == 3:3
-@test findnext("l", astr, 4) == 4:4
-@test findnext("l", astr, 5) == 11:11
-@test findnext("l", astr, 12) == nothing
-@test findfirst("\n", astr) == 14:14
-@test findnext("\n", astr, 15) == nothing
-
-@test findfirst("z", u8str) == nothing
-@test findfirst("∄", u8str) == nothing
-@test findfirst("∀", u8str) == 1:1
-@test findnext("∀", u8str, 4) == nothing
-@test findfirst("∃", u8str) == 13:13
-@test findnext("∃", u8str, 16) == nothing
-@test findfirst("x", u8str) == 26:26
-@test findnext("x", u8str, 27) == 43:43
-@test findnext("x", u8str, 44) == nothing
-@test findfirst("ε", u8str) == 5:5
-@test findnext("ε", u8str, 7) == 54:54
-@test findnext("ε", u8str, 56) == nothing
-
-# strifindprev  backward search with a single-char string
-@test findlast("x", astr) == nothing
-@test findlast("H", astr) == 1:1
-@test findprev("H", astr, 2) == 1:1
-@test findprev("H", astr, 0) == nothing
-@test findlast("l", astr) == 11:11
-@test findprev("l", astr, 10) == 4:4
-@test findprev("l", astr, 4) == 4:4
-@test findprev("l", astr, 3) == 3:3
-@test findprev("l", astr, 2) == nothing
-@test findlast("\n", astr) == 14:14
-@test findprev("\n", astr, 13) == nothing
-
-@test findlast("z", u8str) == nothing
-@test findlast("∄", u8str) == nothing
-@test findlast("∀", u8str) == 1:1
-@test findprev("∀", u8str, 0) == nothing
-#TODO: setting the limit in the middle of a wide char
-#      makes findnext fail but findprev succeed.
-#      Should findprev fail as well?
-#@test findprev("∀", u8str, 2) == nothing # gives 1:3
-@test findlast("∃", u8str) == 13:13
-@test findprev("∃", u8str, 12) == nothing
-@test findlast("x", u8str) == 43:43
-@test findprev("x", u8str, 42) == 26:26
-@test findprev("x", u8str, 25) == nothing
-@test findlast("ε", u8str) == 54:54
-@test findprev("ε", u8str, 53) == 5:5
-@test findprev("ε", u8str, 4) == nothing
-
-# string forward search with a single-char regex
-@test findfirst(r"x", astr) == nothing
-@test findfirst(r"H", astr) == 1:1
-@test findnext(r"H", astr, 2) == nothing
-@test findfirst(r"l", astr) == 3:3
-@test findnext(r"l", astr, 4) == 4:4
-@test findnext(r"l", astr, 5) == 11:11
-@test findnext(r"l", astr, 12) == nothing
-@test findfirst(r"\n", astr) == 14:14
-@test findnext(r"\n", astr, 15) == nothing
-@test findfirst(r"z", u8str) == nothing
-@test findfirst(r"∄", u8str) == nothing
-@test findfirst(r"∀", u8str) == 1:1
-@test findnext(r"∀", u8str, 4) == nothing
-@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
-@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
-@test findfirst(r"∃", u8str) == 13:13
-@test findnext(r"∃", u8str, 16) == nothing
-@test findfirst(r"x", u8str) == 26:26
-@test findnext(r"x", u8str, 27) == 43:43
-@test findnext(r"x", u8str, 44) == nothing
-@test findfirst(r"ε", u8str) == 5:5
-@test findnext(r"ε", u8str, 7) == 54:54
-@test findnext(r"ε", u8str, 56) == nothing
-for i = 1:lastindex(astr)
-    @test findnext(r"."s, astr, i) == i:i
+@testset "findprev backward search with a single-char string" begin
+    @test findlast("x", astr) === nothing
+    @test findlast("H", astr) == 1:1
+    @test findprev("H", astr, 2) == 1:1
+    @test findprev("H", astr, 0) === nothing
+    @test findlast("l", astr) == 11:11
+    @test findprev("l", astr, 10) == 4:4
+    @test findprev("l", astr, 4) == 4:4
+    @test findprev("l", astr, 3) == 3:3
+    @test findprev("l", astr, 2) === nothing
+    @test findlast("\n", astr) == 14:14
+    @test findprev("\n", astr, 13) === nothing
+
+    @test findlast("z", u8str) === nothing
+    @test findlast("∄", u8str) === nothing
+    @test findlast("∀", u8str) == 1:1
+    @test findprev("∀", u8str, 0) === nothing
+    #TODO: setting the limit in the middle of a wide char
+    #      makes findnext fail but findprev succeed.
+    #      Should findprev fail as well?
+    #@test findprev("∀", u8str, 2) === nothing # gives 1:3
+    @test findlast("∃", u8str) == 13:13
+    @test findprev("∃", u8str, 12) === nothing
+    @test findlast("x", u8str) == 43:43
+    @test findprev("x", u8str, 42) == 26:26
+    @test findprev("x", u8str, 25) === nothing
+    @test findlast("ε", u8str) == 54:54
+    @test findprev("ε", u8str, 53) == 5:5
+    @test findprev("ε", u8str, 4) === nothing
 end
-for i = 1:lastindex(u8str)
-    if isvalid(u8str,i)
-        @test findnext(r"."s, u8str, i) == i:i
+
+@testset "string forward search with a single-char regex" begin
+    @test findfirst(r"x", astr) === nothing
+    @test findfirst(r"H", astr) == 1:1
+    @test findnext(r"H", astr, 2) === nothing
+    @test findfirst(r"l", astr) == 3:3
+    @test findnext(r"l", astr, 4) == 4:4
+    @test findnext(r"l", astr, 5) == 11:11
+    @test findnext(r"l", astr, 12) === nothing
+    @test findfirst(r"\n", astr) == 14:14
+    @test findnext(r"\n", astr, 15) === nothing
+    @test findfirst(r"z", u8str) === nothing
+    @test findfirst(r"∄", u8str) === nothing
+    @test findfirst(r"∀", u8str) == 1:1
+    @test findnext(r"∀", u8str, 4) === nothing
+    @test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
+    @test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
+    @test findfirst(r"∃", u8str) == 13:13
+    @test findnext(r"∃", u8str, 16) === nothing
+    @test findfirst(r"x", u8str) == 26:26
+    @test findnext(r"x", u8str, 27) == 43:43
+    @test findnext(r"x", u8str, 44) === nothing
+    @test findfirst(r"ε", u8str) == 5:5
+    @test findnext(r"ε", u8str, 7) == 54:54
+    @test findnext(r"ε", u8str, 56) === nothing
+    for i = 1:lastindex(astr)
+        @test findnext(r"."s, astr, i) == i:i
+    end
+    for i = 1:lastindex(u8str)
+        if isvalid(u8str,i)
+            @test findnext(r"."s, u8str, i) == i:i
+        end
     end
 end
 
-# string forward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findnext("", astr, i) == i:i-1
+@testset "string forward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findnext("", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        @test findnext("", u8str, i) == i:i-1
+    end
+    @test findfirst("", "") === 1:0
 end
-for i = 1:lastindex(u8str)
-    @test findnext("", u8str, i) == i:i-1
+
+@testset "string backward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findprev("", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        @test findprev("", u8str, i) == i:i-1
+    end
+    @test findlast("", "") === 1:0
 end
-@test findfirst("", "") === 1:0
 
-# string backward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findprev("", astr, i) == i:i-1
+@testset "string forward search with a zero-char regex" begin
+    for i = 1:lastindex(astr)
+        @test findnext(r"", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        # TODO: should regex search fast-forward invalid indices?
+        if isvalid(u8str,i)
+            @test findnext(r"", u8str, i) == i:i-1
+        end
+    end
 end
-for i = 1:lastindex(u8str)
-    @test findprev("", u8str, i) == i:i-1
+
+# See the comments in #54579
+@testset "Search for invalid chars" begin
+    @test findfirst(==('\xff'), "abc\xffde") == 4
+    @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4
+    @test isnothing(findfirst(==('\xff'), "abcdeæd"))
+
+    @test isnothing(findnext(==('\xa6'), "æ", 1))
+    @test isnothing(findprev(==('\xa6'), "æa", 2))
 end
-@test findlast("", "") === 1:0
 
-# string forward search with a zero-char regex
-for i = 1:lastindex(astr)
-    @test findnext(r"", astr, i) == i:i-1
+@testset "string forward search with a two-char string literal" begin
+    @test findfirst("xx", "foo,bar,baz") === nothing
+    @test findfirst("fo", "foo,bar,baz") == 1:2
+    @test findnext("fo", "foo,bar,baz", 3) === nothing
+    @test findfirst("oo", "foo,bar,baz") == 2:3
+    @test findnext("oo", "foo,bar,baz", 4) === nothing
+    @test findfirst("o,", "foo,bar,baz") == 3:4
+    @test findnext("o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(",b", "foo,bar,baz") == 4:5
+    @test findnext(",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(",b", "foo,bar,baz", 10) === nothing
+    @test findfirst("az", "foo,bar,baz") == 10:11
+    @test findnext("az", "foo,bar,baz", 12) === nothing
 end
-for i = 1:lastindex(u8str)
-    # TODO: should regex search fast-forward invalid indices?
-    if isvalid(u8str,i)
-        @test findnext(r"", u8str, i) == i:i-1
-    end
+
+@testset "issue #9365" begin
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "ééé") == 1:3
+    @test findnext("éé", "ééé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€€") == 1:4
+    @test findnext("€€", "€€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
+
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "éé") == 1:3
+    @test findnext("éé", "éé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€") == 1:4
+    @test findnext("€€", "€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "ééé") == 3:5
+    @test findprev("éé", "ééé", lastindex("ééé")) == 3:5
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€€") == 4:7
+    @test findprev("€€", "€€€", lastindex("€€€")) == 4:7
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "éé") == 1:3        # should really be 1:4!
+    @test findprev("éé", "éé", lastindex("ééé")) == 1:3
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€") == 1:4        # should really be 1:6!
+    @test findprev("€€", "€€", lastindex("€€€")) == 1:4
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
+end
+
+@testset "string backward search with a two-char string literal" begin
+    @test findlast("xx", "foo,bar,baz") === nothing
+    @test findlast("fo", "foo,bar,baz") == 1:2
+    @test findprev("fo", "foo,bar,baz", 1) === nothing
+    @test findlast("oo", "foo,bar,baz") == 2:3
+    @test findprev("oo", "foo,bar,baz", 2) === nothing
+    @test findlast("o,", "foo,bar,baz") == 3:4
+    @test findprev("o,", "foo,bar,baz", 1) === nothing
+    @test findlast(",b", "foo,bar,baz") == 8:9
+    @test findprev(",b", "foo,bar,baz", 6) == 4:5
+    @test findprev(",b", "foo,bar,baz", 3) === nothing
+    @test findlast("az", "foo,bar,baz") == 10:11
+    @test findprev("az", "foo,bar,baz", 10) === nothing
+end
+
+@testset "string search with a two-char regex" begin
+    @test findfirst(r"xx", "foo,bar,baz") === nothing
+    @test findfirst(r"fo", "foo,bar,baz") == 1:2
+    @test findnext(r"fo", "foo,bar,baz", 3) === nothing
+    @test findfirst(r"oo", "foo,bar,baz") == 2:3
+    @test findnext(r"oo", "foo,bar,baz", 4) === nothing
+    @test findfirst(r"o,", "foo,bar,baz") == 3:4
+    @test findnext(r"o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(r",b", "foo,bar,baz") == 4:5
+    @test findnext(r",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(r",b", "foo,bar,baz", 10) === nothing
+    @test findfirst(r"az", "foo,bar,baz") == 10:11
+    @test findnext(r"az", "foo,bar,baz", 12) === nothing
+end
+
+@testset "occursin/contains" begin
+    # occursin with a String and Char needle
+    @test occursin("o", "foo")
+    @test occursin('o', "foo")
+    # occursin in curried form
+    @test occursin("foo")("o")
+    @test occursin("foo")('o')
+
+    # contains
+    @test contains("foo", "o")
+    @test contains("foo", 'o')
+    # contains in curried form
+    @test contains("o")("foo")
+    @test contains('o')("foo")
+
+    @test_throws ErrorException "ab" ∈ "abc"
+end
+
+@testset "issue #15723" begin
+    @test findfirst(isequal('('), "⨳(") == 4
+    @test findnext(isequal('('), "(⨳(", 2) == 5
+    @test findlast(isequal('('), "(⨳(") == 5
+    @test findprev(isequal('('), "(⨳(", 2) == 1
+
+    @test @inferred findall(isequal('a'), "éa") == [3]
+    @test @inferred findall(isequal('€'), "€€") == [1, 4]
+    @test @inferred isempty(findall(isequal('é'), ""))
+end
+
+
+@testset "issue #18109" begin
+    s_18109 = "fooα🐨βcd3"
+    @test findlast(isequal('o'), s_18109) == 3
+    @test findfirst(isequal('d'), s_18109) == 13
 end
 
-# string forward search with a two-char string literal
-@test findfirst("xx", "foo,bar,baz") == nothing
-@test findfirst("fo", "foo,bar,baz") == 1:2
-@test findnext("fo", "foo,bar,baz", 3) == nothing
-@test findfirst("oo", "foo,bar,baz") == 2:3
-@test findnext("oo", "foo,bar,baz", 4) == nothing
-@test findfirst("o,", "foo,bar,baz") == 3:4
-@test findnext("o,", "foo,bar,baz", 5) == nothing
-@test findfirst(",b", "foo,bar,baz") == 4:5
-@test findnext(",b", "foo,bar,baz", 6) == 8:9
-@test findnext(",b", "foo,bar,baz", 10) == nothing
-@test findfirst("az", "foo,bar,baz") == 10:11
-@test findnext("az", "foo,bar,baz", 12) == nothing
-
-# issue #9365
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "ééé") == 1:3
-@test findnext("éé", "ééé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€€") == 1:4
-@test findnext("€€", "€€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
-
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "éé") == 1:3
-@test findnext("éé", "éé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€") == 1:4
-@test findnext("€€", "€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "ééé") == 3:5
-@test findprev("éé", "ééé", lastindex("ééé")) == 3:5
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€€") == 4:7
-@test findprev("€€", "€€€", lastindex("€€€")) == 4:7
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "éé") == 1:3        # should really be 1:4!
-@test findprev("éé", "éé", lastindex("ééé")) == 1:3
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€") == 1:4        # should really be 1:6!
-@test findprev("€€", "€€", lastindex("€€€")) == 1:4
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
-
-# string backward search with a two-char string literal
-@test findlast("xx", "foo,bar,baz") == nothing
-@test findlast("fo", "foo,bar,baz") == 1:2
-@test findprev("fo", "foo,bar,baz", 1) == nothing
-@test findlast("oo", "foo,bar,baz") == 2:3
-@test findprev("oo", "foo,bar,baz", 2) == nothing
-@test findlast("o,", "foo,bar,baz") == 3:4
-@test findprev("o,", "foo,bar,baz", 1) == nothing
-@test findlast(",b", "foo,bar,baz") == 8:9
-@test findprev(",b", "foo,bar,baz", 6) == 4:5
-@test findprev(",b", "foo,bar,baz", 3) == nothing
-@test findlast("az", "foo,bar,baz") == 10:11
-@test findprev("az", "foo,bar,baz", 10) == nothing
-
-# string search with a two-char regex
-@test findfirst(r"xx", "foo,bar,baz") == nothing
-@test findfirst(r"fo", "foo,bar,baz") == 1:2
-@test findnext(r"fo", "foo,bar,baz", 3) == nothing
-@test findfirst(r"oo", "foo,bar,baz") == 2:3
-@test findnext(r"oo", "foo,bar,baz", 4) == nothing
-@test findfirst(r"o,", "foo,bar,baz") == 3:4
-@test findnext(r"o,", "foo,bar,baz", 5) == nothing
-@test findfirst(r",b", "foo,bar,baz") == 4:5
-@test findnext(r",b", "foo,bar,baz", 6) == 8:9
-@test findnext(r",b", "foo,bar,baz", 10) == nothing
-@test findfirst(r"az", "foo,bar,baz") == 10:11
-@test findnext(r"az", "foo,bar,baz", 12) == nothing
-
-# occursin with a String and Char needle
-@test occursin("o", "foo")
-@test occursin('o', "foo")
-# occursin in curried form
-@test occursin("foo")("o")
-@test occursin("foo")('o')
-
-# contains
-@test contains("foo", "o")
-@test contains("foo", 'o')
-# contains in curried form
-@test contains("o")("foo")
-@test contains('o')("foo")
-
-@test_throws ErrorException "ab" ∈ "abc"
-
-# issue #15723
-@test findfirst(isequal('('), "⨳(") == 4
-@test findnext(isequal('('), "(⨳(", 2) == 5
-@test findlast(isequal('('), "(⨳(") == 5
-@test findprev(isequal('('), "(⨳(", 2) == 1
-
-@test @inferred findall(isequal('a'), "éa") == [3]
-@test @inferred findall(isequal('€'), "€€") == [1, 4]
-@test @inferred isempty(findall(isequal('é'), ""))
-
-# issue #18109
-s_18109 = "fooα🐨βcd3"
-@test findlast(isequal('o'), s_18109) == 3
-@test findfirst(isequal('d'), s_18109) == 13
-
-# findall (issue #31788)
-@testset "findall" begin
+@testset "findall (issue #31788)" begin
     @test findall("fooo", "foo") == UnitRange{Int}[]
     @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25]
     @test all(findall("", "foo") .=== [1:0, 2:1, 3:2, 4:3]) # use === to compare empty ranges
@@ -395,13 +415,29 @@ s_18109 = "fooα🐨βcd3"
     @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6]
 end
 
+@testset "Findall char in string" begin
+    @test findall(==('w'), "wabcwewwawk") == [1, 5, 7, 8, 10]
+    @test isempty(findall(isequal("w"), "abcde!,"))
+    @test findall(==('读'), "联国读大会一九四二月十读日第号决通过并颁布读") == [7, 34, 64]
+
+    # Empty string
+    @test isempty(findall(isequal('K'), ""))
+    @test isempty(findall(isequal('α'), ""))
+
+    # Finds an invalid char ONLY if it's at a char boundary in the string,
+    # i.e. iterating the string would emit the given char.
+    @test findall(==('\xfe'), "abκæøc\xfeα\xfeβå!") == [10, 13]
+    @test isempty(findall(==('\xaf'), "abκæ读α\xe8\xaf\xfeβå!"))
+    @test isempty(findall(==('\xc3'), ";æ"))
+end
+
 # issue 37280
 @testset "UInt8, Int8 vector" begin
     for T in [Int8, UInt8], VT in [Int8, UInt8]
         A = T[0x40, 0x52, 0x00, 0x52, 0x00]
 
         for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A))))
-            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) == nothing
+            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) === nothing
             @test findfirst(VT[0x52], A) === 2:2
             @test findfirst(==(VT(0x52)), A) === 2
             @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing
@@ -429,6 +465,45 @@ end
             @test_throws BoundsError findprev(pattern, A, -3)
         end
     end
+
+    @test findall([0x01, 0x02], [0x03, 0x01, 0x02, 0x01, 0x02, 0x06]) == [2:3, 4:5]
+    @test isempty(findall([0x04, 0x05], [0x03, 0x04, 0x06]))
+end
+
+# Issue 54578
+@testset "No conflation of Int8 and UInt8" begin
+    # Work for mixed types if the values are the same
+    @test findfirst(==(Int8(1)), [0x01]) == 1
+    @test findnext(iszero, Int8[0, -2, 0, -3], 2) == 3
+    @test findfirst(Int8[1,4], UInt8[0, 2, 4, 1, 8, 1, 4, 2]) == 6:7
+    @test findprev(UInt8[5, 6], Int8[1, 9, 2, 5, 6, 3], 6) == 4:5
+
+    # Returns nothing for the same methods if the values are different,
+    # even if the bitpatterns are the same
+    @test isnothing(findfirst(==(Int8(-1)), [0xff]))
+    @test isnothing(findnext(isequal(0xff), Int8[-1, -2, -1], 2))
+    @test isnothing(findfirst(UInt8[0xff, 0xfe], Int8[0, -1, -2, 1, 8, 1, 4, 2]))
+    @test isnothing(findprev(UInt8[0xff, 0xfe], Int8[1, 9, 2, -1, -2, 3], 6))
+end
+
+@testset "DenseArray with offsets" begin
+    isdefined(Main, :OffsetDenseArrays) || @eval Main include("../testhelpers/OffsetDenseArrays.jl")
+    OffsetDenseArrays = Main.OffsetDenseArrays
+
+    A = OffsetDenseArrays.OffsetDenseArray(collect(0x61:0x69), 100)
+    @test findfirst(==(0x61), A) == 101
+    @test findlast(==(0x61), A) == 101
+    @test findfirst(==(0x00), A) === nothing
+
+    @test findfirst([0x62, 0x63, 0x64], A) == 102:104
+    @test findlast([0x63, 0x64], A) == 103:104
+    @test findall([0x62, 0x63], A) == [102:103]
+
+    @test findfirst(iszero, A) === nothing
+    A = OffsetDenseArrays.OffsetDenseArray([0x01, 0x02, 0x00, 0x03], -100)
+    @test findfirst(iszero, A) == -97
+    @test findnext(==(0x02), A, -99) == -98
+    @test findnext(==(0x02), A, -97) === nothing
 end
 
 # issue 32568
diff --git a/test/strings/types.jl b/test/strings/types.jl
index 771be253b1ec9..c09652c3a608d 100644
--- a/test/strings/types.jl
+++ b/test/strings/types.jl
@@ -2,196 +2,211 @@
 
 ## SubString and Cstring tests ##
 
-## SubString tests ##
-u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
-u8str2 = u8str^2
-len_u8str = length(u8str)
-slen_u8str = length(u8str)
-len_u8str2 = length(u8str2)
-slen_u8str2 = length(u8str2)
-
-@test len_u8str2 == 2 * len_u8str
-@test slen_u8str2 == 2 * slen_u8str
-
-u8str2plain = String(u8str2)
-
-for i1 = 1:length(u8str2)
-    if !isvalid(u8str2, i1); continue; end
-    for i2 = i1:length(u8str2)
-        if !isvalid(u8str2, i2); continue; end
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test u8str2[i1:i2] == u8str2plain[i1:i2]
+@testset "SubString" begin
+    u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+    u8str2 = u8str^2
+    len_u8str = length(u8str)
+    slen_u8str = length(u8str)
+    len_u8str2 = length(u8str2)
+    slen_u8str2 = length(u8str2)
+
+    @test len_u8str2 == 2 * len_u8str
+    @test slen_u8str2 == 2 * slen_u8str
+
+    u8str2plain = String(u8str2)
+    @test !isascii(u8str2)
+    @test cmp(u8str2, u8str^3) == -1
+    @test cmp(u8str2, u8str2)  == 0
+    @test cmp(u8str^3, u8str2) == 1
+    @test codeunit(u8str2) == codeunit(u8str2plain)
+
+    @test convert(Union{String, SubString{String}}, u8str2)      === u8str2
+    @test convert(Union{String, SubString{String}}, u8str2plain) === u8str2plain
+
+    for i1 = 1:ncodeunits(u8str2)
+        if !isvalid(u8str2, i1); continue; end
+        for i2 = i1:ncodeunits(u8str2)
+            if !isvalid(u8str2, i2); continue; end
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test u8str2[i1:i2] == u8str2plain[i1:i2]
+        end
     end
-end
 
-# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
-# gives the same result as `getindex` (except that it is a view not a copy)
-for idx in 0:1
-    @test SubString("∀", 1, idx) == "∀"[1:idx]
-end
+    # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
+    # gives the same result as `getindex` (except that it is a view not a copy)
+    for idx in 0:1
+        @test SubString("∀", 1, idx) == "∀"[1:idx]
+    end
 
-# Substring provided with invalid end index throws BoundsError
-@test_throws StringIndexError SubString("∀", 1, 2)
-@test_throws StringIndexError SubString("∀", 1, 3)
-@test_throws BoundsError SubString("∀", 1, 4)
-
-# Substring provided with invalid start index throws BoundsError
-@test SubString("∀∀", 1:1) == "∀"
-@test SubString("∀∀", 1:4) == "∀∀"
-@test SubString("∀∀", 4:4) == "∀"
-@test_throws StringIndexError SubString("∀∀", 1:2)
-@test_throws StringIndexError SubString("∀∀", 1:5)
-@test_throws StringIndexError SubString("∀∀", 2:4)
-@test_throws BoundsError SubString("∀∀", 0:1)
-@test_throws BoundsError SubString("∀∀", 0:4)
-@test_throws BoundsError SubString("∀∀", 1:7)
-@test_throws BoundsError SubString("∀∀", 4:7)
-
-# tests for SubString of more than one multibyte `Char` string
-# we are consistent with `getindex` for `String`
-for idx in [0, 1, 4]
-    @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
-    @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
-end
+    @testset "invalid end index" begin
+        # Substring provided with invalid end index throws BoundsError
+        @test_throws StringIndexError SubString("∀", 1, 2)
+        @test_throws StringIndexError SubString("∀", 1, 3)
+        @test_throws BoundsError SubString("∀", 1, 4)
+    end
 
-# index beyond lastindex("∀∀")
-for idx in [2:3; 5:6]
-    @test_throws StringIndexError SubString("∀∀", 1, idx)
-end
-for idx in 7:8
-    @test_throws BoundsError SubString("∀∀", 1, idx)
-end
+    @testset "invalid start index" begin
+        # Substring provided with invalid start index throws BoundsError
+        @test SubString("∀∀", 1:1) == "∀"
+        @test SubString("∀∀", 1:4) == "∀∀"
+        @test SubString("∀∀", 4:4) == "∀"
+        @test_throws StringIndexError SubString("∀∀", 1:2)
+        @test_throws StringIndexError SubString("∀∀", 1:5)
+        @test_throws StringIndexError SubString("∀∀", 2:4)
+        @test_throws BoundsError SubString("∀∀", 0:1)
+        @test_throws BoundsError SubString("∀∀", 0:4)
+        @test_throws BoundsError SubString("∀∀", 1:7)
+        @test_throws BoundsError SubString("∀∀", 4:7)
+    end
 
-let str="tempus fugit"              #length(str)==12
-    ss=SubString(str,1,lastindex(str)) #match source string
-    @test length(ss)==length(str)
+    # tests for SubString of more than one multibyte `Char` string
+    # we are consistent with `getindex` for `String`
+    for idx in [0, 1, 4]
+        @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
+        @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
+    end
 
-    ss=SubString(str,1:lastindex(str))
-    @test length(ss)==length(str)
+    @testset "index beyond lastindex(\"∀∀\")" begin
+        for idx in [2:3; 5:6]
+            @test_throws StringIndexError SubString("∀∀", 1, idx)
+        end
+        for idx in 7:8
+            @test_throws BoundsError SubString("∀∀", 1, idx)
+        end
+    end
 
-    ss=SubString(str,1,0)    #empty SubString
-    @test length(ss)==0
+    let str="tempus fugit"              #length(str)==12
+        ss=SubString(str,1,lastindex(str)) #match source string
+        @test length(ss)==length(str)
 
-    ss=SubString(str,1:0)
-    @test length(ss)==0
+        ss=SubString(str,1:lastindex(str))
+        @test length(ss)==length(str)
 
-    @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
-    @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
+        ss=SubString(str,1,0)    #empty SubString
+        @test length(ss)==0
 
-    @test_throws BoundsError SubString("", 1, 4)  #empty source string
-    @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
-    @test_throws BoundsError SubString("", 10, 12)
-    @test SubString("", 12, 10) == ""
-end
+        ss=SubString(str,1:0)
+        @test length(ss)==0
 
-@test SubString("foobar", big(1), big(3)) == "foo"
-
-let str = "aa\u2200\u2222bb"
-    u = SubString(str, 3, 6)
-    @test length(u) == 2
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == "\u2200\u2222"
-
-    @test_throws StringIndexError SubString(str, 4, 5)
-    @test_throws BoundsError iterate(u, 0)
-    @test_throws BoundsError iterate(u, 8)
-    @test_throws BoundsError getindex(u, 0)
-    @test_throws BoundsError getindex(u, 7)
-    @test_throws BoundsError getindex(u, 0:1)
-    @test_throws BoundsError getindex(u, 7:7)
-    @test reverseind(u, 1) == 4
-    @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
-    @test Base.cconvert(Ptr{Int8}, u) == u
-end
+        @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
+        @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
 
-let str = "føøbar"
-    @test_throws BoundsError SubString(str, 10, 10)
-    u = SubString(str, 4, 3)
-    @test length(u) == 0
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == ""
-end
+        @test_throws BoundsError SubString("", 1, 4)  #empty source string
+        @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
+        @test_throws BoundsError SubString("", 10, 12)
+        @test SubString("", 12, 10) == ""
+    end
 
-# search and SubString (issue #5679)
-let str = "Hello, world!"
-    u = SubString(str, 1, 5)
-    @test findlast("World", u) == nothing
-    @test findlast(isequal('z'), u) == nothing
-    @test findlast("ll", u) == 3:4
-end
+    @test SubString("foobar", big(1), big(3)) == "foo"
+
+    let str = "aa\u2200\u2222bb"
+        u = SubString(str, 3, 6)
+        @test length(u) == 2
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == "\u2200\u2222"
+
+        @test_throws StringIndexError SubString(str, 4, 5)
+        @test_throws BoundsError iterate(u, 0)
+        @test_throws BoundsError iterate(u, 8)
+        @test_throws BoundsError getindex(u, 0)
+        @test_throws BoundsError getindex(u, 7)
+        @test_throws BoundsError getindex(u, 0:1)
+        @test_throws BoundsError getindex(u, 7:7)
+        @test reverseind(u, 1) == 4
+        @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
+        @test Base.cconvert(Ptr{Int8}, u) == u
+    end
 
-# SubString created from SubString
-let str = "Hello, world!"
-    u = SubString(str, 2, 5)
-    for idx in 1:4
-        @test SubString(u, 2, idx) == u[2:idx]
-        @test SubString(u, 2:idx) == u[2:idx]
+    let str = "føøbar"
+        @test_throws BoundsError SubString(str, 10, 10)
+        u = SubString(str, 4, 3)
+        @test length(u) == 0
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == ""
     end
-    @test_throws BoundsError SubString(u, 1, 10)
-    @test_throws BoundsError SubString(u, 1:10)
-    @test_throws BoundsError SubString(u, 20:30)
-    @test SubString(u, 20:15) == ""
-    @test_throws BoundsError SubString(u, -1:10)
-    @test SubString(u, -1, -10) == ""
-    @test SubString(SubString("123", 1, 2), -10, -20) == ""
-end
 
-# sizeof
-@test sizeof(SubString("abc\u2222def",4,4)) == 3
-
-# issue #3710
-@test prevind(SubString("{var}",2,4),4) == 3
-
-# issue #4183
-@test split(SubString("x", 2, 0), "y") == [""]
-
-# issue #6772
-@test parse(Float64, SubString("10",1,1)) === 1.0
-@test parse(Float64, SubString("1 0",1,1)) === 1.0
-@test parse(Float32, SubString("10",1,1)) === 1.0f0
-
-# issue #5870
-@test !occursin(Regex("aa"), SubString("",1,0))
-@test occursin(Regex(""), SubString("",1,0))
-
-# isvalid, length, prevind, nextind for SubString{String}
-let s = "lorem ipsum", sdict = Dict(
-    SubString(s, 1, 11)  => "lorem ipsum",
-    SubString(s, 1, 6)   => "lorem ",
-    SubString(s, 1, 0)   => "",
-    SubString(s, 2, 4)   => "ore",
-    SubString(s, 2, 11)  => "orem ipsum",
-    SubString(s, 15, 14) => "",
-)
-    for (ss, s) in sdict
-        @test ncodeunits(ss) == ncodeunits(s)
-        for i in -2:13
-            @test isvalid(ss, i) == isvalid(s, i)
-        end
-        for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
-            @test length(ss, i, j) == length(s, i, j)
+    @testset "search and SubString (issue #5679)" begin
+        str = "Hello, world!"
+        u = SubString(str, 1, 5)
+        @test findlast("World", u) === nothing
+        @test findlast(isequal('z'), u) === nothing
+        @test findlast("ll", u) == 3:4
+    end
+
+    @testset "SubString created from SubString" begin
+        str = "Hello, world!"
+        u = SubString(str, 2, 5)
+        for idx in 1:4
+            @test SubString(u, 2, idx) == u[2:idx]
+            @test SubString(u, 2:idx) == u[2:idx]
         end
+        @test_throws BoundsError SubString(u, 1, 10)
+        @test_throws BoundsError SubString(u, 1:10)
+        @test_throws BoundsError SubString(u, 20:30)
+        @test SubString(u, 20:15) == ""
+        @test_throws BoundsError SubString(u, -1:10)
+        @test SubString(u, -1, -10) == ""
+        @test SubString(SubString("123", 1, 2), -10, -20) == ""
+    end
+
+    # sizeof
+    @test sizeof(SubString("abc\u2222def",4,4)) == 3
+
+    # issue #3710
+    @test prevind(SubString("{var}",2,4),4) == 3
+
+    # issue #4183
+    @test split(SubString("x", 2, 0), "y") == [""]
+
+    @testset "issue #6772" begin
+        @test parse(Float64, SubString("10",1,1)) === 1.0
+        @test parse(Float64, SubString("1 0",1,1)) === 1.0
+        @test parse(Float32, SubString("10",1,1)) === 1.0f0
     end
-    for (ss, s) in sdict
-        @test length(ss) == length(s)
-        for i in 0:ncodeunits(ss), j = 0:length(ss)+1
-            @test prevind(ss, i+1, j) == prevind(s, i+1, j)
-            @test nextind(ss, i, j) == nextind(s, i, j)
+
+    @testset "issue #5870" begin
+        @test !occursin(Regex("aa"), SubString("",1,0))
+        @test occursin(Regex(""), SubString("",1,0))
+    end
+    @testset" isvalid, length, prevind, nextind for SubString{String}" begin
+        s = "lorem ipsum"
+        sdict = Dict(
+            SubString(s, 1, 11)  => "lorem ipsum",
+            SubString(s, 1, 6)   => "lorem ",
+            SubString(s, 1, 0)   => "",
+            SubString(s, 2, 4)   => "ore",
+            SubString(s, 2, 11)  => "orem ipsum",
+            SubString(s, 15, 14) => "",
+        )
+        for (ss, s) in sdict
+            @test ncodeunits(ss) == ncodeunits(s)
+            for i in -2:13
+                @test isvalid(ss, i) == isvalid(s, i)
+            end
+            for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
+                @test length(ss, i, j) == length(s, i, j)
+            end
+        end
+        for (ss, s) in sdict
+            @test length(ss) == length(s)
+            for i in 0:ncodeunits(ss), j = 0:length(ss)+1
+                @test prevind(ss, i+1, j) == prevind(s, i+1, j)
+                @test nextind(ss, i, j) == nextind(s, i, j)
+            end
+            @test_throws BoundsError prevind(s, 0)
+            @test_throws BoundsError prevind(ss, 0)
+            @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
+            @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
         end
-        @test_throws BoundsError prevind(s, 0)
-        @test_throws BoundsError prevind(ss, 0)
-        @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
-        @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
     end
-end
 
-# proper nextind/prevind/thisind for SubString{String}
-let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
+    rng = MersenneTwister(1)
+    strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
                                       String(rand(rng, UInt8, 50))]
-    for s in strs
+    @testset "proper nextind/prevind/thisind for SubString{String}: $(repr(s))" for s in strs
         a = 0
         while a <= ncodeunits(s)
             a = nextind(s, a)
@@ -223,111 +238,115 @@ let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"
             end
         end
     end
-end
 
-# for isvalid(SubString{String})
-let s = "Σx + βz - 2"
-    for i in -1:ncodeunits(s)+2
-        if checkbounds(Bool, s, i)
-            if isvalid(s, i)
-                ss = SubString(s, 1, i)
-                for j = 1:ncodeunits(ss)
-                    @test isvalid(ss, j) == isvalid(s, j)
+    # for isvalid(SubString{String})
+    let s = "Σx + βz - 2"
+        for i in -1:ncodeunits(s)+2
+            if checkbounds(Bool, s, i)
+                if isvalid(s, i)
+                    ss = SubString(s, 1, i)
+                    for j = 1:ncodeunits(ss)
+                        @test isvalid(ss, j) == isvalid(s, j)
+                    end
+                else
+                    @test_throws StringIndexError SubString(s, 1, i)
                 end
+            elseif i > 0
+                @test_throws BoundsError SubString(s, 1, i)
             else
-                @test_throws StringIndexError SubString(s, 1, i)
+                @test SubString(s, 1, i) == ""
             end
-        elseif i > 0
-            @test_throws BoundsError SubString(s, 1, i)
-        else
-            @test SubString(s, 1, i) == ""
         end
     end
-end
 
-let ss = SubString("hello", 1, 5)
-    @test length(ss, 1, 0) == 0
-    @test_throws BoundsError length(ss, 1, -1)
-    @test_throws BoundsError length(ss, 1, 6)
-    @test_throws BoundsError length(ss, 1, 10)
-    @test_throws BoundsError prevind(ss, 0, 1)
-    @test prevind(ss, 1, 1) == 0
-    @test prevind(ss, 6, 1) == 5
-    @test_throws BoundsError prevind(ss, 7, 1)
-    @test_throws BoundsError nextind(ss, -1, 1)
-    @test nextind(ss, 0, 1) == 1
-    @test nextind(ss, 5, 1) == 6
-    @test_throws BoundsError nextind(ss, 6, 1)
-end
+    let ss = SubString("hello", 1, 5)
+        @test length(ss, 1, 0) == 0
+        @test_throws BoundsError length(ss, 1, -1)
+        @test_throws BoundsError length(ss, 1, 6)
+        @test_throws BoundsError length(ss, 1, 10)
+        @test_throws BoundsError prevind(ss, 0, 1)
+        @test prevind(ss, 1, 1) == 0
+        @test prevind(ss, 6, 1) == 5
+        @test_throws BoundsError prevind(ss, 7, 1)
+        @test_throws BoundsError nextind(ss, -1, 1)
+        @test nextind(ss, 0, 1) == 1
+        @test nextind(ss, 5, 1) == 6
+        @test_throws BoundsError nextind(ss, 6, 1)
+    end
 
-# length(SubString{String}) performance specialization
-let s = "|η(α)-ϕ(κ)| < ε"
-    @test length(SubString(s, 1, 0)) == length(s[1:0])
-    @test length(SubString(s, 4, 4)) == length(s[4:4])
-    @test length(SubString(s, 1, 7)) == length(s[1:7])
-    @test length(SubString(s, 4, 11)) == length(s[4:11])
-end
+    # length(SubString{String}) performance specialization
+    let s = "|η(α)-ϕ(κ)| < ε"
+        @test length(SubString(s, 1, 0)) == length(s[1:0])
+        @test length(SubString(s, 4, 4)) == length(s[4:4])
+        @test length(SubString(s, 1, 7)) == length(s[1:7])
+        @test length(SubString(s, 4, 11)) == length(s[4:11])
+    end
 
-@testset "reverseind" for T in (String, SubString, GenericString)
-    for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
-        for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
-            for c in ('X', 'δ', '\U0001d6a5')
-                s = convert(T, string(prefix, c, suffix))
-                r = reverse(s)
-                ri = findfirst(isequal(c), r)
-                @test c == s[reverseind(s, ri)] == r[ri]
-                s = convert(T, string(prefix, prefix, c, suffix, suffix))
-                pre = convert(T, prefix)
-                sb = SubString(s, nextind(pre, lastindex(pre)),
-                               lastindex(convert(T, string(prefix, prefix, c, suffix))))
-                r = reverse(sb)
-                ri = findfirst(isequal(c), r)
-                @test c == sb[reverseind(sb, ri)] == r[ri]
+    @testset "reverseind" for T in (String, SubString, GenericString)
+        for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
+            for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
+                for c in ('X', 'δ', '\U0001d6a5')
+                    s = convert(T, string(prefix, c, suffix))
+                    r = reverse(s)
+                    ri = findfirst(isequal(c), r)
+                    @test c == s[reverseind(s, ri)] == r[ri]
+                    s = convert(T, string(prefix, prefix, c, suffix, suffix))
+                    pre = convert(T, prefix)
+                    sb = SubString(s, nextind(pre, lastindex(pre)),
+                                   lastindex(convert(T, string(prefix, prefix, c, suffix))))
+                    r = reverse(sb)
+                    ri = findfirst(isequal(c), r)
+                    @test c == sb[reverseind(sb, ri)] == r[ri]
+                end
             end
         end
     end
-end
 
-@testset "reverseind of empty strings" begin
-    for s in ("",
-              SubString("", 1, 0),
-              SubString("ab", 1, 0),
-              SubString("ab", 2, 1),
-              SubString("ab", 3, 2),
-              GenericString(""))
-        @test reverseind(s, 0) == 1
-        @test reverseind(s, 1) == 0
+    @testset "reverseind of empty strings" begin
+        for s in ("",
+                  SubString("", 1, 0),
+                  SubString("ab", 1, 0),
+                  SubString("ab", 2, 1),
+                  SubString("ab", 3, 2),
+                  GenericString(""))
+            @test reverseind(s, 0) == 1
+            @test reverseind(s, 1) == 0
+        end
     end
 end
 
-## Cstring tests ##
-
-# issue #13974: comparison against pointers
-let
-    str = String("foobar")
-    ptr = pointer(str)
-    cstring = Cstring(ptr)
-    @test ptr == cstring
-    @test cstring == ptr
-
-    # convenient NULL string creation from Ptr{Cvoid}
-    nullstr = Cstring(C_NULL)
-
-    # Comparisons against NULL strings
-    @test ptr != nullstr
-    @test nullstr != ptr
+@testset "Cstring" begin
+    @testset "issue #13974: comparison against pointers" begin
+        str = String("foobar")
+        ptr = pointer(str)
+        cstring = Cstring(ptr)
+        @test ptr == cstring
+        @test cstring == ptr
+
+        # convenient NULL string creation from Ptr{Cvoid}
+        nullstr = Cstring(C_NULL)
+
+        # Comparisons against NULL strings
+        @test ptr != nullstr
+        @test nullstr != ptr
+
+        # Short-hand comparison against C_NULL
+        @test nullstr == C_NULL
+        @test C_NULL == nullstr
+        @test cstring != C_NULL
+        @test C_NULL != cstring
+    end
 
-    # Short-hand comparison against C_NULL
-    @test nullstr == C_NULL
-    @test C_NULL == nullstr
-    @test cstring != C_NULL
-    @test C_NULL != cstring
+    @testset "issue #31381: eltype(Cstring) != Cchar" begin
+        s = Cstring(C_NULL)
+        @test eltype(Cstring) == Cchar
+        @test eltype(s) == Cchar
+        @test pointer(s) isa Ptr{Cchar}
+    end
 end
 
-# issue #31381: eltype(Cstring) != Cchar
-let
-    s = Cstring(C_NULL)
-    @test eltype(Cstring) == Cchar
-    @test eltype(s) == Cchar
-    @test pointer(s) isa Ptr{Cchar}
+@testset "Codeunits" begin
+    s = "I'm a string!"
+    @test codeunit(s) == UInt8
+    @test codeunit(s, Int8(1)) == codeunit(s, 1)
 end
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 59638dc3b9ca6..ae16e24f4ea8b 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -2,6 +2,20 @@
 
 SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
 
+@testset "textwidth" begin
+    for (c, w) in [('x', 1), ('α', 1), ('🍕', 2), ('\0', 0), ('\u0302', 0), ('\xc0', 1)]
+        @test textwidth(c) == w
+        @test textwidth(c^3) == w*3
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+    for i in 0x00:0x7f # test all ASCII chars (which have fast path)
+        w = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
+        c = Char(i)
+        @test textwidth(c) == w
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+end
+
 @testset "padding (lpad and rpad)" begin
     @test lpad("foo", 2) == "foo"
     @test rpad("foo", 2) == "foo"
@@ -53,6 +67,52 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
     @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
 end
 
+@testset "string truncation (ltruncate, rtruncate, ctruncate)" begin
+    @test ltruncate("foo", 4) == "foo"
+    @test ltruncate("foo", 3) == "foo"
+    @test ltruncate("foo", 2) == "…o"
+    @test ltruncate("🍕🍕 I love 🍕", 10) == "…I love 🍕" # handle wide emojis
+    @test ltruncate("🍕🍕 I love 🍕", 10, "[…]") == "[…]love 🍕"
+    # when the replacement string is longer than the trunc
+    # trust that the user wants the replacement string rather than erroring
+    @test ltruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred ltruncate("xxx", 4)
+    @inferred ltruncate("xxx", 2)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test rtruncate("foo", 4) == "foo"
+    @test rtruncate("foo", 3) == "foo"
+    @test rtruncate("foo", 2) == "f…"
+    @test rtruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 I lo…"
+    @test rtruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕 I […]"
+    @test rtruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred rtruncate("xxx", 4)
+    @inferred rtruncate("xxx", 2)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test ctruncate("foo", 4) == "foo"
+    @test ctruncate("foo", 3) == "foo"
+    @test ctruncate("foo", 2) == "f…"
+    @test ctruncate("foo", 2; prefer_left=true) == "f…"
+    @test ctruncate("foo", 2; prefer_left=false) == "…o"
+    @test ctruncate("foobar", 6) == "foobar"
+    @test ctruncate("foobar", 5) == "fo…ar"
+    @test ctruncate("foobar", 4) == "fo…r"
+    @test ctruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 …e 🍕"
+    @test ctruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕[…] 🍕"
+    @test ctruncate("abc", 2, "xxxxxx") == "xxxxxx"
+    @test ctruncate("🍕🍕🍕🍕🍕🍕xxxxxxxxxxx", 9) == "🍕🍕…xxxx"
+
+    @inferred ctruncate("xxxxx", 5)
+    @inferred ctruncate("xxxxx", 3)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 5)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 3)
+end
+
 # string manipulation
 @testset "lstrip/rstrip/strip" begin
     @test strip("") == ""
diff --git a/test/subarray.jl b/test/subarray.jl
index 31bd69cccb5ae..818365730f079 100644
--- a/test/subarray.jl
+++ b/test/subarray.jl
@@ -339,6 +339,7 @@ end
     A = copy(reshape(1:120, 3, 5, 8))
     sA = view(A, 2:2, 1:5, :)
     @test @inferred(strides(sA)) == (1, 3, 15)
+    @test IndexStyle(sA) == IndexStyle(typeof(sA)) == IndexCartesian()
     @test parent(sA) == A
     @test parentindices(sA) == (2:2, 1:5, Base.Slice(1:8))
     @test size(sA) == (1, 5, 8)
@@ -826,6 +827,25 @@ end
     @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i2, i3]
     V = view(A, i1, 1:5, i3, i2)
     @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i3, i2]
+
+    @testset "custom ranges" begin
+        struct MyStepRange{T} <: OrdinalRange{T,T}
+            r::StepRange{T,T}
+        end
+
+        for f in (:first, :last, :step, :length, :size)
+            @eval Base.$f(r::MyStepRange) = $f(r.r)
+        end
+        Base.getindex(r::MyStepRange, i::Int) = r.r[i]
+
+        a = rand(6)
+        V = view(a, MyStepRange(2:2:4))
+        @test @inferred(Base.unaliascopy(V))::typeof(V) == V
+
+        # empty range
+        V = view(a, MyStepRange(2:2:1))
+        @test @inferred(Base.unaliascopy(V))::typeof(V) == V
+    end
 end
 
 @testset "issue #27632" begin
@@ -1055,48 +1075,51 @@ end
         @test !isassigned(v, 1, 2) # inbounds but not assigned
         @test !isassigned(v, 3, 3) # out-of-bounds
     end
+end
 
-    @testset "_unsetindex!" begin
-        function test_unsetindex(A, B)
-            copyto!(A, B)
-            for i in eachindex(A)
-                @test !isassigned(A, i)
-            end
-            inds = eachindex(A)
-            @test_throws BoundsError Base._unsetindex!(A, last(inds) + oneunit(eltype(inds)))
-        end
-        @testset "dest IndexLinear, src IndexLinear" begin
-            for p in (fill(BigInt(2)), BigInt[1, 2], BigInt[1 2; 3 4])
-                A = view(copy(p), ntuple(_->:, ndims(p))...)
-                B = view(similar(A), ntuple(_->:, ndims(p))...)
-                test_unsetindex(A, B)
-                test_unsetindex(p, B)
-            end
-        end
+@testset "aliasing checks with shared indices" begin
+    indices = [1,3]
+    a = rand(3)
+    av = @view a[indices]
+    b = rand(3)
+    bv = @view b[indices]
+    @test !Base.mightalias(av, bv)
+    @test Base.mightalias(a, av)
+    @test Base.mightalias(b, bv)
+    @test Base.mightalias(indices, av)
+    @test Base.mightalias(indices, bv)
+    @test Base.mightalias(view(indices, :), av)
+    @test Base.mightalias(view(indices, :), bv)
+end
 
-        @testset "dest IndexLinear, src IndexCartesian" begin
-            for p in (fill(BigInt(2)), BigInt[1, 2], BigInt[1 2; 3 4])
-                A = view(copy(p), ntuple(_->:, ndims(p))...)
-                B = view(similar(A), axes(A)...)
-                test_unsetindex(A, B)
-                test_unsetindex(p, B)
-            end
-        end
+@testset "aliasing checks with disjoint arrays" begin
+    A = rand(3,4,5)
+    @test Base.mightalias(view(A, :, :, 1), view(A, :, :, 1))
+    @test !Base.mightalias(view(A, :, :, 1), view(A, :, :, 2))
 
-        @testset "dest IndexCartesian, src IndexLinear" begin
-            for p in (fill(BigInt(2)), BigInt[1, 2], BigInt[1 2; 3 4])
-                A = view(p, axes(p)...)
-                B = similar(A)
-                test_unsetindex(A, B)
-            end
-        end
+    B = reinterpret(UInt64, A)
+    @test Base.mightalias(view(B, :, :, 1), view(A, :, :, 1))
+    @test !Base.mightalias(view(B, :, :, 1), view(A, :, :, 2))
 
-        @testset "dest IndexCartesian, src IndexCartesian" begin
-            for p in (fill(BigInt(2)), BigInt[1, 2], BigInt[1 2; 3 4])
-                A = view(p, axes(p)...)
-                B = view(similar(A), axes(A)...)
-                test_unsetindex(A, B)
-            end
-        end
-    end
+    C = reinterpret(UInt32, A)
+    @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 1))
+    @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 2)) # This is overly conservative
+    @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 1))
+    @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 2)) # This is overly conservative
+end
+
+@testset "aliasing check with reshaped subarrays" begin
+    C = rand(2,1)
+    V1 = @view C[1, :]
+    V2 = @view C[2, :]
+
+    @test !Base.mightalias(V1, V2)
+    @test !Base.mightalias(V1, permutedims(V2))
+    @test !Base.mightalias(permutedims(V1), V2)
+    @test !Base.mightalias(permutedims(V1), permutedims(V2))
+
+    @test Base.mightalias(V1, V1)
+    @test Base.mightalias(V1, permutedims(V1))
+    @test Base.mightalias(permutedims(V1), V1)
+    @test Base.mightalias(permutedims(V1), permutedims(V1))
 end
diff --git a/test/subtype.jl b/test/subtype.jl
index 27e8fa604a5e9..7be869107b432 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -707,16 +707,17 @@ macro testintersect(a, b, result)
     a = esc(a)
     b = esc(b)
     result = esc(result)
-    Base.remove_linenums!(quote
+    # use a manual macrocall expression since Test will examine this __source__ value
+    return quote
         # test real intersect
-        @test $cmp(_type_intersect($a, $b), $result)
-        @test $cmp(_type_intersect($b, $a), $result)
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($a, $b), $result))))
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($b, $a), $result))))
         # test simplified intersect
         if !($result === Union{})
-            @test typeintersect($a, $b) != Union{}
-            @test typeintersect($b, $a) != Union{}
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($a, $b) != Union{})))
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($b, $a) != Union{})))
         end
-    end)
+    end
 end
 
 abstract type IT4805_2{N, T} end
@@ -2267,31 +2268,46 @@ let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}}
     @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{})
 end
 
+function equal_envs(env1, env2)
+    length(env1) == length(env2) || return false
+    for i = 1:length(env1)
+        a = env1[i]
+        b = env2[i]
+        if a isa TypeVar
+            if !(b isa TypeVar && a.name == b.name && a.lb == b.lb && a.ub == b.ub)
+                return false
+            end
+        elseif !(a == b)
+            return false
+        end
+    end
+    return true
+end
+
 # issue #43064
 let
-    env_tuple(@nospecialize(x), @nospecialize(y)) = (intersection_env(x, y)[2]...,)
-    all_var(x::UnionAll) = (x.var, all_var(x.body)...)
-    all_var(x::DataType) = ()
+    env_tuple(@nospecialize(x), @nospecialize(y)) = intersection_env(x, y)[2]
     TT0 = Tuple{Type{T},Union{Real,Missing,Nothing}} where {T}
     TT1 = Union{Type{Int8},Type{Int16}}
     @test env_tuple(Tuple{TT1,Missing}, TT0) ===
           env_tuple(Tuple{TT1,Nothing}, TT0) ===
-          env_tuple(Tuple{TT1,Int}, TT0) === all_var(TT0)
+          env_tuple(Tuple{TT1,Int}, TT0) ===
+          Core.svec(TT0.var)
 
     TT0 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Union{Int,Int8},Union{Int,Int8},Int}
     TT3 = Tuple{Int,Union{Int,Int8},Int}
-    @test env_tuple(TT2, TT0) === all_var(TT0)
-    @test env_tuple(TT2, TT1) === all_var(TT1)
-    @test env_tuple(TT3, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT3, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(TypeVar(:T1, Union{Int, Int8}), TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), TypeVar(:T1, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 
     TT0 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Int,Union{Int,Int8},Int,Int}
-    @test env_tuple(TT2, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT2, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 end
 
 #issue #46735
@@ -2336,9 +2352,10 @@ T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{Abstr
 #issue 36185
 let S = Tuple{Type{T},Array{Union{T,Missing},N}} where {T,N},
     T = Tuple{Type{T},Array{Union{T,Nothing},N}} where {T,N}
-    @testintersect(S, T, !Union{})
-    @test_broken typeintersect(S, T) != S
-    @test_broken typeintersect(T, S) != T
+    I = typeintersect(S, T)
+    @test I == typeintersect(T, S) != Union{}
+    @test_broken I <: S
+    @test_broken I <: T
 end
 
 #issue 46736
@@ -2379,12 +2396,41 @@ let S = Tuple{T2, V2} where {T2, N2, V2<:(Array{S2, N2} where {S2 <: T2})},
     @testintersect(S, T, !Union{})
 end
 
-# A simple case which has a small local union.
-# make sure the env is not widened too much when we intersect(Int8, Int8).
-struct T48006{A1,A2,A3} end
-@testintersect(Tuple{T48006{Float64, Int, S1}, Int} where {F1<:Real, S1<:Union{Int8, Val{F1}}},
-               Tuple{T48006{F2, I, S2}, I} where {F2<:Real, I<:Int, S2<:Union{Int8, Val{F2}}},
-               Tuple{T48006{Float64, Int, S1}, Int} where S1<:Union{Val{Float64}, Int8})
+let S = Dict{Int, S1} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Dict{F2, S2} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test_broken typeintersect(S, T) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+    @test typeintersect(T, S) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+end
+
+# Ensure inner `intersect_all` never under-esitimate.
+let S = Tuple{F1, Dict{Int, S1}} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Tuple{Any, Dict{F2, S2}} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test Tuple{Nothing, Dict{Int, Int8}} <: S
+    @test Tuple{Nothing, Dict{Int, Int8}} <: T
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(S, T)
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(T, S)
+end
+
+let S = Tuple{F1, Val{S1}} where {F1, S1<:Dict{F1}}
+    T = Tuple{Any, Val{S2}} where {F2, S2<:Union{map(T->Dict{T}, Base.BitInteger_types)...}}
+    ST = typeintersect(S, T)
+    TS = typeintersect(S, T)
+    for U in Base.BitInteger_types
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: S
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: T
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: ST
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: TS
+    end
+end
+
+#issue 55206
+struct T55206{A,B<:Complex{A},C<:Union{Dict{Nothing},Dict{A}}} end
+@testintersect(T55206, T55206{<:Any,<:Any,<:Dict{Nothing}}, T55206{A,<:Complex{A},<:Dict{Nothing}} where {A})
+@testintersect(
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {F1, S1<:AbstractSet{F1}},
+    Tuple{Dict{T1, T2}, Val{S2}} where {T1, T2, S2<:Union{Set{T1},Set{T2}}},
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {S1<:Union{Set{Int8},Set{Int16}}}
+)
 
 f48167(::Type{Val{L2}}, ::Type{Union{Val{L1}, Set{R}}}) where {L1, R, L2<:L1} = 1
 f48167(::Type{Val{L1}}, ::Type{Union{Val{L2}, Set{R}}}) where {L1, R, L2<:L1} = 2
@@ -2553,7 +2599,7 @@ end
 let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val,
     S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val)
     # optimal = Union{}?
-    @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val)
+    @test typeintersect(T, S) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {S<:Val, T<:Val}
     @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)}
 end
 
@@ -2572,6 +2618,32 @@ let a = Tuple{Union{Nothing, Type{Pair{T1}} where T1}}
     @test !Base.has_free_typevars(typeintersect(a, b))
 end
 
+#issue 53366
+let Y = Tuple{Val{T}, Val{Val{T}}} where T
+    A = Val{Val{T}} where T
+    T = TypeVar(:T, UnionAll(A.var, Val{A.var}))
+    B = UnionAll(T, Val{T})
+    X = Tuple{A, B}
+    @testintersect(X, Y, !Union{})
+end
+
+#issue 53621 (requires assertions enabled)
+abstract type A53621{T, R, C, U} <: AbstractSet{Union{C, U}} end
+struct T53621{T, R<:Real, C, U} <: A53621{T, R, C, U} end
+let
+    U = TypeVar(:U)
+    C = TypeVar(:C)
+    T = TypeVar(:T)
+    R = TypeVar(:R)
+    CC = TypeVar(:CC, Union{C, U})
+    UU = TypeVar(:UU, Union{C, U})
+    S1 = UnionAll(T, UnionAll(R, Type{UnionAll(C, UnionAll(U, T53621{T, R, C, U}))}))
+    S2 = UnionAll(C, UnionAll(U, UnionAll(CC, UnionAll(UU, UnionAll(T, UnionAll(R, T53621{T, R, CC, UU}))))))
+    S = Tuple{S1, S2}
+    T = Tuple{Type{T53621{T, R}}, AbstractSet{T}} where {T, R}
+    @testintersect(S, T, !Union{})
+end
+
 #issue 53371
 struct T53371{A,B,C,D,E} end
 S53371{A} = Union{Int, <:A}
@@ -2580,3 +2652,72 @@ let S = Type{T53371{A, B, C, D, E}} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53
     T = Type{T53371{A, B, C, D, E} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53371{A}, E<:R53371{A}}}
     @test !(S <: T)
 end
+
+#issue 54356
+let S = Tuple{Val{Val{Union{Val{A2}, A2}}}, Val{Val{Union{Val{A2}, Val{A4}, A4}}}} where {A2, A4<:Union{Val{A2}, A2}},
+    T = Tuple{Vararg{Val{V}}} where {V}
+    @testintersect(S, T, !Union{})
+end
+
+#issue 54356
+abstract type A54356{T<:Real} end
+struct B54356{T} <: A54356{T} end
+struct C54356{S,T<:Union{S,Complex{S}}} end
+struct D54356{S<:Real,T} end
+let S = Tuple{Val, Val{T}} where {T}, R = Tuple{Val{Val{T}}, Val{T}} where {T},
+    SS = Tuple{Val, Val{T}, Val{T}} where {T}, RR = Tuple{Val{Val{T}}, Val{T}, Val{T}} where {T}
+    # parameters check for self
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Complex{B}}}, S{1}, R{1})
+    # parameters check for supertype (B54356 -> A54356)
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, B54356{B}}}, S{1}, R{1})
+    # enure unused TypeVar skips the `UnionAll` wrapping
+    @testintersect(Tuple{Val{A}, A} where {B, A<:(Union{Val{B}, D54356{B,C}} where {C})}, S{1}, R{1})
+    # invariant parameter should not get narrowed
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Val{Union{Int,Complex{B}}}}}, S{1}, R{1})
+    # bit value could not be `Union` element
+    @testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Val{B}}}, SS{1}, RR{1})
+    @testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Complex{B}}}, SS{1}, Union{})
+    # `check_datatype_parameters` should ignore bad `Union` elements in constraint's ub
+    T = Tuple{Val{Union{Val{Nothing}, Val{C54356{V,V}}}}, Val{Nothing}} where {Nothing<:V<:Nothing}
+    @test T <: S{Nothing}
+    @test T <: Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}}
+    @test T <: typeintersect(Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}}, S{Nothing})
+    # extra check for Vararg
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NTuple{B,Any}}}, S{-1}, R{-1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Any,Vararg{Any,B}}}}, S{-1}, R{-1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Vararg{Int,Union{Int,Complex{B}}}}}}, S{1}, R{1})
+    # extra check for NamedTuple
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{1}, R{1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{(1,)}, R{(1,)})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{(:a),B}}}, S{NTuple{2,Int}}, R{NTuple{2,Int}})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int,Int}}}}, S{(:a,:a)}, R{(:a,:a)})
+    # extra check for GenericMemory/GenericMemoryRef
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{B}}}, S{1}, R{1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{:not_atomic,Int,B}}}, S{1}, R{1})
+end
+
+#issue 54516
+let S = Tuple{Val{<:T}, Union{Int,T}} where {T},
+    T = Tuple{Union{Int,T}, Val{<:T}} where {T}
+    @testintersect(S, T, !Union{})
+    @test !Base.has_free_typevars(typeintersect(S, T))
+end
+
+#issue 55230
+let T1 = NTuple{12, Union{Val{1}, Val{2}, Val{3}, Val{4}, Val{5}, Val{6}}}
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Val}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Real}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{Val,Real}}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{String,Real}}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Union{Val,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Union{String,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test !(T1 <: T2)
+    @test Tuple{Union{Val{1},Val{2}}} <: Tuple{S} where {T, S<:Val{T}}
+end
diff --git a/test/syntax.jl b/test/syntax.jl
index 423d1e3d04bfb..1b630a56f84f8 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -713,36 +713,10 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end))
 let low3 = Meta.lower(@__MODULE__, quote @m3 end)
     m3_exprs = get_expr_list(low3)
     ci = low3.args[1]::Core.CodeInfo
-    @test ci.codelocs in ([4, 4, 0], [4, 0])
+    #@test ci.codelocs in ([4, 4, 0], [4, 0])
     @test is_return_ssavalue(m3_exprs[end])
 end
 
-function f1(a)
-    b = a + 100
-    b
-end
-
-@generated function f2(a)
-    quote
-        b = a + 100
-        b
-    end
-end
-
-f1_ci = code_typed(f1, (Int,), debuginfo=:source)[1][1]
-f2_ci = code_typed(f2, (Int,), debuginfo=:source)[1][1]
-
-f1_exprs = get_expr_list(f1_ci)
-f2_exprs = get_expr_list(f2_ci)
-
-if Base.JLOptions().can_inline != 0
-    @test length(f1_ci.linetable) == 3
-    @test length(f2_ci.linetable) >= 3
-else
-    @test length(f1_ci.linetable) == 2
-    @test length(f2_ci.linetable) >= 3
-end
-
 # Check that string and command literals are parsed to the appropriate macros
 @test :(x"s") == :(@x_str "s")
 @test :(x"s"flag) == :(@x_str "s" "flag")
@@ -2215,6 +2189,16 @@ end
 end
 @test z28789 == 42
 
+const warn28789 = "Assignment to `s28789` in soft scope is ambiguous because a global variable by the same name exists: "*
+    "`s28789` will be treated as a new local. Disambiguate by using `local s28789` to suppress this warning or "*
+    "`global s28789` to assign to the existing global variable."
+@test_logs (:warn, warn28789) @test_throws UndefVarError @eval begin
+    s28789 = 0
+    for i = 1:10
+        s28789 += i
+    end
+end
+
 # issue #38650, `struct` should always be a hard scope
 f38650() = 0
 @eval begin
@@ -2558,19 +2542,19 @@ end
                                                               3 4 5]
 
 @test Meta.parse("for x in 1:10 g(x) end") ==
-  Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end")
+    Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end")
 @test Meta.parse("(f->f(1))() do x x+1 end") ==
-  Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end")
+    Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end")
 @test Meta.parse("while i < 10 i += 1 end") ==
-  Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end")
+    Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end")
 @test Meta.parse("begin x=1 end") == Meta.parse("begin#==#x=1#==#end")
 @test Meta.parse("if x<y x+1 elseif y>0 y+1 else z end") ==
-  Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end")
+    Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end")
 @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end")
 @test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c")
 @test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1")
 @test Meta.parse("try f() catch e g() finally h() end") ==
-  Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end")
+    Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end")
 @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b")
 
 # issue #37540
@@ -2661,9 +2645,9 @@ end
 @test_throws ErrorException("invalid method definition in Mod3: function Mod3.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x))
 @test !isdefined(Mod3, :always_undef) # resolve this binding now in Mod3
 @test_throws ErrorException("invalid method definition in Mod3: exported function Mod.always_undef does not exist") Core.eval(Mod3, :(always_undef(x::Int) = x))
-@test_throws ErrorException("cannot assign a value to imported variable Mod.always_undef from module Mod3") Core.eval(Mod3, :(const always_undef = 3))
-@test_throws ErrorException("cannot assign a value to imported variable Mod3.f") Core.eval(Mod3, :(const f = 3))
-@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it already has a value") Core.eval(Mod, :(const maybe_undef = 3))
+@test_throws ErrorException("cannot declare Mod3.always_undef constant; it was already declared as an import") Core.eval(Mod3, :(const always_undef = 3))
+@test_throws ErrorException("cannot declare Mod3.f constant; it was already declared as an import") Core.eval(Mod3, :(const f = 3))
+@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it was already declared global") Core.eval(Mod, :(const maybe_undef = 3))
 
 z = 42
 import .z as also_z
@@ -2847,7 +2831,7 @@ end
     @test a == 5
     @test b == 6
 
-    @test_throws ErrorException (; a, b) = (x=1,)
+    @test_throws FieldError (; a, b) = (x=1,)
 
     @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x end), :error)
     @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x, y end), :error)
@@ -2856,7 +2840,7 @@ end
     f((; a, b)) = a, b
     @test f((b=3, a=4)) == (4, 3)
     @test f((b=3, c=2, a=4)) == (4, 3)
-    @test_throws ErrorException f((;))
+    @test_throws FieldError f((;))
 
     # with type annotation
     let num, den, a, b
@@ -3650,3 +3634,354 @@ end
 end
 @test (@MyMacroModule.mymacro) == 1
 @test (@MyMacroModule.mymacro(a)) == 2
+
+# Issue #53673 - missing macro hygiene for for/generator
+baremodule MacroHygieneFor
+    import ..Base
+    using Base: esc, Expr, +
+    macro for1()
+        :(let a=(for i=10; end; 1); a; end)
+    end
+    macro for2()
+        :(let b=(for j=11, k=12; end; 2); b; end)
+    end
+    macro for3()
+        :(let c=($(Expr(:for, esc(Expr(:block, :(j=11), :(k=12))), :())); 3); c; end)
+    end
+    macro for4()
+        :(begin; local j; let a=(for outer j=10; end; 4); j+a; end; end)
+    end
+end
+let nnames = length(names(MacroHygieneFor; all=true))
+    @test (@MacroHygieneFor.for1) == 1
+    @test (@MacroHygieneFor.for2) == 2
+    @test (@MacroHygieneFor.for3) == 3
+    @test (@MacroHygieneFor.for4) == 14
+    @test length(names(MacroHygieneFor; all=true)) == nnames
+end
+
+baremodule MacroHygieneGenerator
+    using ..Base: Any, !
+    my!(x) = !x
+    macro gen1()
+        :(let a=Any[x for x in 1]; a; end)
+    end
+    macro gen2()
+        :(let a=Bool[x for x in (true, false) if my!(x)]; a; end)
+    end
+    macro gen3()
+        :(let a=Bool[x for x in (true, false), y in (true, false) if my!(x) && my!(y)]; a; end)
+    end
+end
+let nnames = length(names(MacroHygieneGenerator; all=true))
+    @test (MacroHygieneGenerator.@gen1) == Any[x for x in 1]
+    @test (MacroHygieneGenerator.@gen2) == Bool[false]
+    @test (MacroHygieneGenerator.@gen3) == Bool[false]
+    @test length(names(MacroHygieneGenerator; all=true)) == nnames
+end
+
+# Issue #53729 - Lowering recursion into Expr(:toplevel)
+@test eval(Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(f53729(x) = x)), :(x=1)))) == 1
+@test f53729(2) == 2
+
+# Issue #54701 - Macro hygiene of argument destructuring
+macro makef54701()
+    quote
+        call(f) = f((1, 2))
+        function $(esc(:f54701))()
+            call() do (a54701, b54701)
+                return a54701+b54701
+            end
+        end
+    end
+end
+@makef54701
+@test f54701() == 3
+@test !@isdefined(a54701)
+@test !@isdefined(b54701)
+
+# Issue #54607 - binding creation in foreign modules should not be permitted
+module Foreign54607
+    # Syntactic, not dynamic
+    try_to_create_binding1() = (Foreign54607.foo = 2)
+    # GlobalRef is allowed for same-module assignment
+    @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2)
+    function global_create_binding()
+        global bar
+        bar = 3
+    end
+    baz = 4
+    begin;
+        @Base.Experimental.force_compile
+        compiled_assign = 5
+    end
+    @eval $(GlobalRef(Foreign54607, :gr_assign)) = 6
+end
+@test_throws ErrorException (Foreign54607.foo = 1)
+@test_throws ErrorException Foreign54607.try_to_create_binding1()
+@test_throws ErrorException Foreign54607.try_to_create_binding2()
+function assign_in_foreign_module()
+    (Foreign54607.foo = 1)
+    nothing
+end
+@test !Core.Compiler.is_nothrow(Base.infer_effects(assign_in_foreign_module))
+@test_throws ErrorException begin
+    @Base.Experimental.force_compile
+    (Foreign54607.foo = 1)
+end
+@test_throws ErrorException @eval (GlobalRef(Foreign54607, :gr_assign2)) = 7
+Foreign54607.global_create_binding()
+@test isdefined(Foreign54607, :bar)
+@test isdefined(Foreign54607, :baz)
+@test isdefined(Foreign54607, :compiled_assign)
+@test isdefined(Foreign54607, :gr_assign)
+Foreign54607.bar = 8
+@test Foreign54607.bar == 8
+begin
+    @Base.Experimental.force_compile
+    Foreign54607.bar = 9
+end
+@test Foreign54607.bar == 9
+
+# Issue #54805 - export mislowering
+module Export54805
+let
+    local b54805=1
+    export b54805
+end
+b54805 = 2
+end
+using .Export54805
+@test b54805 == 2
+
+# F{T} = ... has special syntax semantics, not found anywhere else in the language
+# that make `F` `const` iff an assignment to `F` is global in the relevant scope.
+# We implicitly test this elsewhere, but there's some tricky interactions with
+# explicit declarations that we test here.
+module ImplicitCurlies
+    using ..Test
+    let
+        ImplicitCurly1{T} = Ref{T}
+    end
+    @test !@isdefined(ImplicitCurly1)
+    let
+        global ImplicitCurly2
+        ImplicitCurly2{T} = Ref{T}
+    end
+    @test @isdefined(ImplicitCurly2) && isconst(@__MODULE__, :ImplicitCurly2)
+    begin
+        ImplicitCurly3{T} = Ref{T}
+    end
+    @test @isdefined(ImplicitCurly3) && isconst(@__MODULE__, :ImplicitCurly3)
+    begin
+        local ImplicitCurly4
+        ImplicitCurly4{T} = Ref{T}
+    end
+    @test !@isdefined(ImplicitCurly4)
+    @test_throws "syntax: `global const` declaration not allowed inside function" Core.eval(@__MODULE__, :(function implicit5()
+        global ImplicitCurly5
+        ImplicitCurly5{T} = Ref{T}
+    end))
+    @test !@isdefined(ImplicitCurly5)
+    function implicit6()
+        ImplicitCurly6{T} = Ref{T}
+        return ImplicitCurly6
+    end
+    @test !@isdefined(ImplicitCurly6)
+    # Check return value of assignment expr
+    @test isa((const ImplicitCurly7{T} = Ref{T}), UnionAll)
+    @test isa(begin; ImplicitCurly8{T} = Ref{T}; end, UnionAll)
+end
+
+# `const` does not distribute over assignments
+const aconstassign = bconstassign = 2
+@test isconst(@__MODULE__, :aconstassign)
+@test !isconst(@__MODULE__, :bconstassign)
+@test aconstassign == bconstassign
+
+const afunc_constassign() = bfunc_constassign() = 2
+@test afunc_constassign()() == 2
+@test !@isdefined(bfunc_constassign)
+
+# `const` RHS is regular toplevel scope (not `let`)
+const arhs_toplevel = begin
+    athis_should_be_a_global = 1
+    2
+end
+@test isconst(@__MODULE__, :arhs_toplevel)
+@test !isconst(@__MODULE__, :athis_should_be_a_global)
+@test arhs_toplevel == 2
+@test athis_should_be_a_global == 1
+
+# `const` is permitted before function assignment for legacy reasons
+const fconst_assign() = 1
+const (gconst_assign(), hconst_assign()) = (2, 3)
+@test (fconst_assign(), gconst_assign(), hconst_assign()) == (1, 2, 3)
+@test isconst(@__MODULE__, :fconst_assign)
+@test isconst(@__MODULE__, :gconst_assign)
+@test isconst(@__MODULE__, :hconst_assign)
+
+# `const` assignment to `_` drops the assignment effect,
+# and the conversion, but not the rhs.
+struct CantConvert; end
+Base.convert(::Type{CantConvert}, x) = error()
+@test (const _::CantConvert = 1) == 1
+@test !isconst(@__MODULE__, :_)
+@test_throws ErrorException("expected") (const _ = error("expected"))
+
+# Issue #54787
+const (destruct_const54787...,) = (1,2,3)
+@test destruct_const54787 == (1,2,3)
+@test isconst(@__MODULE__, :destruct_const54787)
+const a54787, b54787, c54787 = destruct_const54787
+@test (a54787, b54787, c54787) == (1,2,3)
+@test isconst(@__MODULE__, :a54787)
+@test isconst(@__MODULE__, :b54787)
+@test isconst(@__MODULE__, :c54787)
+
+# Same number of statements on lhs and rhs, but non-atom
+const c54787_1,c54787_2 = 1,(2*1)
+@test isconst(@__MODULE__, :c54787_1)
+@test isconst(@__MODULE__, :c54787_2)
+@test c54787_1 == 1
+@test c54787_2 == 2
+
+# Methods can be added to any singleton not just generic functions
+struct SingletonMaker; end
+const no_really_this_is_a_function_i_promise = Val{SingletonMaker()}()
+no_really_this_is_a_function_i_promise(a) = 2 + a
+@test Val{SingletonMaker()}()(2) == 4
+
+# Test that lowering doesn't accidentally put a `Module` in the Method name slot
+let src = @Meta.lower let capture=1
+    global foo_lower_block
+    foo_lower_block() = capture
+end
+    code = src.args[1].code
+    for i = length(code):-1:1
+        expr = code[i]
+        Meta.isexpr(expr, :method) || continue
+        @test isa(expr.args[1], Union{GlobalRef, Symbol})
+    end
+end
+
+# Test that globals can be `using`'d even if they are not yet defined
+module UndefGlobal54954
+    global theglobal54954::Int
+end
+using .UndefGlobal54954: theglobal54954
+@test Core.get_binding_type(@__MODULE__, :theglobal54954) === Int
+
+# Extended isdefined
+module ExtendedIsDefined
+    using Test
+    module Import
+        export x2, x3
+        x2 = 2
+        x3 = 3
+        x4 = 4
+    end
+    const x1 = 1
+    using .Import
+    import .Import.x4
+    @test x2 == 2 # Resolve the binding
+    @eval begin
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x1)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x2)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x3)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x4)))
+
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x1), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x2), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x3), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x4), false))
+    end
+
+    @eval begin
+        @Base.Experimental.force_compile
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x1)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x2)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x3)))
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x4)))
+
+        @test $(Expr(:isdefined, GlobalRef(@__MODULE__, :x1), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x2), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x3), false))
+        @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x4), false))
+    end
+end
+
+# Test importing the same module twice using two different paths
+module FooDualImport
+end
+module BarDualImport
+import ..FooDualImport
+import ..FooDualImport.FooDualImport
+end
+
+# Test trying to define a constant and then importing the same constant
+const ImportConstant = 1
+module ImportConstantTestModule
+    using Test
+    const ImportConstant = 1
+    import ..ImportConstant
+    @test ImportConstant == 1
+    @test isconst(@__MODULE__, :ImportConstant)
+end
+
+# Test trying to define a constant and then trying to assign to the same value
+module AssignConstValueTest
+    const x = 1
+    x = 1
+end
+@test isconst(AssignConstValueTest, :x)
+
+# Module Replacement
+module ReplacementContainer
+    module ReplaceMe
+        const x = 1
+    end
+    const Old = ReplaceMe
+    module ReplaceMe
+        const x = 2
+    end
+end
+@test ReplacementContainer.Old !== ReplacementContainer.ReplaceMe
+@test ReplacementContainer.ReplaceMe.x === 2
+
+# Setglobal of previously declared global
+module DeclareSetglobal
+    using Test
+    @test_throws ErrorException setglobal!(@__MODULE__, :DeclareMe, 1)
+    global DeclareMe
+    setglobal!(@__MODULE__, :DeclareMe, 1)
+    @test DeclareMe === 1
+end
+
+# Binding type of const (N.B.: This may change in the future)
+module ConstBindingType
+    using Test
+    const x = 1
+    @test Core.get_binding_type(@__MODULE__, :x) === Any
+end
+
+# Explicit import may resolve using failed
+module UsingFailedExplicit
+    using Test
+    module A; export x; x = 1; end
+    module B; export x; x = 2; end
+    using .A, .B
+    @test_throws UndefVarError x
+    using .A: x as x
+    @test x === 1
+end
+
+# issue #45494
+begin
+  local b::Tuple{<:Any} = (0,)
+  function f45494()
+    b = b
+    b
+  end
+end
+@test f45494() === (0,)
diff --git a/test/testdefs.jl b/test/testdefs.jl
index b96c95045f2bd..eb0bf570b11fd 100644
--- a/test/testdefs.jl
+++ b/test/testdefs.jl
@@ -23,7 +23,7 @@ function runtests(name, path, isolate=true; seed=nothing)
         end
         res_and_time_data = @timed @testset "$name" begin
             # Random.seed!(nothing) will fail
-            seed != nothing && Random.seed!(seed)
+            seed !== nothing && Random.seed!(seed)
 
             original_depot_path = copy(Base.DEPOT_PATH)
             original_load_path = copy(Base.LOAD_PATH)
diff --git a/test/testhelpers/ChallengePrompts.jl b/test/testhelpers/ChallengePrompts.jl
new file mode 100644
index 0000000000000..10dd1553afbbd
--- /dev/null
+++ b/test/testhelpers/ChallengePrompts.jl
@@ -0,0 +1,123 @@
+module ChallengePrompts
+
+include("FakePTYs.jl")
+using .FakePTYs: with_fake_pty
+using Serialization: serialize, deserialize
+
+const timeout = 60
+
+"""
+    challenge_prompt(code::Expr, challenges; pkgs=[])
+
+Execute the passed code in a separate process, looking for
+the passed prompts and responding as defined in the pairs of
+(prompt, response) in the collection of challenges.
+
+Optionally `import` the given `pkgs`.
+
+Returns the value of the last expression.
+"""
+function challenge_prompt(code::Expr, challenges; pkgs=[])
+    input_code = tempname()
+    open(input_code, "w") do fp
+        serialize(fp, code)
+    end
+    output_file = tempname()
+    torun = """
+        $(isempty(pkgs) ? "" : string("import ", join(pkgs, ", ")))
+        using Serialization
+        result = open($(repr(input_code))) do fp
+            eval(deserialize(fp))
+        end
+        open($(repr(output_file)), "w") do fp
+            serialize(fp, result)
+        end"""
+    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
+    try
+        challenge_prompt(cmd, challenges)
+        return open(output_file, "r") do fp
+            deserialize(fp)
+        end
+    finally
+        isfile(output_file) && rm(output_file)
+        isfile(input_code) && rm(input_code)
+    end
+    return nothing
+end
+
+function challenge_prompt(cmd::Cmd, challenges)
+    function format_output(output)
+        str = read(seekstart(output), String)
+        isempty(str) && return ""
+        return "Process output found:\n\"\"\"\n$str\n\"\"\""
+    end
+    out = IOBuffer()
+    with_fake_pty() do pts, ptm
+        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
+        Base.close_stdio(pts)
+
+        # Kill the process if it takes too long. Typically occurs when process is waiting
+        # for input.
+        timer = Channel{Symbol}(1)
+        watcher = @async begin
+            waited = 0
+            while waited < timeout && process_running(p)
+                sleep(1)
+                waited += 1
+            end
+
+            if process_running(p)
+                kill(p)
+                put!(timer, :timeout)
+            elseif success(p)
+                put!(timer, :success)
+            else
+                put!(timer, :failure)
+            end
+
+            # SIGKILL stubborn processes
+            if process_running(p)
+                sleep(3)
+                process_running(p) && kill(p, Base.SIGKILL)
+            end
+            wait(p)
+        end
+
+        wroteall = false
+        try
+            for (challenge, response) in challenges
+                write(out, readuntil(ptm, challenge, keep=true))
+                if !isopen(ptm)
+                    error("Could not locate challenge: \"$challenge\". ",
+                          format_output(out))
+                end
+                write(ptm, response)
+            end
+            wroteall = true
+
+            # Capture output from process until `pts` is closed
+            write(out, ptm)
+        catch ex
+            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
+                # ignore EIO from `ptm` after `pts` dies
+                error("Process failed possibly waiting for a response. ",
+                      format_output(out))
+            end
+        end
+
+        status = fetch(timer)
+        close(ptm)
+        if status !== :success
+            if status === :timeout
+                error("Process timed out possibly waiting for a response. ",
+                      format_output(out))
+            else
+                error("Failed process. ", format_output(out), "\n", p)
+            end
+        end
+        wait(watcher)
+    end
+    nothing
+end
+
+end
diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl
index c592699440ee0..56ce6dc7d3a49 100644
--- a/test/testhelpers/FakePTYs.jl
+++ b/test/testhelpers/FakePTYs.jl
@@ -1,5 +1,4 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
 module FakePTYs
 
 if Sys.iswindows()
@@ -24,10 +23,7 @@ function open_fake_pty()
         close(pts)
         pts = fds
         # convert pts handle to a TTY
-        #fds = pts.handle
-        #pts.status = Base.StatusClosed
-        #pts.handle = C_NULL
-        #pts = Base.TTY(fds, Base.StatusOpen)
+        #pts = open(fds)::Base.TTY
     else
         O_RDWR = Base.Filesystem.JL_O_RDWR
         O_NOCTTY = Base.Filesystem.JL_O_NOCTTY
@@ -44,8 +40,9 @@ function open_fake_pty()
         pts = RawFD(fds)
 
         # pts = fdio(fds, true)
-        # pts = Base.Filesystem.File(RawFD(fds))
-        # pts = Base.TTY(RawFD(fds); readable = false)
+        # pts = Base.Filesystem.File(pts)
+        # pts = Base.TTY(pts)
+        # pts = Base.open(pts)
         ptm = Base.TTY(RawFD(fdm))
     end
     return pts, ptm
diff --git a/test/testhelpers/FillArrays.jl b/test/testhelpers/FillArrays.jl
index ee988e0f0aa9c..d3b8d74da7148 100644
--- a/test/testhelpers/FillArrays.jl
+++ b/test/testhelpers/FillArrays.jl
@@ -11,6 +11,8 @@ Base.size(F::Fill) = F.size
 
 Base.copy(F::Fill) = F
 
+Base.AbstractArray{T,N}(F::Fill{<:Any,N}) where {T,N} = Fill(T(F.value), F.size)
+
 @inline getindex_value(F::Fill) = F.value
 
 @inline function Base.getindex(F::Fill{<:Any,N}, i::Vararg{Int,N}) where {N}
@@ -29,6 +31,8 @@ end
     F
 end
 
+Base.zero(F::Fill) = Fill(zero(F.value), size(F))
+
 Base.show(io::IO, F::Fill) = print(io, "Fill($(F.value), $(F.size))")
 Base.show(io::IO, ::MIME"text/plain", F::Fill) = show(io, F)
 
diff --git a/test/testhelpers/OffsetDenseArrays.jl b/test/testhelpers/OffsetDenseArrays.jl
new file mode 100644
index 0000000000000..44a1b8d627800
--- /dev/null
+++ b/test/testhelpers/OffsetDenseArrays.jl
@@ -0,0 +1,31 @@
+"""
+    module OffsetDenseArrays
+
+A minimal implementation of an offset array which is also <: DenseArray.
+"""
+module OffsetDenseArrays
+
+struct OffsetDenseArray{A <: DenseVector, T} <: DenseVector{T}
+    x::A
+    offset::Int
+end
+OffsetDenseArray(x::AbstractVector{T}, i::Integer) where {T} = OffsetDenseArray{typeof(x), T}(x, Int(i))
+
+Base.size(x::OffsetDenseArray) = size(x.x)
+Base.pointer(x::OffsetDenseArray) = pointer(x.x)
+
+function Base.getindex(x::OffsetDenseArray, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset]
+end
+
+function Base.setindex(x::OffsetDenseArray, v, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset] = v
+end
+
+IndexStyle(::Type{<:OffsetDenseArray}) = Base.IndexLinear()
+Base.axes(x::OffsetDenseArray) = (x.offset + 1 : x.offset + length(x.x),)
+Base.keys(x::OffsetDenseArray) = only(axes(x))
+
+end # module
diff --git a/test/testhelpers/Quaternions.jl b/test/testhelpers/Quaternions.jl
index 81b7a0c2d0121..b1a414266bb34 100644
--- a/test/testhelpers/Quaternions.jl
+++ b/test/testhelpers/Quaternions.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 module Quaternions
+using LinearAlgebra
 using Random
 
 export Quaternion
@@ -13,9 +14,12 @@ struct Quaternion{T<:Real} <: Number
     v2::T
     v3::T
 end
+Quaternion{T}(s::Real) where {T<:Real} = Quaternion{T}(T(s), zero(T), zero(T), zero(T))
 Quaternion(s::Real, v1::Real, v2::Real, v3::Real) = Quaternion(promote(s, v1, v2, v3)...)
 Base.convert(::Type{Quaternion{T}}, s::Real) where {T <: Real} =
     Quaternion{T}(convert(T, s), zero(T), zero(T), zero(T))
+Base.promote_rule(::Type{Quaternion{T}}, ::Type{S}) where {T <: Real, S <: Real} =
+    Quaternion{promote_type(T, S)}
 Base.abs2(q::Quaternion) = q.s*q.s + q.v1*q.v1 + q.v2*q.v2 + q.v3*q.v3
 Base.float(z::Quaternion{T}) where T = Quaternion(float(z.s), float(z.v1), float(z.v2), float(z.v3))
 Base.abs(q::Quaternion) = sqrt(abs2(q))
@@ -23,7 +27,10 @@ Base.real(::Type{Quaternion{T}}) where {T} = T
 Base.real(q::Quaternion) = q.s
 Base.conj(q::Quaternion) = Quaternion(q.s, -q.v1, -q.v2, -q.v3)
 Base.isfinite(q::Quaternion) = isfinite(q.s) & isfinite(q.v1) & isfinite(q.v2) & isfinite(q.v3)
+Base.isreal(q::Quaternion) = iszero(q.v1) & iszero(q.v2) & iszero(q.v3)
 Base.zero(::Type{Quaternion{T}}) where T = Quaternion{T}(zero(T), zero(T), zero(T), zero(T))
+# avoid defining sqrt(::Quaternion)
+LinearAlgebra.choltype(::AbstractArray{Quaternion{T}}) where T = Quaternion{promote_type(T, Float32)}
 
 Base.:(+)(ql::Quaternion, qr::Quaternion) =
  Quaternion(ql.s + qr.s, ql.v1 + qr.v1, ql.v2 + qr.v2, ql.v3 + qr.v3)
@@ -39,6 +46,9 @@ Base.:(*)(r::Real, q::Quaternion) = q * r
 Base.:(*)(r::Bool, q::Quaternion) = q * r # remove method ambiguity
 Base.:(/)(q::Quaternion, w::Quaternion) = q * conj(w) * (1.0 / abs2(w))
 Base.:(\)(q::Quaternion, w::Quaternion) = conj(q) * w * (1.0 / abs2(q))
+Base.:(/)(q::Quaternion, r::Real) = Quaternion(q.s / r, q.v1 / r, q.v2 / r, q.v3 / r)
+Base.:(==)(q::Quaternion, w::Quaternion) =
+    (q.s == w.s) & (q.v1 == w.v1) & (q.v2 == w.v2) & (q.v3 == w.v3)
 
 # adapted from https://github.com/JuliaGeometry/Quaternions.jl/pull/42
 function Base.rand(rng::AbstractRNG, ::Random.SamplerType{Quaternion{T}}) where {T<:Real}
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
index 43bc27e630479..495fe03347ee7 100644
--- a/test/testhelpers/SizedArrays.jl
+++ b/test/testhelpers/SizedArrays.jl
@@ -23,6 +23,9 @@ Base.first(::SOneTo) = 1
 Base.last(r::SOneTo) = length(r)
 Base.show(io::IO, r::SOneTo) = print(io, "SOneTo(", length(r), ")")
 
+Broadcast.axistype(a::Base.OneTo, s::SOneTo) = s
+Broadcast.axistype(s::SOneTo, a::Base.OneTo) = s
+
 struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
     data::A
     function SizedArray{SZ}(data::AbstractArray{T,N}) where {SZ,T,N}
@@ -33,20 +36,45 @@ struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
         SZ == size(data) || throw(ArgumentError("size mismatch!"))
         new{SZ,T,N,A}(A(data))
     end
+    function SizedArray{SZ,T,N}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
+    function SizedArray{SZ,T}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
 end
 SizedMatrix{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,2,A}
 SizedVector{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,1,A}
-Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data)
+Base.convert(::Type{S}, data::AbstractArray) where {S<:SizedArray} = data isa S ? data : S(data)
 
 # Minimal AbstractArray interface
 Base.size(a::SizedArray) = size(typeof(a))
 Base.size(::Type{<:SizedArray{SZ}}) where {SZ} = SZ
 Base.axes(a::SizedArray) = map(SOneTo, size(a))
 Base.getindex(A::SizedArray, i...) = getindex(A.data, i...)
+Base.setindex!(A::SizedArray, v, i...) = setindex!(A.data, v, i...)
 Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T)))
+Base.parent(S::SizedArray) = S.data
 +(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data)
 ==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data
 
+homogenize_shape(t::Tuple) = (_homogenize_shape(first(t)), homogenize_shape(Base.tail(t))...)
+homogenize_shape(::Tuple{}) = ()
+_homogenize_shape(x::Integer) = x
+_homogenize_shape(x::AbstractUnitRange) = length(x)
+const Dims = Union{Integer, Base.OneTo, SOneTo}
+function Base.similar(::Type{A}, shape::Tuple{Dims, Vararg{Dims}}) where {A<:AbstractArray}
+    similar(A, homogenize_shape(shape))
+end
+function Base.similar(::Type{A}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {A<:AbstractArray}
+    R = similar(A, length.(shape))
+    SizedArray{length.(shape)}(R)
+end
+function Base.similar(x::SizedArray, ::Type{T}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {T}
+    sz = map(length, shape)
+    SizedArray{sz}(similar(parent(x), T, sz))
+end
+
 const SizedMatrixLike = Union{SizedMatrix, Transpose{<:Any, <:SizedMatrix}, Adjoint{<:Any, <:SizedMatrix}}
 
 _data(S::SizedArray) = S.data
diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info
index c83e75dee8060..b03b0e07e6977 100644
--- a/test/testhelpers/coverage_file.info
+++ b/test/testhelpers/coverage_file.info
@@ -10,9 +10,10 @@ DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
-DA:19,2
+DA:18,1
+DA:19,1
 DA:20,1
 DA:22,1
-LH:12
-LF:14
+LH:13
+LF:15
 end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad
deleted file mode 100644
index 311f6379381ee..0000000000000
--- a/test/testhelpers/coverage_file.info.bad
+++ /dev/null
@@ -1,20 +0,0 @@
-SF:<FILENAME>
-DA:3,1
-DA:4,1
-DA:5,0
-DA:7,1
-DA:8,1
-DA:9,3
-DA:10,5
-DA:11,1
-DA:12,1
-DA:14,0
-DA:17,1
-DA:18,0
-DA:19,2
-DA:20,1
-DA:22,1
-DA:1234,0
-LH:12
-LF:16
-end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad2 b/test/testhelpers/coverage_file.info.bad2
deleted file mode 100644
index a766597be4c17..0000000000000
--- a/test/testhelpers/coverage_file.info.bad2
+++ /dev/null
@@ -1,20 +0,0 @@
-SF:<FILENAME>
-DA:3,1
-DA:4,1
-DA:5,0
-DA:7,1
-DA:8,1
-DA:9,3
-DA:10,5
-DA:11,0
-DA:12,1
-DA:14,0
-DA:17,1
-DA:18,0
-DA:19,0
-DA:20,0
-DA:22,1
-DA:1234,0
-LH:9
-LF:16
-end_of_record
diff --git a/test/testhelpers/coverage_file.jl b/test/testhelpers/coverage_file.jl
index e8e0355952d80..577cc6bb5d2ca 100644
--- a/test/testhelpers/coverage_file.jl
+++ b/test/testhelpers/coverage_file.jl
@@ -24,6 +24,6 @@ end
 
 success = code_coverage_test() == [1, 2, 3] &&
           short_form_func_coverage_test(2) == 4
-exit(success ?  0 : 1)
+exit(success ? 0 : 1)
 
 # end of file
diff --git a/test/threads.jl b/test/threads.jl
index 307742a4c292b..6265368c2ac79 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -288,18 +288,16 @@ close(proc.in)
         proc = run(cmd; wait = false)
         done = Threads.Atomic{Bool}(false)
         timeout = false
-        timer = Timer(100) do _
+        timer = Timer(200) do _
             timeout = true
-            for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL]
-                for _ in 1:1000
+            for sig in (Base.SIGQUIT, Base.SIGKILL)
+                for _ in 1:3
                     kill(proc, sig)
+                    sleep(1)
                     if done[]
-                        if sig != Base.SIGTERM
-                            @warn "Terminating `$script` required signal $sig"
-                        end
+                        @warn "Terminating `$script` required signal $sig"
                         return
                     end
-                    sleep(0.001)
                 end
             end
         end
@@ -309,16 +307,11 @@ close(proc.in)
             done[] = true
             close(timer)
         end
-        if ( !success(proc) ) || ( timeout )
+        if !success(proc) || timeout
             @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
         end
-        if Sys.iswindows() || Sys.isapple()
-            # Known failure: https://github.com/JuliaLang/julia/issues/43124
-            @test_skip success(proc)
-        else
-            @test success(proc)
-            @test !timeout
-        end
+        @test success(proc)
+        @test !timeout
     end
 end
 
@@ -341,3 +334,43 @@ end
 @testset "Base.Threads docstrings" begin
     @test isempty(Docs.undocumented_names(Threads))
 end
+
+@testset "wait failed task" begin
+    @testset "wait without throw keyword" begin
+        t = Threads.@spawn error("Error")
+        @test_throws TaskFailedException wait(t)
+    end
+
+    @testset "wait with throw=false" begin
+        t = Threads.@spawn error("Error")
+        wait(t; throw=false)
+        @test istaskfailed(t)
+    end
+end
+
+@testset "jl_*affinity" begin
+    cpumasksize = @ccall uv_cpumask_size()::Cint
+    if cpumasksize > 0 # otherwise affinities are not supported on the platform (UV_ENOTSUP)
+        jl_getaffinity = (tid, mask, cpumasksize) -> ccall(:jl_getaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
+        jl_setaffinity = (tid, mask, cpumasksize) -> ccall(:jl_setaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
+        mask = zeros(Cchar, cpumasksize)
+        @test jl_getaffinity(0, mask, cpumasksize) == 0
+        @test !all(iszero, mask)
+        @test jl_setaffinity(0, mask, cpumasksize) == 0
+    end
+end
+
+# Make sure default number of BLAS threads respects CPU affinity: issue #55572.
+@testset "LinearAlgebra number of default threads" begin
+    if AFFINITY_SUPPORTED
+        allowed_cpus = findall(uv_thread_getaffinity())
+        cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`,
+                     # Remove all variables which could affect the default number of threads
+                     "OPENBLAS_NUM_THREADS"=>nothing,
+                     "GOTO_NUM_THREADS"=>nothing,
+                     "OMP_NUM_THREADS"=>nothing)
+        for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems
+            @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2))
+        end
+    end
+end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index e18e4ad8809fb..595f8991d58d7 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -1122,23 +1122,25 @@ end
 
 # issue #41546, thread-safe package loading
 @testset "package loading" begin
-    ch = Channel{Bool}(threadpoolsize())
+    ntasks = max(threadpoolsize(), 4)
+    ch = Channel{Bool}(ntasks)
     barrier = Base.Event()
     old_act_proj = Base.ACTIVE_PROJECT[]
     try
         pushfirst!(LOAD_PATH, "@")
         Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
         @sync begin
-            for _ in 1:threadpoolsize()
+            for _ in 1:ntasks
                 Threads.@spawn begin
                     put!(ch, true)
                     wait(barrier)
                     @eval using TestPkg
                 end
             end
-            for _ in 1:threadpoolsize()
+            for _ in 1:ntasks
                 take!(ch)
             end
+            close(ch)
             notify(barrier)
         end
         @test Base.root_module(@__MODULE__, :TestPkg) isa Module
@@ -1186,4 +1188,127 @@ end
 @testset "threadcall + threads" begin
     threadcall_threads() #Shouldn't crash!
 end
+
+@testset "Wait multiple tasks" begin
+    convert_tasks(t, x) = x
+    convert_tasks(::Set{Task}, x::Vector{Task}) = Set{Task}(x)
+    convert_tasks(::Tuple{Task}, x::Vector{Task}) = tuple(x...)
+
+    function create_tasks()
+        tasks = Task[]
+        event = Threads.Event()
+        push!(tasks,
+              Threads.@spawn begin
+                  sleep(0.01)
+              end)
+        push!(tasks,
+              Threads.@spawn begin
+                  sleep(0.02)
+              end)
+        push!(tasks,
+              Threads.@spawn begin
+                  wait(event)
+              end)
+        return tasks, event
+    end
+
+    function teardown(tasks, event)
+        notify(event)
+        waitall(resize!(tasks, 3), throw=true)
+    end
+
+    for tasks_type in (Vector{Task}, Set{Task}, Tuple{Task})
+        @testset "waitany" begin
+            @testset "throw=false" begin
+                tasks, event = create_tasks()
+                wait(tasks[1])
+                wait(tasks[2])
+                done,  pending = waitany(convert_tasks(tasks_type, tasks); throw=false)
+                @test length(done) == 2
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test length(pending) == 1
+                @test tasks[3] ∈ pending
+                teardown(tasks, event)
+            end
+
+            @testset "throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                @test_throws CompositeException begin
+                    waitany(convert_tasks(tasks_type, tasks); throw=true)
+                end
+
+                teardown(tasks, event)
+            end
+        end
+
+        @testset "waitall" begin
+            @testset "All tasks succeed" begin
+                tasks, event = create_tasks()
+
+                wait(tasks[1])
+                wait(tasks[2])
+                waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks))
+                @test !istaskdone(waiter)
+
+                notify(event)
+                done, pending = fetch(waiter)
+                @test length(done) == 3
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test tasks[3] ∈ done
+                @test length(pending) == 0
+            end
+
+            @testset "failfast=true, throw=false" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                wait(tasks[1])
+                wait(tasks[2])
+                waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=false)
+
+                done, pending = fetch(waiter)
+                @test length(done) == 3
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test tasks[4] ∈ done
+                @test length(pending) == 1
+                @test tasks[3] ∈ pending
+
+                teardown(tasks, event)
+            end
+
+            @testset "failfast=false, throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                notify(event)
+
+                @test_throws CompositeException begin
+                    waitall(convert_tasks(tasks_type, tasks); failfast=false, throw=true)
+                end
+
+                @test all(istaskdone.(tasks))
+
+                teardown(tasks, event)
+            end
+
+            @testset "failfast=true, throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                @test_throws CompositeException begin
+                    waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=true)
+                end
+
+                @test !istaskdone(tasks[3])
+
+                teardown(tasks, event)
+            end
+        end
+    end
+end
 end # main testset
diff --git a/test/tuple.jl b/test/tuple.jl
index 4b0bfbbb3b055..355ad965f9584 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -209,6 +209,8 @@ end
 
     @test eachindex((2,5,"foo")) === Base.OneTo(3)
     @test eachindex((2,5,"foo"), (1,2,5,7)) === Base.OneTo(4)
+
+    @test Core.Compiler.is_nothrow(Base.infer_effects(iterate, (Tuple{Int,Int,Int}, Int)))
 end
 
 
@@ -530,6 +532,8 @@ end
     for n = 0:15
         @test ntuple(identity, Val(n)) == ntuple(identity, n)
     end
+
+    @test Base.infer_return_type(ntuple, Tuple{typeof(identity), Val}) == Tuple{Vararg{Int}}
 end
 
 struct A_15703{N}
@@ -652,6 +656,8 @@ end
 
     f() = Base.setindex((1:1, 2:2, 3:3), 9, 1)
     @test @inferred(f()) == (9, 2:2, 3:3)
+
+    @test Base.return_types(Base.setindex, Tuple{Tuple,Nothing,Int}) == [Tuple]
 end
 
 @testset "inferable range indexing with constant values" begin
@@ -829,4 +835,13 @@ end
     @test @inferred(Base.circshift(t3, 7)) == ('b', 'c', 'd', 'a')
     @test @inferred(Base.circshift(t3, -1)) == ('b', 'c', 'd', 'a')
     @test_throws MethodError circshift(t1, 'a')
+    @test Base.infer_return_type(circshift, Tuple{Tuple,Integer}) <: Tuple
+    @test Base.infer_return_type(circshift, Tuple{Tuple{Vararg{Any,10}},Integer}) <: Tuple{Vararg{Any,10}}
+    for len ∈ 0:5
+        v = 1:len
+        t = Tuple(v)
+        for shift ∈ -6:6
+            @test circshift(v, shift) == collect(circshift(t, shift))
+        end
+    end
 end
diff --git a/test/vecelement.jl b/test/vecelement.jl
index 6638f06f4f358..b89eb097ee560 100644
--- a/test/vecelement.jl
+++ b/test/vecelement.jl
@@ -1,5 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+using InteractiveUtils
 make_value(::Type{T}, i::Integer) where {T<:Integer} = 3*i%T
 make_value(::Type{T},i::Integer) where {T<:AbstractFloat} = T(3*i)
 
@@ -120,3 +120,9 @@ for T in (Float64, Float32, Int64, Int32)
         @test b == result
     end
 end
+@testset "vecelement overalignment" begin
+    io = IOBuffer()
+    code_llvm(io,getindex, (Array{NTuple{5, VecElement{Float64}}, 1}, Int64), optimize=false)
+    ir = String(take!(io))
+    @test match(r"align 64", ir) === nothing
+end
diff --git a/test/worlds.jl b/test/worlds.jl
index e9d8895fc90d4..268a6664571fb 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -257,11 +257,10 @@ end
 # avoid adding this to Base
 function equal(ci1::Core.CodeInfo, ci2::Core.CodeInfo)
     return ci1.code == ci2.code &&
-           ci1.codelocs == ci2.codelocs &&
+           ci1.debuginfo == ci2.debuginfo &&
            ci1.ssavaluetypes == ci2.ssavaluetypes &&
            ci1.ssaflags == ci2.ssaflags &&
            ci1.method_for_inference_limit_heuristics == ci2.method_for_inference_limit_heuristics &&
-           ci1.linetable == ci2.linetable &&
            ci1.slotnames == ci2.slotnames &&
            ci1.slotflags == ci2.slotflags &&
            ci1.slottypes == ci2.slottypes
@@ -478,3 +477,26 @@ Base.delete_method(fshadow_m2)
 @test Base.morespecific(fshadow_m2, fshadow_m1)
 @test Base.morespecific(fshadow_m3, fshadow_m1)
 @test !Base.morespecific(fshadow_m2, fshadow_m3)
+
+# Generated functions without edges must have min_world = 1.
+# N.B.: If changing this, move this test to precompile and make sure
+# that the specialization survives revalidation.
+function generated_no_edges_gen(world, args...)
+    src = ccall(:jl_new_code_info_uninit, Ref{Core.CodeInfo}, ())
+    src.code = Any[Core.ReturnNode(nothing)]
+    src.slotnames = Symbol[:self]
+    src.slotflags = UInt8[0x00]
+    src.ssaflags = UInt32[0x00]
+    src.ssavaluetypes = 1
+    src.nargs = 1
+    src.min_world = first(Base._methods(generated_no_edges, Tuple{}, -1, world)).method.primary_world
+
+    return src
+end
+
+@eval function generated_no_edges()
+    $(Expr(:meta, :generated, generated_no_edges_gen))
+    $(Expr(:meta, :generated_only))
+end
+
+@test_throws ErrorException("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`") generated_no_edges()