Skip to content

Commit

Permalink
optimizer: Julia-level escape analysis (#43800)
Browse files Browse the repository at this point in the history
This commit ports [EscapeAnalysis.jl](https://github.com/aviatesk/EscapeAnalysis.jl) into Julia base.
You can find the documentation of this escape analysis at [this GitHub page](https://aviatesk.github.io/EscapeAnalysis.jl/dev/)[^1].

[^1]: The same documentation will be included into Julia's developer
      documentation by this commit.

This escape analysis will hopefully be an enabling technology for various
memory-related optimizations at Julia's high level compilation pipeline.
Possible target optimization includes alias aware SROA (#43888),
array SROA (#43909), `mutating_arrayfreeze` optimization (#42465),
stack allocation of mutables, finalizer elision and so on[^2].

[^2]: It would be also interesting if LLVM-level optimizations can consume
      IPO information derived by this escape analysis to broaden
      optimization possibilities.

The primary motivation for porting EA by this PR is to check its impact
on latency as well as to get feedbacks from a broader range of developers.
The plan is that we first introduce EA to Julia Base by this commit, and
then merge the depending PRs built on top of this commit later.

This commit simply defines EA inside Julia base compiler and enables the
existing test suite with it. In this commit we don't run EA at all, and
so this commit shouldn't affect Julia-level compilation latency.

In the depending PRs, EA will run in two stages:
- `IPO EA`: run EA on pre-inlining state to generate IPO-valid cache
- `Local EA`: run EA on post-inlining state to generate local escape
              information used for various optimizations

In order to integrate `IPO EA` with our compilation cache system,
this commit also implements a new `CodeInstance.argescapes` field that
keeps the IPO-valid cache generated by `IPO EA`.
  • Loading branch information
aviatesk authored Feb 16, 2022
1 parent 2d1ea3c commit 8207f5d
Show file tree
Hide file tree
Showing 24 changed files with 5,690 additions and 96 deletions.
75 changes: 41 additions & 34 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -394,40 +394,47 @@ struct VecElement{T}
end
VecElement(arg::T) where {T} = VecElement{T}(arg)

_new(typ::Symbol, argty::Symbol) = eval(Core, :($typ(@nospecialize n::$argty) = $(Expr(:new, typ, :n))))
_new(:GotoNode, :Int)
_new(:NewvarNode, :SlotNumber)
_new(:QuoteNode, :Any)
_new(:SSAValue, :Int)
_new(:Argument, :Int)
_new(:ReturnNode, :Any)
eval(Core, :(ReturnNode() = $(Expr(:new, :ReturnNode)))) # unassigned val indicates unreachable
eval(Core, :(GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest))))
eval(Core, :(LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing))))
eval(Core, :(LineNumberNode(l::Int, @nospecialize(f)) = $(Expr(:new, :LineNumberNode, :l, :f))))
LineNumberNode(l::Int, f::String) = LineNumberNode(l, Symbol(f))
eval(Core, :(GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s))))
eval(Core, :(SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))))
eval(Core, :(TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))))
eval(Core, :(PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))))
eval(Core, :(PiNode(val, typ) = $(Expr(:new, :PiNode, :val, :typ))))
eval(Core, :(PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values))))
eval(Core, :(UpsilonNode(val) = $(Expr(:new, :UpsilonNode, :val))))
eval(Core, :(UpsilonNode() = $(Expr(:new, :UpsilonNode))))
eval(Core, :(LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int, inlined_at::Int) =
$(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))))
eval(Core, :(CodeInstance(mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
@nospecialize(inferred), const_flags::Int32,
min_world::UInt, max_world::UInt, ipo_effects::UInt8, effects::UInt8,
relocatability::UInt8) =
ccall(:jl_new_codeinst, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt, UInt8, UInt8, UInt8),
mi, rettype, inferred_const, inferred, const_flags, min_world, max_world, ipo_effects, effects, relocatability)))
eval(Core, :(Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))))
eval(Core, :(PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))))
eval(Core, :(PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source::Method) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source))))
eval(Core, :(InterConditional(slot::Int, @nospecialize(vtype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :vtype, :elsetype))))
eval(Core, :(MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) =
$(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))))
eval(Core, quote
GotoNode(label::Int) = $(Expr(:new, :GotoNode, :label))
NewvarNode(slot::SlotNumber) = $(Expr(:new, :NewvarNode, :slot))
QuoteNode(@nospecialize value) = $(Expr(:new, :QuoteNode, :value))
SSAValue(id::Int) = $(Expr(:new, :SSAValue, :id))
Argument(n::Int) = $(Expr(:new, :Argument, :n))
ReturnNode(@nospecialize val) = $(Expr(:new, :ReturnNode, :val))
ReturnNode() = $(Expr(:new, :ReturnNode)) # unassigned val indicates unreachable
GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest))
LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing))
function LineNumberNode(l::Int, @nospecialize(f))
isa(f, String) && (f = Symbol(f))
return $(Expr(:new, :LineNumberNode, :l, :f))
end
LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int, inlined_at::Int) =
$(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s))
SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))
PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values))
UpsilonNode(@nospecialize(val)) = $(Expr(:new, :UpsilonNode, :val))
UpsilonNode() = $(Expr(:new, :UpsilonNode))
function CodeInstance(
mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
@nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
ipo_effects::UInt8, effects::UInt8, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#),
relocatability::UInt8)
return ccall(:jl_new_codeinst, Ref{CodeInstance},
(Any, Any, Any, Any, Int32, UInt, UInt, UInt8, UInt8, Any, UInt8),
mi, rettype, inferred_const, inferred, const_flags, min_world, max_world,
ipo_effects, effects, argescapes,
relocatability)
end
Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))
PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))
PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source::Method) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source))
InterConditional(slot::Int, @nospecialize(vtype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :vtype, :elsetype))
MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
end)

Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)

Expand Down
10 changes: 8 additions & 2 deletions base/compiler/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ let
world = get_world_counter()
interp = NativeInterpreter(world)

analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(null_escape_cache)}
fs = Any[
# we first create caches for the optimizer, because they contain many loop constructions
# and they're better to not run in interpreter even during bootstrapping
run_passes,
#=analyze_escapes_tt,=# run_passes,
# then we create caches for inference entries
typeinf_ext, typeinf, typeinf_edge,
]
Expand All @@ -32,7 +33,12 @@ let
end
starttime = time()
for f in fs
for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
if isa(f, DataType) && f.name === typename(Tuple)
tt = f
else
tt = Tuple{typeof(f), Vararg{Any}}
end
for m in _methods_by_ftype(tt, 10, typemax(UInt))
# remove any TypeVars from the intersection
typ = Any[m.spec_types.parameters...]
for i = 1:length(typ)
Expand Down
2 changes: 2 additions & 0 deletions base/compiler/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ ntuple(f, n) = (Any[f(i) for i = 1:n]...,)

# core docsystem
include("docs/core.jl")
import Core.Compiler.CoreDocs
Core.atdoc!(CoreDocs.docm)

# sorting
function sort end
Expand Down
94 changes: 60 additions & 34 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,35 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

#############
# constants #
#############

# The slot has uses that are not statically dominated by any assignment
# This is implied by `SLOT_USEDUNDEF`.
# If this is not set, all the uses are (statically) dominated by the defs.
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
const SLOT_STATICUNDEF = 1 # slot might be used before it is defined (structurally)
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
# const SLOT_CALLED = 64

# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c

const IR_FLAG_NULL = 0x00
# This statement is marked as @inbounds by user.
# Ff replaced by inlining, any contained boundschecks may be removed.
const IR_FLAG_INBOUNDS = 0x01 << 0
# This statement is marked as @inline by user
const IR_FLAG_INLINE = 0x01 << 1
# This statement is marked as @noinline by user
const IR_FLAG_NOINLINE = 0x01 << 2
const IR_FLAG_THROW_BLOCK = 0x01 << 3
# This statement may be removed if its result is unused. In particular it must
# thus be both pure and effect free.
const IR_FLAG_EFFECT_FREE = 0x01 << 4

const TOP_TUPLE = GlobalRef(Core, :tuple)

#####################
# OptimizationState #
#####################
Expand All @@ -21,10 +51,10 @@ function push!(et::EdgeTracker, ci::CodeInstance)
push!(et, ci.def)
end

struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
struct InliningState{S <: Union{EdgeTracker, Nothing}, MICache, I<:AbstractInterpreter}
params::OptimizationParams
et::S
mi_cache::T
mi_cache::MICache # TODO move this to `OptimizationState` (as used by EscapeAnalysis as well)
interp::I
end

Expand Down Expand Up @@ -121,36 +151,6 @@ function ir_to_codeinf!(opt::OptimizationState)
return src
end

#############
# constants #
#############

# The slot has uses that are not statically dominated by any assignment
# This is implied by `SLOT_USEDUNDEF`.
# If this is not set, all the uses are (statically) dominated by the defs.
# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
const SLOT_STATICUNDEF = 1 # slot might be used before it is defined (structurally)
const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
# const SLOT_CALLED = 64

# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c

const IR_FLAG_NULL = 0x00
# This statement is marked as @inbounds by user.
# Ff replaced by inlining, any contained boundschecks may be removed.
const IR_FLAG_INBOUNDS = 0x01 << 0
# This statement is marked as @inline by user
const IR_FLAG_INLINE = 0x01 << 1
# This statement is marked as @noinline by user
const IR_FLAG_NOINLINE = 0x01 << 2
const IR_FLAG_THROW_BLOCK = 0x01 << 3
# This statement may be removed if its result is unused. In particular it must
# thus be both pure and effect free.
const IR_FLAG_EFFECT_FREE = 0x01 << 4

const TOP_TUPLE = GlobalRef(Core, :tuple)

#########
# logic #
#########
Expand Down Expand Up @@ -502,11 +502,37 @@ end
# run the optimization work
function optimize(interp::AbstractInterpreter, opt::OptimizationState,
params::OptimizationParams, caller::InferenceResult)
@timeit "optimizer" ir = run_passes(opt.src, opt)
@timeit "optimizer" ir = run_passes(opt.src, opt, caller)
return finish(interp, opt, params, ir, caller)
end

function run_passes(ci::CodeInfo, sv::OptimizationState)
using .EscapeAnalysis
import .EscapeAnalysis: EscapeState, ArgEscapeCache, is_ipo_profitable

"""
cache_escapes!(caller::InferenceResult, estate::EscapeState)
Transforms escape information of call arguments of `caller`,
and then caches it into a global cache for later interprocedural propagation.
"""
cache_escapes!(caller::InferenceResult, estate::EscapeState) =
caller.argescapes = ArgEscapeCache(estate)

function ipo_escape_cache(mi_cache::MICache) where MICache
return function (linfo::Union{InferenceResult,MethodInstance})
if isa(linfo, InferenceResult)
argescapes = linfo.argescapes
else
codeinst = get(mi_cache, linfo, nothing)
isa(codeinst, CodeInstance) || return nothing
argescapes = codeinst.argescapes
end
return argescapes !== nothing ? argescapes::ArgEscapeCache : nothing
end
end
null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing

function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult)
@timeit "convert" ir = convert_to_ircode(ci, sv)
@timeit "slot2reg" ir = slot2reg(ir, ci, sv)
# TODO: Domsorting can produce an updated domtree - no need to recompute here
Expand Down
Loading

0 comments on commit 8207f5d

Please sign in to comment.