Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use SIMD.jl directly instead of LV.jl for fast_findmin() #84

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,31 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c"
LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3"
MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"

[weakdeps]
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
EDM4hep = "eb32b910-dde9-4347-8fce-cd6be3498f0c"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"

[extensions]
JetVisualisation = "Makie"
EDM4hepJets = "EDM4hep"
JetVisualisation = "Makie"

[compat]
Accessors = "0.1.36"
CodecZlib = "0.7.4"
EDM4hep = "0.4.0"
EnumX = "1.0.4"
JSON = "0.21.4"
LoopVectorization = "0.12.170"
LorentzVectorHEP = "0.1.6"
LorentzVectors = "0.4.3"
Makie = "0.20, 0.21"
MuladdMacro = "0.2.4"
SIMD = "3.6"
StructArrays = "0.6.18"
julia = "1.9"

Expand Down
1 change: 1 addition & 0 deletions src/JetReconstruction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module JetReconstruction
using LorentzVectorHEP
using MuladdMacro
using StructArrays
using SIMD

# Import from LorentzVectorHEP methods for those 4-vector types
pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p)
Expand Down
2 changes: 0 additions & 2 deletions src/PlainAlgo.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using LoopVectorization

"""
dist(i, j, rapidity_array, phi_array)

Expand Down
1 change: 0 additions & 1 deletion src/TiledAlgoLL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

using Logging
using Accessors
using LoopVectorization

# Include struct definitions and basic operations
include("TiledAlgoLLStructs.jl")
Expand Down
46 changes: 35 additions & 11 deletions src/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ end
"""
fast_findmin(dij, n)

Find the minimum value and its index in the first `n` elements of the `dij` array.

# Arguments
- `dij`: An array of values.
Expand All @@ -133,14 +132,39 @@ array. The use of `@turbo` macro gives a significant performance boost.
- `dij_min`: The minimum value in the first `n` elements of the `dij` array.
- `best`: The index of the minimum value in the `dij` array.
"""
function fast_findmin(dij::DenseVector{T}, n) where {T}
    # Return `(min_value, min_index)` for the first `n` elements of `dij`.
    #
    # The scan processes eight elements per iteration with explicit SIMD.jl vectors:
    # every lane tracks the smallest value it has seen and the index it came from.
    # The eight lanes are then reduced to one result and any leftover `n % 8`
    # elements are handled by a scalar tail loop.
    #
    # Ties resolve to the lowest index, like `findmin`. Elements that never compare
    # `<` the running minimum (e.g. NaN) are ignored rather than propagated.

    # The loads below run under `@inbounds`, so validate the requested range up
    # front. Callers that have already proven the range can elide this check by
    # calling from an `@inbounds` context once the function is inlined.
    @boundscheck begin
        n >= 1 || throw(ArgumentError("fast_findmin requires n >= 1, got $n"))
        checkbounds(dij, 1:n)
    end

    lane_indices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8))
    # `typemax(T)` is `Inf` for floating-point `T` and stays valid for other
    # element types supported by SIMD.jl.
    minvals = SIMD.Vec{8, T}(typemax(T))
    # Start every lane at index 1 so that a valid index is returned even when no
    # element is strictly smaller than the sentinel (e.g. all values are `Inf`).
    min_indices = SIMD.Vec{8, Int}(1)

    n_batches, remainder = divrem(n, 8)
    lane = SIMD.VecRange{8}(0)
    i = 1
    # No `@fastmath` here: the loop only compares and selects, and fast-math
    # assumes the absence of Inf/NaN, which the sentinel above would violate.
    @inbounds for _ in 1:n_batches
        dijs = dij[lane + i]          # loads dij[i], ..., dij[i + 7]
        predicate = dijs < minvals    # strict `<` keeps the first hit per lane
        minvals = SIMD.vifelse(predicate, dijs, minvals)
        min_indices = SIMD.vifelse(predicate, lane_indices, min_indices)

        i += 8
        lane_indices += 8
    end

    # Horizontal reduction: take the global minimum, then among the lanes that
    # hold it pick the smallest index so that ties resolve to the first one.
    min_value = minimum(minvals)
    holds_min = minvals == SIMD.Vec{8, T}(min_value)
    no_index = SIMD.Vec{8, Int}(typemax(Int))
    min_index = minimum(SIMD.vifelse(holds_min, min_indices, no_index))

    # Scalar tail for the elements that do not fill a complete vector. These all
    # have larger indices than the vectorised part, so strict `<` preserves the
    # lowest-index tie-break.
    @inbounds for _ in 1:remainder
        xi = dij[i]
        pred = xi < min_value
        min_value = ifelse(pred, xi, min_value)
        min_index = ifelse(pred, i, min_index)
        i += 1
    end
    return min_value, min_index
end
Loading