Skip to content

Commit

Permalink
Removed unnecessary `synchronize` calls and marked read-only kernel vector arguments with `@Const`
Browse files Browse the repository at this point in the history
  • Loading branch information
anicusan committed Nov 3, 2024
1 parent 42b021e commit 56d9d23
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 56 deletions.
31 changes: 12 additions & 19 deletions src/traverse/traverse_pair_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,14 @@ function traverse_nodes_pair!(bvh1, bvh2, src::AbstractGPUVector, dst::AbstractG
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[idst_offsets]
end


@kernel cpu=false inbounds=true function _traverse_nodes_pair_gpu!(
tree1, tree2,
nodes1, nodes2,
src, dst, num_src, dst_offsets, idst_offsets,
@Const(nodes1), @Const(nodes2),
@Const(src), dst, num_src, dst_offsets, idst_offsets,
num_skips1, num_skips2,
)
# Group (block) and local (thread) indices
Expand Down Expand Up @@ -147,15 +146,14 @@ function traverse_nodes_left!(bvh1, bvh2, src::AbstractGPUVector, dst::AbstractG
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[idst_offsets]
end


@kernel cpu=false inbounds=true function _traverse_nodes_left_gpu!(
tree1, tree2,
nodes1, nodes2,
src, dst, num_src, dst_offsets, idst_offsets,
@Const(nodes1), @Const(nodes2),
@Const(src), dst, num_src, dst_offsets, idst_offsets,
num_skips1, num_skips2,
)
# Group (block) and local (thread) indices
Expand Down Expand Up @@ -250,15 +248,14 @@ function traverse_nodes_right!(bvh1, bvh2, src::AbstractGPUVector, dst::Abstract
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[idst_offsets]
end


@kernel cpu=false inbounds=true function _traverse_nodes_right_gpu!(
tree1, tree2,
nodes1, nodes2,
src, dst, num_src, dst_offsets, idst_offsets,
@Const(nodes1), @Const(nodes2),
@Const(src), dst, num_src, dst_offsets, idst_offsets,
num_skips1, num_skips2,
)
# Group (block) and local (thread) indices
Expand Down Expand Up @@ -352,15 +349,14 @@ function traverse_nodes_leaves_left!(bvh1, bvh2, src::AbstractGPUVector, dst::Ab
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[idst_offsets]
end


@kernel cpu=false inbounds=true function _traverse_nodes_leaves_left_gpu!(
tree1, tree2,
nodes1, leaves2, order2,
src, dst, num_src, dst_offsets, idst_offsets,
@Const(nodes1), @Const(leaves2), @Const(order2),
@Const(src), dst, num_src, dst_offsets, idst_offsets,
num_skips1, num_above2,
)
# Group (block) and local (thread) indices
Expand Down Expand Up @@ -456,15 +452,14 @@ function traverse_nodes_leaves_right!(bvh1, bvh2, src::AbstractGPUVector, dst::A
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[idst_offsets]
end


@kernel cpu=false inbounds=true function _traverse_nodes_leaves_right_gpu!(
tree1, tree2,
leaves1, nodes2, order1,
src, dst, num_src, dst_offsets, idst_offsets,
@Const(leaves1), @Const(nodes2), @Const(order1),
@Const(src), dst, num_src, dst_offsets, idst_offsets,
num_above1, num_skips2,
)
# Group (block) and local (thread) indices
Expand Down Expand Up @@ -552,14 +547,13 @@ function traverse_leaves_pair!(bvh1, bvh2, src::AbstractGPUVector, contacts::Abs
)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[end]
end


@kernel cpu=false inbounds=true function _traverse_leaves_pair_gpu!(
leaves1, leaves2, order1, order2,
src, dst,
@Const(leaves1), @Const(leaves2), @Const(order1), @Const(order2),
@Const(src), dst,
num_src, dst_offsets,
num_above1, num_above2,
)
Expand Down Expand Up @@ -614,4 +608,3 @@ end
dst[offset + ithread] = temp[ithread]
end
end

73 changes: 36 additions & 37 deletions src/traverse/traverse_single_gpu.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
"""
    traverse_nodes!(bvh, src, dst, num_src, dst_offsets, level, self_checks, options)

Launch `_traverse_nodes_gpu!` for one tree level above the leaves. At these levels no
contacts are produced; the traversal only sprouts further BVTT checks from `src` into
`dst`.

Threads are launched in blocks of `options.block_size`, with the total rounded up to a
whole number of blocks covering `num_src`.

Returns `dst_offsets[level]`, read back with `@allowscalar`; this is the number of checks
written into `dst`.
"""
function traverse_nodes!(bvh, src::AbstractGPUVector, dst::AbstractGPUVector,
                         num_src, dst_offsets, level, self_checks, options)
    tree = bvh.tree

    # Virtual elements preceding this level; the kernel skips them when computing the
    # memory index
    level_virtual = tree.virtual_leaves >> (tree.levels - (level - 1))
    skipped_virtual = 2 * level_virtual - count_ones(level_virtual)

    # Round the launch size up to a whole number of blocks
    threads = options.block_size
    blocks = div(num_src + threads - 1, threads)
    device = get_backend(src)

    launch! = _traverse_nodes_gpu!(device, threads)
    launch!(
        tree, bvh.nodes, src, dst, num_src, dst_offsets, level,
        skipped_virtual, self_checks;
        ndrange=blocks * threads,
    )

    # NOTE(review): no explicit `synchronize(device)` precedes this read; presumably the
    # scalar read itself waits for the kernel to finish — confirm for each backend.
    return @allowscalar dst_offsets[level]
end




@kernel cpu=false inbounds=true function _traverse_nodes_gpu!(
tree, nodes,
src, dst,
tree, @Const(nodes),
@Const(src), dst,
num_src, dst_offsets, level,
num_skips, self_checks,
)
Expand Down Expand Up @@ -93,33 +116,31 @@ end



function traverse_nodes!(bvh, src::AbstractGPUVector, dst::AbstractGPUVector,
num_src, dst_offsets, level, self_checks, options)
# Traverse levels above leaves => no contacts, only further BVTT sprouting
function traverse_leaves!(bvh, src::AbstractGPUVector, contacts::AbstractGPUVector,
num_src, dst_offsets, options)
# Traverse final level, only doing leaf-leaf checks

# Compute number of virtual elements before this level to skip when computing the memory index
virtual_nodes_level = bvh.tree.virtual_leaves >> (bvh.tree.levels - (level - 1))
virtual_nodes_before = 2 * virtual_nodes_level - count_ones(virtual_nodes_level)
# Number of implicit indices above leaf-level
num_skips = pow2(bvh.tree.levels - 1) - 1

block_size = options.block_size
num_blocks = (num_src + block_size - 1) ÷ block_size
backend = get_backend(src)

kernel! = _traverse_nodes_gpu!(backend, block_size)
kernel!(bvh.tree, bvh.nodes, src, dst, num_src, dst_offsets, level, virtual_nodes_before, self_checks,
kernel! = _traverse_leaves_gpu!(backend, block_size)
kernel!(bvh.leaves, bvh.order, src, contacts, num_src, dst_offsets, num_skips,
ndrange=num_blocks * block_size)

# We need to know how many checks we have written into dst
synchronize(backend)
@allowscalar dst_offsets[level]
# We need to know how many pairs we have written into contacts
@allowscalar dst_offsets[end]
end




@kernel cpu=false inbounds=true function _traverse_leaves_gpu!(
leaves, order,
src, dst,
@Const(leaves), @Const(order),
@Const(src), dst,
num_src, dst_offsets,
num_skips,
)
Expand Down Expand Up @@ -178,25 +199,3 @@ end
dst[offset + ithread] = temp[ithread]
end
end


"""
    traverse_leaves!(bvh, src, contacts, num_src, dst_offsets, options)

Launch `_traverse_leaves_gpu!` for the final tree level, where only leaf-leaf checks are
done and the resulting pairs are written into `contacts`.

Threads are launched in blocks of `options.block_size`, with the total rounded up to a
whole number of blocks covering `num_src`.

Returns `dst_offsets[end]`, read back with `@allowscalar` after synchronizing the backend;
this is the number of pairs written into `contacts`.
"""
function traverse_leaves!(bvh, src::AbstractGPUVector, contacts::AbstractGPUVector,
                          num_src, dst_offsets, options)
    # Count of implicit indices sitting above the leaf level
    above_leaves = pow2(bvh.tree.levels - 1) - 1

    # Round the launch size up to a whole number of blocks
    threads = options.block_size
    blocks = div(num_src + threads - 1, threads)
    device = get_backend(src)

    launch! = _traverse_leaves_gpu!(device, threads)
    launch!(
        bvh.leaves, bvh.order, src, contacts, num_src, dst_offsets, above_leaves;
        ndrange=blocks * threads,
    )

    # Wait for the kernel before reading how many pairs it wrote into contacts
    synchronize(device)
    return @allowscalar dst_offsets[end]
end

0 comments on commit 56d9d23

Please sign in to comment.