diff --git a/src/cunumeric/index/select.cu b/src/cunumeric/index/select.cu index e99fac0e9..ceaa8259c 100644 --- a/src/cunumeric/index/select.cu +++ b/src/cunumeric/index/select.cu @@ -86,6 +86,7 @@ struct SelectImplBody { const size_t blocks = (out_size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; auto stream = get_cached_stream(); + if (dense && (DIM <= 1 || rect.volume() == 0)) { auto cond_arr = create_buffer(condlist.size(), legate::Memory::Kind::Z_COPY_MEM); for (uint32_t idx = 0; idx < condlist.size(); ++idx) cond_arr[idx] = condlist[idx].ptr(rect); @@ -93,17 +94,19 @@ struct SelectImplBody { create_buffer(choicelist.size(), legate::Memory::Kind::Z_COPY_MEM); for (uint32_t idx = 0; idx < choicelist.size(); ++idx) choice_arr[idx] = choicelist[idx].ptr(rect); + VAL* outptr = out.ptr(rect); select_kernel_dense<<>>( outptr, narrays, cond_arr, choice_arr, default_val, out_size); - } else { + + } else { // not dense auto cond_arr = create_buffer>(condlist.size(), legate::Memory::Kind::Z_COPY_MEM); for (uint32_t idx = 0; idx < condlist.size(); ++idx) cond_arr[idx] = condlist[idx]; - auto choice_arr = create_buffer>(choicelist.size(), legate::Memory::Kind::Z_COPY_MEM); for (uint32_t idx = 0; idx < choicelist.size(); ++idx) choice_arr[idx] = choicelist[idx]; + if (out_size == 0) return; select_kernel<<>>( out, narrays, cond_arr, choice_arr, default_val, rect, pitches, out_size, rect.volume());