From 23c4276d017c2861fbd42a1eb71a279fa59189a9 Mon Sep 17 00:00:00 2001 From: "Ryan D. Friese" Date: Sat, 26 Aug 2023 13:40:31 -0700 Subject: [PATCH] batched operations use variable sized indicies based on len of array --- Cargo.toml | 2 +- impl/src/array_ops.rs | 1040 +++------- src/array.rs | 51 +- src/array/atomic/operations.rs | 40 - src/array/generic_atomic.rs | 141 +- src/array/generic_atomic/iteration.rs | 81 +- src/array/generic_atomic/operations.rs | 175 -- src/array/global_lock_atomic.rs | 40 +- src/array/global_lock_atomic/iteration.rs | 83 +- src/array/global_lock_atomic/operations.rs | 175 -- src/array/iterator/consumer.rs | 85 +- src/array/iterator/distributed_iterator.rs | 66 +- .../distributed_iterator/consumer/collect.rs | 155 +- .../distributed_iterator/consumer/for_each.rs | 102 +- src/array/iterator/local_iterator.rs | 123 +- .../local_iterator/consumer/collect.rs | 88 +- .../iterator/local_iterator/consumer/count.rs | 63 +- .../local_iterator/consumer/for_each.rs | 90 +- .../local_iterator/consumer/reduce.rs | 358 +--- .../iterator/local_iterator/consumer/sum.rs | 28 +- src/array/iterator/mod.rs | 2 - src/array/local_lock_atomic.rs | 36 +- src/array/local_lock_atomic/iteration.rs | 87 +- src/array/local_lock_atomic/operations.rs | 171 -- src/array/native_atomic.rs | 39 +- src/array/native_atomic/iteration.rs | 83 +- src/array/native_atomic/operations.rs | 56 - src/array/operations.rs | 1264 +++++------- src/array/operations/access.rs | 37 +- src/array/operations/arithmetic.rs | 170 +- src/array/operations/bitwise.rs | 102 +- src/array/operations/compare_exchange.rs | 27 +- src/array/operations/read_only.rs | 21 +- src/array/operations/shift.rs | 72 +- src/array/read_only.rs | 40 +- src/array/read_only/iteration.rs | 81 +- src/array/unsafe.rs | 92 +- src/array/unsafe/iteration/local.rs | 89 +- src/array/unsafe/operations.rs | 1708 ++++++----------- src/darc.rs | 8 +- src/lamellae/command_queues.rs | 15 +- src/lamellar_task_group.rs | 124 +- 42 files changed, 2541 insertions(+), 4769 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1390e779..a969c6db 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ rand = "0.8.5" parking_lot = {version = "0.12.1", features = ["arc_lock", "send_guard", "serde"] } indexmap = "1.9.1" #lamellar_alloc core_affinity = "0.5.10" -log = "0.4.17" +log = "0.4.19" simple_logger = "4.0.0" async-task = "4.3.0" async-trait = "0.1.58" diff --git a/impl/src/array_ops.rs b/impl/src/array_ops.rs index 4a02140e..d1701966 100644 --- a/impl/src/array_ops.rs +++ b/impl/src/array_ops.rs @@ -195,681 +195,14 @@ fn native_atomic_slice( } } -fn create_buf_ops( - typeident: syn::Type, - array_type: syn::Ident, - byte_array_type: syn::Ident, - optypes: &Vec, - rt: bool, -) -> proc_macro2::TokenStream { - let lamellar = if rt { - quote::format_ident!("crate") - } else { - quote::format_ident!("__lamellar") - }; - - let (am_data, am): (syn::Path, syn::Path) = if rt { - ( - syn::parse("lamellar_impl::AmDataRT".parse().unwrap()).unwrap(), - syn::parse("lamellar_impl::rt_am".parse().unwrap()).unwrap(), - ) - } else { - ( - syn::parse("lamellar::AmData".parse().unwrap()).unwrap(), - syn::parse("lamellar::am".parse().unwrap()).unwrap(), - ) - }; - - let res_t = quote! { - let res_t = unsafe{std::slice::from_raw_parts_mut(results_u8.as_mut_ptr().offset(results_offset as isize) as *mut #typeident,1)}; - results_offset += std::mem::size_of::<#typeident>(); - }; - let mut expanded = quote! 
{}; - let ( - lhs, - assign, - fetch_add, - fetch_sub, - fetch_mul, - fetch_div, - fetch_rem, - fetch_and, - fetch_or, - fetch_xor, - load, - swap, - compare_exchange, - compare_exchange_eps, - shl, - fetch_shl, - shr, - fetch_shr, - ) = if array_type == "NativeAtomicArray" { - let (_slice, val) = native_atomic_slice(&typeident, &lamellar); - ( - quote! { #val }, //lhs - quote! {slice[index].store(val, Ordering::SeqCst)}, //assign - quote! {#res_t res_t[0] = slice[index].fetch_add(val, Ordering::SeqCst);}, //fetch_add - quote! {#res_t res_t[0] = slice[index].fetch_sub(val, Ordering::SeqCst);}, //fetch_sub - quote! { //fetch_mul - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old * val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old * val; - } - #res_t res_t[0] = old; - }, - quote! { //fetch_div - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old / val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old / val; - } - #res_t res_t[0] = old; - }, - quote! { //fetch_rem - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old % val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old % val; - } - #res_t res_t[0] = old; - }, - quote! {#res_t res_t[0] = slice[index].fetch_and(val, Ordering::SeqCst);}, //fetch_and - quote! {#res_t res_t[0] = slice[index].fetch_or(val, Ordering::SeqCst);}, //fetch_or - quote! {#res_t res_t[0] = slice[index].fetch_xor(val, Ordering::SeqCst);}, //fetch_or - quote! {slice[index].load(Ordering::SeqCst)}, //load - quote! { //swap - let mut old = slice[index].load(Ordering::SeqCst); - while slice[index].compare_exchange(old, val, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - } - #res_t res_t[0] = old; - }, - quote! { //compare_exchange - let old = match slice[index].compare_exchange(old, val, Ordering::SeqCst, Ordering::SeqCst) { - Ok(old) => { - results_u8[results_offset] = 0; - results_offset+=1; - old - }, - Err(old) => { - results_u8[results_offset] = 1; - results_offset+=1; - old - }, - }; - #res_t res_t[0] = old; - }, - quote! { //compare exchange epsilon - let old = match slice[index].compare_exchange(old, val, Ordering::SeqCst, Ordering::SeqCst) { - Ok(orig) => { //woohoo dont need to do worry about the epsilon - results_u8[results_offset] = 0; - results_offset+=1; - val - }, - Err(orig) => { //we dont match exactly, so we need to do the epsilon check - let mut done = false; - let mut orig = orig; - while (orig.abs_diff(old) as #typeident) < eps && !done{ //keep trying while under epsilon - orig = match slice[index].compare_exchange(orig, val, Ordering::SeqCst, Ordering::SeqCst) { - Ok(old_val) => { //we did it! - done = true; - old_val - }, - Err(old_val) => { //someone else exchanged first! - old_val - }, - } - } - if done{ - results_u8[results_offset] = 0; - results_offset+=1; - } - else{ - results_u8[results_offset] = 1; - results_offset+=1; - } - orig - }, - }; - #res_t res_t[0] = old; - }, - quote! 
{ //shl - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old << val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old << val; - } - }, - quote! { //fetch_shl - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old << val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old << val; - } - #res_t res_t[0] = old; - }, - quote! { //shr - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old >> val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old >> val; - } - }, - quote! { //fetch_shr - let mut old = slice[index].load(Ordering::SeqCst); - let mut new = old >> val; - while slice[index].compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst).is_err() { - std::thread::yield_now(); - old = slice[index].load(Ordering::SeqCst); - new = old >> val; - } - #res_t res_t[0] = old; - }, - ) - } else if array_type == "ReadOnlyArray" { - ( - quote! { panic!("assign a valid op for Read Only Arrays");}, //lhs - quote! { panic!("assign/store not a valid op for Read Only Arrays");}, //assign - quote! { panic!("fetch_add not a valid op for Read Only Arrays"); }, //fetch_add -- we lock the index before this point so its actually atomic - quote! { panic!("fetch_sub not a valid op for Read Only Arrays"); }, //fetch_sub --we lock the index before this point so its actually atomic - quote! { panic!("fetch_mul not a valid op for Read Only Arrays"); }, //fetch_mul --we lock the index before this point so its actually atomic - quote! { panic!("fetch_div not a valid op for Read Only Arrays"); }, //fetch_div --we lock the index before this point so its actually atomic - quote! { panic!("fetch_rem not a valid op for Read Only Arrays"); }, //fetch_rem --we lock the index before this point so its actually atomic - quote! { panic!("fetch_and not a valid op for Read Only Arrays"); }, //fetch_and --we lock the index before this point so its actually atomic - quote! { panic!("fetch_or not a valid op for Read Only Arrays"); }, //fetch_or --we lock the index before this point so its actually atomic - quote! { panic!("fetch_xor not a valid op for Read Only Arrays"); }, //fetch_xor --we lock the index before this point so its actually atomic - quote! {slice[index]}, //load - quote! { panic!("swap not a valid op for Read Only Arrays"); }, //swap we lock the index before this point so its actually atomic - quote! { panic!("compare exchange not a valid op for Read Only Arrays"); }, // compare_exchange -- we lock the index before this point so its actually atomic - quote! { panic!("compare exchange eps not a valid op for Read Only Arrays"); }, //compare exchange epsilon - quote! { panic!("shl not a valid op for Read Only Arrays"); }, //shl - quote! { panic!("fetch_shl not a valid op for Read Only Arrays"); }, //fetch_shl - quote! { panic!("shr not a valid op for Read Only Arrays"); }, //shr - quote! { panic!("fetch_shr not a valid op for Read Only Arrays"); }, //fetch_shr - ) - } else { - ( - quote! {slice[index]}, //lhs - quote! {slice[index] = val}, //assign - quote! 
{#res_t res_t[0] = slice[index]; slice[index] += val; }, //fetch_add -- we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] -= val; }, //fetch_sub --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] *= val; }, //fetch_mul --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] /= val; }, //fetch_div --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] %= val; }, //fetch_rem --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] &= val; }, //fetch_and --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] |= val; }, //fetch_or --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] ^= val; }, //fetch_xor --we lock the index before this point so its actually atomic - quote! {slice[index]}, //load - quote! {#res_t res_t[0] = slice[index]; slice[index] = val; }, //swap we lock the index before this point so its actually atomic - quote! { // compare_exchange -- we lock the index before this point so its actually atomic - let old = if old == slice[index]{ - slice[index] = val; - results_u8[results_offset] = 0; - results_offset+=1; - old - } else { - results_u8[results_offset] = 1; - results_offset+=1; - slice[index] - }; - #res_t res_t[0] = old; - }, - quote! { //compare exchange epsilon - let same = if old > slice[index] { - old - slice[index] < eps - } - else{ - slice[index] - old < eps - }; - let old = if same { - slice[index] = val; - results_u8[results_offset] = 0; - results_offset+=1; - old - } else { - results_u8[results_offset] = 1; - results_offset+=1; - slice[index] - }; - #res_t res_t[0] = old; - }, - quote! { slice[index] <<= val; }, //shl --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] <<= val; }, //fetch_shl --we lock the index before this point so its actually atomic - quote! { slice[index] >>= val; }, //shr --we lock the index before this point so its actually atomic - quote! {#res_t res_t[0] = slice[index]; slice[index] >>= val; }, //fetch_shr --we lock the index before this point so its actually atomic - ) - }; - let (lock, slice) = if array_type == "GenericAtomicArray" { - ( - quote! {let _lock = self.data.lock_index(index);}, - quote! {let mut slice = unsafe{self.data.__local_as_mut_slice()};}, - ) - } else if array_type == "NativeAtomicArray" { - let (slice, _val) = native_atomic_slice(&typeident, &lamellar); - ( - quote! {}, //no lock since its native atomic - quote! { #slice }, - ) - } else if array_type == "LocalLockArray" { - ( - quote! {}, //no explicit lock since the slice handle is a lock guard - quote! {let mut slice = self.data.write_local_data();}, //this is the lock - ) - } else if array_type == "GlobalLockArray" { - ( - quote! {}, //no explicit lock since the slice handle is a lock guard - quote! {let mut slice = self.data.async_write_local_data().await;}, //this is the lock - ) - } else if array_type == "ReadOnlyArray" { - ( - quote! {}, //no explicit lock since the slice handle is a lock guard - quote! {let slice = self.data.local_data();}, //this is the lock - ) - } else { - ( - quote! {}, //no lock cause either readonly or unsafe - quote! 
{let mut slice = unsafe{self.data.mut_local_data()};}, - ) - }; - - let mut match_stmts = quote! {}; - for optype in optypes { - match optype { - OpType::Arithmetic => match_stmts.extend(quote! { - ArrayOpCmd::Add=>{ #lhs += val }, - ArrayOpCmd::FetchAdd=> { - #fetch_add - }, - ArrayOpCmd::Sub=>{#lhs -= val}, - ArrayOpCmd::FetchSub=>{ - #fetch_sub - }, - ArrayOpCmd::Mul=>{ #lhs *= val}, - ArrayOpCmd::FetchMul=>{ - #fetch_mul - }, - ArrayOpCmd::Div=>{ #lhs /= val }, - ArrayOpCmd::FetchDiv=>{ - #fetch_div - }, - ArrayOpCmd::Rem=>{ #lhs %= val }, - ArrayOpCmd::FetchRem=>{ - #fetch_rem - }, - ArrayOpCmd::Put => {#assign}, - ArrayOpCmd::Get =>{ - #res_t res_t[0] = #load; - - } - }), - OpType::Bitwise => match_stmts.extend(quote! { - ArrayOpCmd::And=>{#lhs &= val}, - ArrayOpCmd::FetchAnd=>{ - #fetch_and - }, - ArrayOpCmd::Or=>{#lhs |= val}, - ArrayOpCmd::FetchOr=>{ - #fetch_or - }, - ArrayOpCmd::Xor=>{#lhs ^= val}, - ArrayOpCmd::FetchXor=>{ - #fetch_xor - }, - }), - OpType::Access => match_stmts.extend(quote! { - ArrayOpCmd::Store=>{ - #assign - }, - ArrayOpCmd::Swap=>{ - #swap - }, - }), - OpType::CompEx => match_stmts.extend(quote! { - ArrayOpCmd::CompareExchange(old) =>{ - #compare_exchange - } - }), - OpType::CompExEps => match_stmts.extend(quote! { - ArrayOpCmd::CompareExchangeEps(old,eps) =>{ - #compare_exchange_eps - } - }), - OpType::ReadOnly => match_stmts.extend(quote! { - ArrayOpCmd::Load=>{ - #res_t res_t[0] = #load; - }, - }), - OpType::Shift => match_stmts.extend(quote! { - ArrayOpCmd::Shl=>{#shl}, - ArrayOpCmd::FetchShl=>{ - #fetch_shl - }, - ArrayOpCmd::Shr=>{#shr}, - ArrayOpCmd::FetchShr=>{ - #fetch_shr - }, - }), - } - } - - let buf_op_name = quote::format_ident!("{}_{}_op_buf", array_type, type_to_string(&typeident)); - let am_buf_name = quote::format_ident!("{}_{}_am_buf", array_type, type_to_string(&typeident)); - let dist_am_buf_name = - quote::format_ident!("{}_{}_am_buf", array_type, type_to_string(&typeident)); - let reg_name = quote::format_ident!("{}OpBuf", array_type); - - let inner_op = quote! { - let index = *index; - let val = *val; - #lock //this will get dropped at end of loop - let orig = #load; - - match op{ - # match_stmts - _ => {panic!("shouldnt happen {:?}",op)} - } - }; - - expanded.extend(quote! 
{ - struct #buf_op_name{ - data: #lamellar::array::#byte_array_type, - ops: Mutex,usize)>>, - // new_ops: Mutex,usize)>>, - cur_len: AtomicUsize, //this could probably just be a normal usize cause we only update it after we get ops lock - complete: RwLock>, - results_offset: RwLock>, - results: RwLock, - } - #[#am_data(Debug,AmGroup(false))] - struct #am_buf_name{ - data: #lamellar::array::#array_type<#typeident>, - // ops: Vec<(ArrayOpCmd<#typeident>,#lamellar::array::OpAmInputToValue<#typeident>)>, - ops: Vec, - // new_ops: Vec>, - // ops2: OneSidedMemoryRegion, - res_buf_size: usize, - orig_pe: usize, - } - impl #lamellar::array::BufferOp for #buf_op_name{ - // #[#lamellar::tracing::instrument(skip_all)] - fn add_ops(&self, op_ptr: *const u8, op_data_ptr: *const u8, team: Pin>) -> (bool,Arc){ - // let span1 = #lamellar::tracing::trace_span!("convert"); - // let _guard = span1.enter(); - let op_data = unsafe{(&*(op_data_ptr as *const #lamellar::array::InputToValue<'_,#typeident>)).as_op_am_input()}; - let op = unsafe{*(op_ptr as *const ArrayOpCmd<#typeident>)}; - // drop(_guard); - // let span2 = #lamellar::tracing::trace_span!("lock"); - // let _guard = span2.enter(); - // let mut bufs = self.new_ops.lock(); - let mut bufs = self.ops.lock(); - - // let mut buf = self.ops.lock(); - - // drop(_guard); - // let span3 = #lamellar::tracing::trace_span!("update"); - // let _guard = span3.enter(); - let first = bufs.len() == 0; - let op_size = op.num_bytes(); - let data_size = op_data.num_bytes(); - if first { - // bufs.push((team.alloc_one_sided_mem_region(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE,op_size+data_size)),0)); - let mut v = Vec::with_capacity(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)); - unsafe {v.set_len(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size));} - bufs.push((v,0)); - } - else { - if bufs.last().unwrap().1 + op_size+data_size > #lamellar::array::OPS_BUFFER_SIZE{ - // bufs.push((team.alloc_one_sided_mem_region(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)),0)); - let mut v = Vec::with_capacity(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)); - unsafe {v.set_len(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size));} - bufs.push((v,0)); - } - } - // let mut buf: &mut (OneSidedMemoryRegion, usize) = bufs.last_mut().unwrap(); - let mut buf: &mut (Vec, usize) = bufs.last_mut().unwrap(); - - let _temp = self.cur_len.fetch_add(op_data.len(),Ordering::SeqCst); - - // let mut buf_slice = unsafe{buf.0.as_mut_slice().unwrap()}; - - let mut buf_slice = buf.0.as_mut_slice(); - - buf.1 += op.to_bytes(&mut buf_slice[(buf.1)..]); - buf.1 += op_data.to_bytes(&mut buf_slice[(buf.1)..]); - (first,self.complete.read().clone()) - } - // #[#lamellar::tracing::instrument(skip_all)] - fn add_fetch_ops(&self, pe: usize, op_ptr: *const u8, op_data_ptr: *const u8, req_ids: &Vec, res_map: OpResults, team: Pin>) -> (bool,Arc,Option){ - let op_data = unsafe{(&*(op_data_ptr as *const #lamellar::array::InputToValue<'_,#typeident>)).as_op_am_input()}; - let op = unsafe{*(op_ptr as *const ArrayOpCmd<#typeident>)}; - let mut res_offsets = vec![]; - let mut bufs = self.ops.lock(); - // let mut bufs = self.new_ops.lock(); - - for rid in req_ids{ - let res_size = op.result_size(); - res_offsets.push((*rid,self.results_offset.read().fetch_add(res_size,Ordering::SeqCst),res_size)); - } - // let first = buf.len() == 0; - // buf.push((op,op_data)); - let first = bufs.len() == 0; - let op_size = 
op.num_bytes(); - let data_size = op_data.num_bytes(); - if first { - // bufs.push((team.alloc_one_sided_mem_region(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)),0)); - let mut v = Vec::with_capacity(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)); - unsafe {v.set_len(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size));} - bufs.push((v,0)); - } - else { - if bufs.last().unwrap().1 + op_size+data_size > #lamellar::array::OPS_BUFFER_SIZE{ - // bufs.push((team.alloc_one_sided_mem_region(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)),0)); - let mut v = Vec::with_capacity(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size)); - unsafe {v.set_len(std::cmp::max(#lamellar::array::OPS_BUFFER_SIZE, op_size+data_size));} - bufs.push((v,0)); - } - } - // let mut buf: &mut (OneSidedMemoryRegion, usize) = bufs.last_mut().unwrap(); - let mut buf: &mut (Vec, usize) = bufs.last_mut().unwrap(); - let _temp = self.cur_len.fetch_add(op_data.len(),Ordering::SeqCst); - // let mut buf_slice = unsafe{buf.0.as_mut_slice().unwrap()}; - let mut buf_slice = buf.0.as_mut_slice(); - - buf.1 += op.to_bytes(&mut buf_slice[(buf.1)..]); - buf.1 += op_data.to_bytes(&mut buf_slice[(buf.1)..]); - res_map.insert(pe,self.results.read().clone()); - (first,self.complete.read().clone(),Some(res_offsets)) - } - // #[#lamellar::tracing::instrument(skip_all)] - fn into_arc_am(&self, pe: usize, sub_array: std::ops::Range)-> (Vec>,usize,Arc, PeOpResults){ - - let mut ams: Vec> = Vec::new(); - // let mut bufs = self.new_ops.lock(); - let mut bufs = self.ops.lock(); - - let mut ops = Vec::new(); - let len = self.cur_len.load(Ordering::SeqCst); - self.cur_len.store(0,Ordering::SeqCst); - std::mem::swap(&mut ops, &mut bufs); - let mut complete = Arc::new(AtomicBool::new(false)); - std::mem::swap(&mut complete, &mut self.complete.write()); - let mut results = Arc::new(Mutex::new(Vec::new())); - - std::mem::swap(&mut results, &mut self.results.write()); - let mut result_buf_size = self.results_offset.read().swap(0,Ordering::SeqCst); - let mut cur_size = 0; - let mut op_i = ops.len() as isize-1; - let data: #lamellar::array::#array_type<#typeident> = self.data.upgrade().expect("array invalid").into(); - for (mut lmr,size) in ops.drain(..){ - unsafe { lmr.set_len(size);} - let mut am = #am_buf_name{ - // wait: wait.clone(), - ops: lmr, - data: data.sub_array(sub_array.clone()), - // ops2: lmr.sub_region(0..size), - res_buf_size: result_buf_size, - orig_pe: data.my_pe(), - }; - ams.push(Arc::new(am)); - } - - (ams,len,complete,results) - } - } - - impl #am_buf_name{ - // async fn get_ops(&self, team: &std::sync::Arc<#lamellar::LamellarTeam>) -> #lamellar::OneSidedMemoryRegion{ - // unsafe{ - // let serialized_ops = if self.ops2.data_local(){ - // self.ops2.clone() - // } - // else{ - // let serialized_ops = team.alloc_one_sided_mem_region::(self.ops2.len()); - // let local_slice = serialized_ops.as_mut_slice().unwrap(); - // local_slice[self.ops2.len()- 1] = 255u8; - // // self.ops2.get_unchecked(0, serialized_ops.clone()); - // self.ops2.iget(0, serialized_ops.clone()); - - // while local_slice[self.ops2.len()- 1] == 255u8 { - // // async_std::task::yield_now().await; - // std::thread::yield_now(); - // } - // serialized_ops - // }; - // serialized_ops - // // self.ops2.iget(0,serialized_ops.clone()); - // // #lamellar::deserialize(serialized_ops.as_mut_slice().unwrap(),false).unwrap() - // } - // } - - fn get_op<'a>(&self, buf: &'a [u8]) -> 
(usize,(ArrayOpCmd<#typeident>,RemoteOpAmInputToValue<'a,#typeident>)){ - let mut bytes_read = 0; - let (cmd,size) = ArrayOpCmd::from_bytes(buf); - bytes_read += size; - let (data,size) = RemoteOpAmInputToValue::from_bytes(& buf[bytes_read..]); - bytes_read += size; - (bytes_read,(cmd,data)) - } - } - #[#am(AmGroup(false))] - impl LamellarAM for #am_buf_name{ //eventually we can return fetchs here too... - async fn exec(&self) -> Vec{ - // let timer=std::time::Instant::now(); - #slice - let u8_len = self.res_buf_size; - let mut results_u8: Vec = if u8_len > 0 { - let mut temp = Vec::with_capacity(u8_len); - unsafe{temp.set_len(u8_len)}; - temp - } - else{ - vec![] - }; - let mut results_offset=0; - // let mut results_slice = unsafe{std::slice::from_raw_parts_mut(results_u8.as_mut_ptr() as *mut #typeident,self.num_fetch_ops)}; - // let local_ops = self.get_ops(&lamellar::team).await; - // let local_ops_slice = local_ops.as_slice().unwrap(); - let local_ops_slice = &self.ops; - let mut cur_index = 0; - while cur_index < local_ops_slice.len(){ - let (bytes_read,(op,ops)) = self.get_op(&local_ops_slice[cur_index..]); - cur_index += bytes_read; - match ops{ - RemoteOpAmInputToValue::OneToOne(index,val) => { - // let index = *index; - // let val = *val; - #inner_op - }, - RemoteOpAmInputToValue::OneToMany(index,vals) => { - for val in vals{ //there maybe an optimization here where we grab the index lock outside of the loop - #inner_op - } - }, - RemoteOpAmInputToValue::ManyToOne(indices,val) => { - for index in indices{ - #inner_op - } - }, - RemoteOpAmInputToValue::ManyToMany(indices,vals) => { - for (index,val) in indices.iter().zip(vals.iter()){ - #inner_op - } - }, - } - } - // membarrier::heavy(); - // for (op, ops) in &local_ops { //(ArrayOpCmd,OpAmInputToValue) - // for (op, ops) in &self.ops { //(ArrayOpCmd,OpAmInputToValue) - // match ops{ - // OpAmInputToValue::OneToOne(index,val) => { - // #inner_op - // }, - // OpAmInputToValue::OneToMany(index,vals) => { - // for val in vals{ //there maybe an optimization here where we grab the index lock outside of the loop - // #inner_op - // } - // }, - // OpAmInputToValue::ManyToOne(indices,val) => { - // for index in indices{ - // #inner_op - // } - // }, - // OpAmInputToValue::ManyToMany(indices,vals) => { - // for (index,val) in indices.iter().zip(vals.iter()){ - // #inner_op - // } - // }, - // } - // } - unsafe { results_u8.set_len(results_offset)}; - results_u8 - } - } - #[allow(non_snake_case)] - fn #dist_am_buf_name(array: #lamellar::array::#byte_array_type) -> Arc{ - Arc::new(#buf_op_name{ - data: array, - ops: Mutex::new(Vec::new()), - // new_ops: Mutex::new(Vec::new()), - cur_len: AtomicUsize::new(0), - complete: RwLock::new(Arc::new(AtomicBool::new(false))), - results_offset: RwLock::new(Arc::new(AtomicUsize::new(0))), - results: RwLock::new(Arc::new(Mutex::new(Vec::new()))), - }) - } - inventory::submit! { - // #![crate = #lamellar] - #lamellar::array::#reg_name{ - id: std::any::TypeId::of::<#typeident>(), - op: #dist_am_buf_name, - } - } - }); - expanded -} fn gen_multi_val_multi_idx(op_type: proc_macro2::TokenStream, lock: &proc_macro2::TokenStream, op: proc_macro2::TokenStream) -> proc_macro2::TokenStream{ quote! 
{ #op_type =>{ for elem in idx_vals{ - let index = elem.index; + let index = elem.index as usize; let val = elem.val; #lock #op @@ -878,11 +211,11 @@ fn gen_multi_val_multi_idx(op_type: proc_macro2::TokenStream, lock: &proc_macro2 } } -fn gen_single_idx_multi_val(op_type: proc_macro2::TokenStream, lock: &proc_macro2::TokenStream, op: proc_macro2::TokenStream) -> proc_macro2::TokenStream{ +fn gen_single_val_multi_idx(op_type: proc_macro2::TokenStream, lock: &proc_macro2::TokenStream, op: proc_macro2::TokenStream) -> proc_macro2::TokenStream{ quote! { #op_type =>{ - for index in self.indices.iter(){ - let index = *index; + for index in indices.iter(){ + let index = (*index) as usize; #lock #op } @@ -1201,30 +534,30 @@ fn create_buf_ops2( let multi_val_multi_idx_match_stmts = quote! {}; let single_val_multi_idx_match_stmts = quote! {}; let multi_val_single_idx_match_stmts = quote! {}; - let mut all_match_stmts: Vec<(proc_macro2::TokenStream, fn(proc_macro2::TokenStream, & proc_macro2::TokenStream, proc_macro2::TokenStream) -> proc_macro2::TokenStream)> = vec![(multi_val_multi_idx_match_stmts,gen_multi_val_multi_idx),(single_val_multi_idx_match_stmts,gen_single_idx_multi_val),(multi_val_single_idx_match_stmts,gen_multi_val_single_idx)]; + let mut all_match_stmts: Vec<(proc_macro2::TokenStream, fn(proc_macro2::TokenStream, & proc_macro2::TokenStream, proc_macro2::TokenStream) -> proc_macro2::TokenStream)> = vec![(multi_val_multi_idx_match_stmts,gen_multi_val_multi_idx),(single_val_multi_idx_match_stmts,gen_single_val_multi_idx),(multi_val_single_idx_match_stmts,gen_multi_val_single_idx)]; for (match_stmts, gen_fn) in all_match_stmts.iter_mut() { for optype in optypes { match optype { OpType::Arithmetic => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Add},&lock,quote!{ #lhs += val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Sub},&lock,quote!{#lhs -= val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Mul},&lock,quote!{#lhs *= val;})); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Div},&lock,quote!{#lhs /= val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Rem},&lock,quote!{#lhs %= val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Put},&lock,assign.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Add},&lock,quote!{ #lhs += val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Sub},&lock,quote!{#lhs -= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Mul},&lock,quote!{#lhs *= val;})); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Div},&lock,quote!{#lhs /= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Rem},&lock,quote!{#lhs %= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Put},&lock,assign.clone())); } , OpType::Bitwise => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::And},&lock,quote!{#lhs &= val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Or},&lock,quote!{#lhs |= val; })); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Xor},&lock,quote!{#lhs ^= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::And},&lock,quote!{#lhs &= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Or},&lock,quote!{#lhs |= val; })); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Xor},&lock,quote!{#lhs ^= val; })); }, OpType::Access => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Store},&lock,assign.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Store},&lock,assign.clone())); } OpType::Shift => { - match_stmts.extend(gen_fn(quote! 
{ArrayOpCmd2::Shl},&lock,shl.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Shr},&lock,shr.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Shl},&lock,shl.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Shr},&lock,shr.clone())); }, _ => {} //for fetch, readonly, and compex ops do nothing } @@ -1243,33 +576,33 @@ fn create_buf_ops2( let multi_val_single_idx_fetch_match_stmts = quote! {}; let mut all_match_stmts: Vec<(proc_macro2::TokenStream, fn(proc_macro2::TokenStream, & proc_macro2::TokenStream, proc_macro2::TokenStream) -> proc_macro2::TokenStream)> = vec![(multi_val_multi_idx_fetch_match_stmts,gen_multi_val_multi_idx), - (single_val_multi_idx_fetch_match_stmts,gen_single_idx_multi_val), + (single_val_multi_idx_fetch_match_stmts,gen_single_val_multi_idx), (multi_val_single_idx_fetch_match_stmts,gen_multi_val_single_idx)]; for (match_stmts, gen_fn) in all_match_stmts.iter_mut() { for optype in optypes { match optype { OpType::Arithmetic => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchAdd},&lock,fetch_add.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchSub},&lock,fetch_sub.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchMul},&lock,fetch_mul.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchDiv},&lock,fetch_div.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchRem},&lock,fetch_rem.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Get},&lock,quote!{res.push(#load);})); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchAdd},&lock,fetch_add.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchSub},&lock,fetch_sub.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchMul},&lock,fetch_mul.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchDiv},&lock,fetch_div.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchRem},&lock,fetch_rem.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Get},&lock,quote!{res.push(#load);})); }, OpType::Bitwise => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchAnd},&lock,fetch_and.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchOr},&lock,fetch_or.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchXor},&lock,fetch_xor.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchAnd},&lock,fetch_and.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchOr},&lock,fetch_or.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchXor},&lock,fetch_xor.clone())); }, OpType::Access => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Swap},&lock,swap.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Swap},&lock,swap.clone())); }, OpType::ReadOnly => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::Load},&lock,quote!{res.push(#load);})); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::Load},&lock,quote!{res.push(#load);})); }, OpType::Shift => { - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchShl},&lock,fetch_shl.clone())); - match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::FetchShr},&lock,fetch_shr.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchShl},&lock,fetch_shl.clone())); + match_stmts.extend(gen_fn(quote! {ArrayOpCmd::FetchShr},&lock,fetch_shr.clone())); }, _ => {} //dont handle result ops (CompEx,CompExEs) here @@ -1289,13 +622,13 @@ fn create_buf_ops2( let multi_val_single_idx_result_match_stmts = quote! 
{}; let mut all_match_stmts: Vec<(proc_macro2::TokenStream, fn(proc_macro2::TokenStream, & proc_macro2::TokenStream, proc_macro2::TokenStream) -> proc_macro2::TokenStream)> = vec![(multi_val_multi_idx_result_match_stmts,gen_multi_val_multi_idx), - (single_val_multi_idx_result_match_stmts,gen_single_idx_multi_val), + (single_val_multi_idx_result_match_stmts,gen_single_val_multi_idx), (multi_val_single_idx_result_match_stmts,gen_multi_val_single_idx)]; for (match_stmts, gen_fn) in all_match_stmts.iter_mut() { for optype in optypes { match optype { - OpType::CompEx => match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::CompareExchange(old)},&lock,compare_exchange.clone())), - OpType::CompExEps => match_stmts.extend(gen_fn(quote! {ArrayOpCmd2::CompareExchangeEps(old,eps)},&lock,compare_exchange_eps.clone())), + OpType::CompEx => match_stmts.extend(gen_fn(quote! {ArrayOpCmd::CompareExchange(old)},&lock,compare_exchange.clone())), + OpType::CompExEps => match_stmts.extend(gen_fn(quote! {ArrayOpCmd::CompareExchangeEps(old,eps)},&lock,compare_exchange_eps.clone())), _ => {} //current only ops that return results are CompEx, CompExEps } } @@ -1352,25 +685,56 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_multi_idx_am_buf_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, idx_vals: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #multi_val_multi_idx_am_buf_name{ //eventually we can return fetchs here too... async fn exec(&self) { #slice - let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal<#typeident>, self.idx_vals.len()/std::mem::size_of::>())}; - match self.op { - #multi_val_multi_idx_match_stmts - } + + match self.index_size{ + 1 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_match_stmts + } + } + 2 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_match_stmts + } + } + 4 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_match_stmts + } + } + 8 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_match_stmts + } + } + _ => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_match_stmts + } + } + }; } } #[allow(non_snake_case)] - fn #dist_multi_val_multi_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, idx_vals: Vec) -> Arc{ + fn #dist_multi_val_multi_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, idx_vals: Vec, index_size: u8) -> Arc{ Arc::new(#multi_val_multi_idx_am_buf_name{ data: array.into(), op: op.into(), idx_vals: idx_vals, + index_size: index_size, }) } inventory::submit! 
{ @@ -1383,23 +747,53 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #single_val_multi_idx_am_buf_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, val: #typeident, - indices: Vec, - + indices: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #single_val_multi_idx_am_buf_name{ //eventually we can return fetchs here too... async fn exec(&self) { #slice let val = self.val; - match self.op { - #single_val_multi_idx_match_stmts + match self.index_size{ + 1 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u8, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_match_stmts + } + } + 2 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u16, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_match_stmts + } + } + 4 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u32, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_match_stmts + } + } + 8 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u64, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_match_stmts + } + } + _ => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const usize, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_match_stmts + } + } + } } } #[allow(non_snake_case)] - fn #dist_single_val_multi_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, val: Vec, indicies: Vec) -> Arc{ + fn #dist_single_val_multi_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, val: Vec, indicies: Vec, index_size: u8) -> Arc{ let val_slice = unsafe {std::slice::from_raw_parts(val.as_ptr() as *const #typeident, std::mem::size_of::<#typeident>())}; let val = val_slice[0]; Arc::new(#single_val_multi_idx_am_buf_name{ @@ -1407,6 +801,7 @@ fn create_buf_ops2( op: op.into(), val: val, indices: indicies, + index_size: index_size, }) } inventory::submit! { @@ -1419,7 +814,7 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_single_idx_am_buf_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, vals: Vec, index: usize, @@ -1436,7 +831,7 @@ fn create_buf_ops2( } } #[allow(non_snake_case)] - fn #dist_multi_val_single_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, vals: Vec, index: usize) -> Arc{ + fn #dist_multi_val_single_idx_am_buf_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, vals: Vec, index: usize) -> Arc{ Arc::new(#multi_val_single_idx_am_buf_name{ data: array.into(), op: op.into(), @@ -1458,27 +853,57 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_multi_idx_am_buf_result_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, idx_vals: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #multi_val_multi_idx_am_buf_result_name{ //eventually we can return fetchs here too... 
async fn exec(&self) -> Vec> { #slice - let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal<#typeident>, self.idx_vals.len()/std::mem::size_of::>())}; let mut res = Vec::new(); - match self.op { - #multi_val_multi_idx_result_match_stmts - } + match self.index_size{ + 1 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_result_match_stmts + } + } + 2 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_result_match_stmts + } + } + 4 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_result_match_stmts + } + } + 8 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_result_match_stmts + } + } + _ => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_result_match_stmts + } + } + }; res } } #[allow(non_snake_case)] - fn #dist_multi_val_multi_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, idx_vals: Vec) -> Arc{ + fn #dist_multi_val_multi_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, idx_vals: Vec, index_size: u8) -> Arc{ Arc::new(#multi_val_multi_idx_am_buf_result_name{ data: array.into(), op: op.into(), idx_vals: idx_vals, + index_size: index_size, }) } inventory::submit! { @@ -1491,10 +916,10 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #single_val_multi_idx_am_buf_result_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, val: #typeident, - indices: Vec, - + indices: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #single_val_multi_idx_am_buf_result_name{ //eventually we can return fetchs here too... 
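[Note on the hunks above] The five-way `index_size` dispatch introduced here is the heart of this change: the packed `Vec<u8>` payload is reinterpreted as typed (index, value) pairs whose index width the sender chose based on the array length. A minimal standalone sketch of the same pattern, shown for one concrete element type; `IdxVal` and `index_size` are names from this patch, everything else is illustrative:

use std::mem::size_of;

// Illustrative stand-in for the `IdxVal` pair the macro-generated AMs cast to.
#[repr(C)]
struct IdxVal<I, T> {
    index: I,
    val: T,
}

// Safety: `bytes` must hold whole, properly aligned `IdxVal<I, T>` entries,
// exactly what the generated `exec` bodies assume of `self.idx_vals`.
unsafe fn as_idx_vals<I, T>(bytes: &[u8]) -> &[IdxVal<I, T>] {
    std::slice::from_raw_parts(
        bytes.as_ptr() as *const IdxVal<I, T>,
        bytes.len() / size_of::<IdxVal<I, T>>(),
    )
}

// One branch per index width, each widening the index back to usize just as
// `gen_multi_val_multi_idx` does with `let index = elem.index as usize;`.
fn apply_add(slice: &mut [u64], idx_vals: &[u8], index_size: u8) {
    match index_size {
        1 => for e in unsafe { as_idx_vals::<u8, u64>(idx_vals) } {
            slice[e.index as usize] += e.val;
        },
        2 => for e in unsafe { as_idx_vals::<u16, u64>(idx_vals) } {
            slice[e.index as usize] += e.val;
        },
        4 => for e in unsafe { as_idx_vals::<u32, u64>(idx_vals) } {
            slice[e.index as usize] += e.val;
        },
        8 => for e in unsafe { as_idx_vals::<u64, u64>(idx_vals) } {
            slice[e.index as usize] += e.val;
        },
        _ => for e in unsafe { as_idx_vals::<usize, u64>(idx_vals) } {
            slice[e.index] += e.val;
        },
    }
}

Assuming the pairs are packed without padding, a small array shrinks each serialized pair from 16 bytes (usize index + u64 value) toward 9, which is the traffic reduction the commit title refers to.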
@@ -1502,15 +927,45 @@ fn create_buf_ops2( #slice let val = self.val; let mut res = Vec::new(); - match self.op { - #single_val_multi_idx_result_match_stmts + match self.index_size{ + 1 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u8, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_result_match_stmts + } + } + 2 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u16, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_result_match_stmts + } + } + 4 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u32, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_result_match_stmts + } + } + 8 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u64, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_result_match_stmts + } + } + _ => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const usize, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_result_match_stmts + } + } + } // println!("res: {:?}",res); res } } #[allow(non_snake_case)] - fn #dist_single_val_multi_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, val: Vec, indicies: Vec) -> Arc{ + fn #dist_single_val_multi_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, val: Vec, indicies: Vec, index_size: u8) -> Arc{ let val_slice = unsafe {std::slice::from_raw_parts(val.as_ptr() as *const #typeident, std::mem::size_of::<#typeident>())}; let val = val_slice[0]; Arc::new(#single_val_multi_idx_am_buf_result_name{ @@ -1518,6 +973,7 @@ fn create_buf_ops2( op: op.into(), val: val, indices: indicies, + index_size: index_size, }) } inventory::submit! { @@ -1530,7 +986,7 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_single_idx_am_buf_result_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, vals: Vec, index: usize, @@ -1550,7 +1006,7 @@ fn create_buf_ops2( } } #[allow(non_snake_case)] - fn #dist_multi_val_single_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, vals: Vec, index: usize) -> Arc{ + fn #dist_multi_val_single_idx_am_buf_result_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, vals: Vec, index: usize) -> Arc{ Arc::new(#multi_val_single_idx_am_buf_result_name{ data: array.into(), op: op.into(), @@ -1573,28 +1029,59 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_multi_idx_am_buf_fetch_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, idx_vals: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #multi_val_multi_idx_am_buf_fetch_name{ //eventually we can return fetchs here too... 
async fn exec(&self) -> Vec<#typeident> { #slice - let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal<#typeident>, self.idx_vals.len()/std::mem::size_of::>())}; let mut res = Vec::new(); - match self.op { - #multi_val_multi_idx_fetch_match_stmts - } + match self.index_size{ + 1 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_fetch_match_stmts + } + } + 2 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_fetch_match_stmts + } + } + 4 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_fetch_match_stmts + } + } + 8 => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_fetch_match_stmts + } + } + _ => { + let idx_vals = unsafe {std::slice::from_raw_parts(self.idx_vals.as_ptr() as *const IdxVal, self.idx_vals.len()/std::mem::size_of::>())}; + match self.op { + #multi_val_multi_idx_fetch_match_stmts + } + } + }; + // println!("res: {:?}",res); res } } #[allow(non_snake_case)] - fn #dist_multi_val_multi_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, idx_vals: Vec) -> Arc{ + fn #dist_multi_val_multi_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, idx_vals: Vec,index_usize: u8) -> Arc{ Arc::new(#multi_val_multi_idx_am_buf_fetch_name{ data: array.into(), op: op.into(), idx_vals: idx_vals, + index_size: index_usize, }) } inventory::submit! { @@ -1607,10 +1094,10 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #single_val_multi_idx_am_buf_fetch_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, val: #typeident, - indices: Vec, - + indices: Vec, + index_size: u8, } #[#am(AmGroup(false))] impl LamellarAM for #single_val_multi_idx_am_buf_fetch_name{ //eventually we can return fetchs here too... 
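[Note] The single-value, multi-index AMs (like the one whose `indices: Vec<u8>` and `index_size: u8` fields appear just above) apply the same dispatch to a bare index buffer. A plausible sketch of the sending side, deriving the width from the array length and packing the indices the receiver will reinterpret; this is illustrative only, and assumes a 64-bit little-endian target, which the receiving side's `from_raw_parts` cast relies on as well:

// Pick an index width wide enough to hold any index below `array_len`.
fn index_size_for_len(array_len: usize) -> u8 {
    if array_len <= u8::MAX as usize { 1 }
    else if array_len <= u16::MAX as usize { 2 }
    else if array_len <= u32::MAX as usize { 4 }
    else { 8 }
}

// Truncate each index to `index_size` little-endian bytes; on arrival the
// generated AM casts this buffer straight back to &[u8]/&[u16]/&[u32]/&[u64].
fn pack_indices(indices: &[usize], index_size: u8) -> Vec<u8> {
    let mut buf = Vec::with_capacity(indices.len() * index_size as usize);
    for &i in indices {
        buf.extend_from_slice(&i.to_le_bytes()[..index_size as usize]);
    }
    buf
}

The receiver recovers the element count by dividing the byte length by the width, so both sides must agree on `index_size`; that is why every `dist_*` constructor in this patch now threads an extra `index_size: u8` argument through to the AM.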
@@ -1618,15 +1105,46 @@ fn create_buf_ops2( #slice let val = self.val; let mut res = Vec::new(); - match self.op { - #single_val_multi_idx_fetch_match_stmts + match self.index_size{ + 1 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u8, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_fetch_match_stmts + } + } + 2 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u16, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_fetch_match_stmts + } + } + 4 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u32, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_fetch_match_stmts + } + } + 8 => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const u64, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_fetch_match_stmts + } + } + _ => { + let indices = unsafe {std::slice::from_raw_parts(self.indices.as_ptr() as *const usize, self.indices.len()/std::mem::size_of::())}; + match self.op { + #single_val_multi_idx_fetch_match_stmts + } + } + } + // println!("res: {:?}",res); res } } #[allow(non_snake_case)] - fn #dist_single_val_multi_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, val: Vec, indicies: Vec) -> Arc{ + fn #dist_single_val_multi_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, val: Vec, indicies: Vec,index_size: u8) -> Arc{ let val_slice = unsafe {std::slice::from_raw_parts(val.as_ptr() as *const #typeident, std::mem::size_of::<#typeident>())}; let val = val_slice[0]; Arc::new(#single_val_multi_idx_am_buf_fetch_name{ @@ -1634,6 +1152,7 @@ fn create_buf_ops2( op: op.into(), val: val, indices: indicies, + index_size: index_size, }) } inventory::submit! 
{ @@ -1646,7 +1165,7 @@ fn create_buf_ops2( #[#am_data(Debug,AmGroup(false))] struct #multi_val_single_idx_am_buf_fetch_name{ data: #lamellar::array::#array_type<#typeident>, - op: #lamellar::array::ArrayOpCmd2<#typeident>, + op: #lamellar::array::ArrayOpCmd<#typeident>, vals: Vec, index: usize, @@ -1666,7 +1185,7 @@ fn create_buf_ops2( } } #[allow(non_snake_case)] - fn #dist_multi_val_single_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd2>, vals: Vec, index: usize) -> Arc{ + fn #dist_multi_val_single_idx_am_buf_fetch_name(array: #lamellar::array::LamellarByteArray, op: #lamellar::array::ArrayOpCmd>, vals: Vec, index: usize) -> Arc{ Arc::new(#multi_val_single_idx_am_buf_fetch_name{ data: array.into(), op: op.into(), @@ -1737,14 +1256,7 @@ fn create_buffered_ops( let ro_optypes = vec![OpType::ReadOnly]; //, vec![OpType::Arithmetic, OpType::Access]; - let buf_op_impl = create_buf_ops( - typeident.clone(), - quote::format_ident!("ReadOnlyArray"), - quote::format_ident!("ReadOnlyByteArrayWeak"), - &ro_optypes, - rt, - ); - expanded.extend(buf_op_impl); + let buf_op_impl = create_buf_ops2( typeident.clone(), quote::format_ident!("ReadOnlyArray"), @@ -1754,15 +1266,6 @@ fn create_buffered_ops( ); expanded.extend(buf_op_impl); - let buf_op_impl = create_buf_ops( - typeident.clone(), - quote::format_ident!("UnsafeArray"), - quote::format_ident!("UnsafeByteArrayWeak"), - &optypes, - rt, - ); - expanded.extend(buf_op_impl); - let buf_op_impl = create_buf_ops2( typeident.clone(), quote::format_ident!("UnsafeArray"), @@ -1772,15 +1275,7 @@ fn create_buffered_ops( ); expanded.extend(buf_op_impl); - for (array_type, byte_array_type_weak, byte_array_type) in atomic_array_types { - let buf_op_impl = create_buf_ops( - typeident.clone(), - array_type.clone(), - byte_array_type_weak.clone(), - &optypes, - rt, - ); - expanded.extend(buf_op_impl); + for (array_type, _byte_array_type_weak, byte_array_type) in atomic_array_types { let buf_op_impl = create_buf_ops2( typeident.clone(), array_type.clone(), @@ -1793,11 +1288,6 @@ fn create_buffered_ops( expanded - // if lamellar == "crate" { - // expanded - // } else { - // user_expanded - // } } pub(crate) fn __generate_ops_for_type_rt(item: TokenStream) -> TokenStream { @@ -1995,9 +1485,7 @@ pub(crate) fn __derive_arrayops(input: TokenStream) -> TokenStream { use __lamellar::darc::prelude::*; use __lamellar::array::{ ArrayOpCmd, - ArrayOpCmd2, OpResultOffsets, - RemoteOpAmInputToValue, PeOpResults, OpResults, IdxVal, diff --git a/src/array.rs b/src/array.rs index 35919de6..e7c08d58 100644 --- a/src/array.rs +++ b/src/array.rs @@ -76,11 +76,11 @@ use crate::{active_messaging::*, LamellarTeamRT}; use async_trait::async_trait; use enum_dispatch::enum_dispatch; use futures_lite::Future; -use parking_lot::{Mutex, RwLock}; +use parking_lot::Mutex; use std::collections::HashMap; use std::marker::PhantomData; use std::pin::Pin; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; // use serde::de::DeserializeOwned; @@ -111,10 +111,17 @@ pub mod prelude; pub(crate) mod r#unsafe; pub use r#unsafe::{ - operations::{UnsafeArrayOpBuf,MultiValMultiIdxOps,MultiValSingleIdxOps,SingleValMultiIdxOps,BatchReturnType}, UnsafeArray, UnsafeByteArray, UnsafeByteArrayWeak, + operations::{ + BatchReturnType, MultiValMultiIdxOps, MultiValSingleIdxOps, SingleValMultiIdxOps, + }, + UnsafeArray, UnsafeByteArray, UnsafeByteArrayWeak, }; pub(crate) mod read_only; -pub 
use read_only::{ReadOnlyArray, ReadOnlyArrayOpBuf, /*ReadOnlyArrayMultiMultiOps, ReadOnlyArrayMultiSingleOps,*/ ReadOnlyByteArray, ReadOnlyByteArrayWeak}; +pub use read_only::{ + ReadOnlyArray, ReadOnlyArrayOpBuf, + /*ReadOnlyArrayMultiMultiOps, ReadOnlyArrayMultiSingleOps,*/ ReadOnlyByteArray, + ReadOnlyByteArrayWeak, +}; // pub(crate) mod local_only; // pub use local_only::LocalOnlyArray; @@ -130,26 +137,24 @@ pub use atomic::{ pub(crate) mod generic_atomic; pub use generic_atomic::{ - operations::{GenericAtomicArrayOpBuf, /*GenericAtomicArrayMultiMultiOps, GenericAtomicArrayMultiSingleOps*/}, GenericAtomicArray, GenericAtomicByteArray, - GenericAtomicByteArrayWeak, GenericAtomicLocalData, + GenericAtomicArray, GenericAtomicByteArray, GenericAtomicByteArrayWeak, GenericAtomicLocalData, }; pub(crate) mod native_atomic; pub use native_atomic::{ - operations::{NativeAtomicArrayOpBuf,/*NativeAtomicArrayMultiMultiOps, NativeAtomicArrayMultiSingleOps*/}, NativeAtomicArray, NativeAtomicByteArray, - NativeAtomicByteArrayWeak, NativeAtomicLocalData, + NativeAtomicArray, NativeAtomicByteArray, NativeAtomicByteArrayWeak, NativeAtomicLocalData, }; pub(crate) mod local_lock_atomic; pub use local_lock_atomic::{ - operations::{LocalLockArrayOpBuf,/*LocalLockArrayMultiMultiOps, LocalLockArrayMultiSingleOps*/}, LocalLockArray, LocalLockByteArray, LocalLockByteArrayWeak, - LocalLockLocalData, LocalLockMutLocalData, + LocalLockArray, LocalLockByteArray, LocalLockByteArrayWeak, LocalLockLocalData, + LocalLockMutLocalData, }; pub(crate) mod global_lock_atomic; pub use global_lock_atomic::{ - operations::{GlobalLockArrayOpBuf,/*GlobalLockArrayMultiMultiOps, GlobalLockArrayMultiSingleOps*/}, GlobalLockArray, GlobalLockByteArray, - GlobalLockByteArrayWeak, GlobalLockLocalData, GlobalLockMutLocalData, + GlobalLockArray, GlobalLockByteArray, GlobalLockByteArrayWeak, GlobalLockLocalData, + GlobalLockMutLocalData, }; pub mod iterator; @@ -190,8 +195,6 @@ crate::inventory::collect!(ReduceKey); // lamellar_impl::generate_ops_for_type_rt!(true, true, u8,usize); // impl Dist for bool {} - - lamellar_impl::generate_reductions_for_type_rt!(true, u8, u16, u32, u64, usize); lamellar_impl::generate_reductions_for_type_rt!(false, u128); lamellar_impl::generate_ops_for_type_rt!(true, true, true, u8, u16, u32, u64, usize); @@ -402,8 +405,8 @@ impl TeamFrom<&LamellarArrayRdmaOutput> for LamellarArrayRdmaOutput< } } -impl TeamFrom<(&Vec,Distribution)> for Vec { - fn team_from(vals: (&Vec,Distribution), _team: &Pin>) -> Self { +impl TeamFrom<(&Vec, Distribution)> for Vec { + fn team_from(vals: (&Vec, Distribution), _team: &Pin>) -> Self { vals.0.to_vec() } } @@ -434,14 +437,18 @@ pub enum LamellarByteArray { GlobalLockArray(GlobalLockByteArray), } -impl LamellarByteArray{ +impl LamellarByteArray { pub fn type_id(&self) -> std::any::TypeId { - match self{ + match self { LamellarByteArray::UnsafeArray(_) => std::any::TypeId::of::(), LamellarByteArray::ReadOnlyArray(_) => std::any::TypeId::of::(), LamellarByteArray::AtomicArray(_) => std::any::TypeId::of::(), - LamellarByteArray::NativeAtomicArray(_) => std::any::TypeId::of::(), - LamellarByteArray::GenericAtomicArray(_) => std::any::TypeId::of::(), + LamellarByteArray::NativeAtomicArray(_) => { + std::any::TypeId::of::() + } + LamellarByteArray::GenericAtomicArray(_) => { + std::any::TypeId::of::() + } LamellarByteArray::LocalLockArray(_) => std::any::TypeId::of::(), LamellarByteArray::GlobalLockArray(_) => std::any::TypeId::of::(), } @@ -615,8 +622,8 @@ impl LamellarArrayCompa 
pub(crate) mod private { use crate::active_messaging::*; use crate::array::{ - AtomicArray, GlobalLockArray, - /*NativeAtomicArray, GenericAtomicArray,*/ LamellarReadArray, LamellarWriteArray, LamellarByteArray, + AtomicArray, GlobalLockArray, LamellarByteArray, + /*NativeAtomicArray, GenericAtomicArray,*/ LamellarReadArray, LamellarWriteArray, LocalLockArray, ReadOnlyArray, UnsafeArray, }; use crate::lamellar_request::{LamellarMultiRequest, LamellarRequest}; diff --git a/src/array/atomic/operations.rs b/src/array/atomic/operations.rs index fd66981a..dde8129a 100644 --- a/src/array/atomic/operations.rs +++ b/src/array/atomic/operations.rs @@ -1,46 +1,6 @@ use crate::array::atomic::*; use crate::array::*; -use std::any::TypeId; -use std::collections::HashMap; - -type BufFn = fn(AtomicByteArrayWeak) -> Arc; -// type OpFn = fn(UnsafeByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; - - -lazy_static! { - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - - // pub(crate) static ref NEWBUFOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; -} - -#[doc(hidden)] -pub struct AtomicArrayOpBuf { - pub id: TypeId, - pub op: BufFn, -} -// #[doc(hidden)] -// pub struct AtomicArrayOpBufNew { -// pub id: TypeId, -// pub op: OpFn, -// } - -crate::inventory::collect!(AtomicArrayOpBuf); - -// crate::inventory::collect!(AtomicArrayOpBufNew); - impl ReadOnlyOps for AtomicArray {} impl AccessOps for AtomicArray {} diff --git a/src/array/generic_atomic.rs b/src/array/generic_atomic.rs index 34443eb1..2da75449 100644 --- a/src/array/generic_atomic.rs +++ b/src/array/generic_atomic.rs @@ -2,7 +2,6 @@ pub(crate) mod iteration; pub(crate) mod operations; mod rdma; use crate::array::atomic::AtomicElement; -use crate::array::generic_atomic::operations::BUFOPS; use crate::array::private::LamellarArrayPrivate; use crate::array::r#unsafe::{UnsafeByteArray, UnsafeByteArrayWeak}; use crate::array::*; @@ -12,7 +11,6 @@ use crate::lamellar_team::{IntoLamellarTeam, LamellarTeamRT}; use crate::memregion::Dist; use parking_lot::{Mutex, MutexGuard}; use serde::ser::SerializeSeq; -use std::any::TypeId; // use std::ops::{Deref, DerefMut}; use std::ops::{ @@ -404,7 +402,7 @@ impl Iterator for GenericAtomicLocalDataIter { } } -impl GenericAtomicArray { +impl GenericAtomicArray { pub(crate) fn new>( team: U, array_size: usize, @@ -419,19 +417,6 @@ impl GenericAtomicArray { } let locks = Darc::new(team, vec).unwrap(); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let mut op_bufs = array.inner.data.op_buffers.write(); - let bytearray = GenericAtomicByteArray { - locks: locks.clone(), - array: array.clone().into(), - }; - - for _pe in 0..array.num_pes() { - op_bufs.push(func(GenericAtomicByteArray::downgrade(&bytearray))); - } - // println!("{}", op_bufs.len()); - } - GenericAtomicArray { locks: locks, array: array, @@ -588,8 +573,8 @@ impl GenericAtomicArray { } } -impl TeamFrom<(Vec,Distribution)> for GenericAtomicArray { - fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(Vec, Distribution)> for GenericAtomicArray { + fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self { let (vals, distribution) = input; let input = (&vals, distribution); let array: UnsafeArray = input.team_into(team); @@ -606,16 +591,7 @@ impl From> for GenericAtomicArray { vec.push(Mutex::new(())); } let 
locks = Darc::new(array.team(), vec).unwrap(); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let bytearray = GenericAtomicByteArray { - locks: locks.clone(), - array: array.clone().into(), - }; - let mut op_bufs = array.inner.data.op_buffers.write(); - for _pe in 0..array.inner.data.num_pes { - op_bufs.push(func(GenericAtomicByteArray::downgrade(&bytearray))) - } - } + GenericAtomicArray { locks: locks, array: array, @@ -642,11 +618,10 @@ impl From> for LamellarByteArray { } impl From for GenericAtomicArray { - fn from(array:LamellarByteArray) -> Self { + fn from(array: LamellarByteArray) -> Self { if let LamellarByteArray::GenericAtomicArray(array) = array { array.into() - } - else { + } else { panic!("Expected LamellarByteArray::GenericAtomicArray") } } @@ -808,13 +783,11 @@ impl LamellarArrayCompa } } - #[doc(hidden)] pub struct LocalGenericAtomicElement { pub(crate) val: Mutex, } - impl From> for AtomicElement { fn from(element: LocalGenericAtomicElement) -> AtomicElement { AtomicElement::LocalGenericAtomicElement(element) @@ -836,32 +809,29 @@ impl LocalGenericAtomicElement { } impl LocalGenericAtomicElement { pub fn fetch_add(&self, val: T) -> T { - let old = *self.val.lock(); - *self.val.lock() += val; - old + let old = *self.val.lock(); + *self.val.lock() += val; + old } pub fn fetch_sub(&self, val: T) -> T { - let old = *self.val.lock(); - *self.val.lock() -= val; - old + let old = *self.val.lock(); + *self.val.lock() -= val; + old } pub fn fetch_mul(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() *= val; - old + let old = *self.val.lock(); + *self.val.lock() *= val; + old } pub fn fetch_div(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() /= val; - old + let old = *self.val.lock(); + *self.val.lock() /= val; + old } pub fn fetch_rem(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() %= val; - old + let old = *self.val.lock(); + *self.val.lock() %= val; + old } } @@ -869,13 +839,12 @@ impl LocalGenericAtomicElement { pub fn compare_exchange(&self, current: T, new: T) -> Result { let current_val = *self.val.lock(); if current_val == current { - unsafe { - *self.val.lock() = new; - } + *self.val.lock() = new; + Ok(current_val) } else { Err(current_val) - } + } } } impl> @@ -889,9 +858,8 @@ impl LocalGenericAtomicElement { pub fn fetch_and(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() &= val; - old + let old = *self.val.lock(); + *self.val.lock() &= val; + old } pub fn fetch_or(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() |= val; - old + let old = *self.val.lock(); + *self.val.lock() |= val; + old } pub fn fetch_xor(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() ^= val; - old + let old = *self.val.lock(); + *self.val.lock() ^= val; + old } } impl LocalGenericAtomicElement { pub fn fetch_shl(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() <<= val; - old + let old = *self.val.lock(); + *self.val.lock() <<= val; + old } pub fn fetch_shr(&self, val: T) -> T { - - let old = *self.val.lock(); - *self.val.lock() >>= val; - old + let old = *self.val.lock(); + *self.val.lock() >>= val; + old } } impl AddAssign for LocalGenericAtomicElement { fn add_assign(&mut self, val: T) { // self.add(val) - *self.val.lock() += val + *self.val.lock() += val } } impl SubAssign for LocalGenericAtomicElement { fn sub_assign(&mut self, val: T) { - *self.val.lock() -= val + *self.val.lock() -= val } } impl MulAssign for 
LocalGenericAtomicElement { fn mul_assign(&mut self, val: T) { - *self.val.lock() *= val + *self.val.lock() *= val } } impl DivAssign for LocalGenericAtomicElement { fn div_assign(&mut self, val: T) { - *self.val.lock() /= val + *self.val.lock() /= val } } impl RemAssign for LocalGenericAtomicElement { fn rem_assign(&mut self, val: T) { - *self.val.lock() %= val + *self.val.lock() %= val } } impl BitAndAssign for LocalGenericAtomicElement { fn bitand_assign(&mut self, val: T) { - *self.val.lock() &= val + *self.val.lock() &= val } } impl BitOrAssign for LocalGenericAtomicElement { fn bitor_assign(&mut self, val: T) { - *self.val.lock() |= val + *self.val.lock() |= val } } impl BitXorAssign for LocalGenericAtomicElement { fn bitxor_assign(&mut self, val: T) { - *self.val.lock() ^= val + *self.val.lock() ^= val } } impl ShlAssign for LocalGenericAtomicElement { fn shl_assign(&mut self, val: T) { - self.val.lock().shl_assign(val) + self.val.lock().shl_assign(val) } } impl ShrAssign for LocalGenericAtomicElement { fn shr_assign(&mut self, val: T) { - self.val.lock().shr_assign(val) + self.val.lock().shr_assign(val) } } @@ -1001,4 +964,4 @@ impl std::fmt::Debug for LocalGenericAtomicElement let current_val = *self.val.lock(); write!(f, "{current_val:?}") } -} \ No newline at end of file +} diff --git a/src/array/generic_atomic/iteration.rs b/src/array/generic_atomic/iteration.rs index 180b5b2b..f4ed088b 100644 --- a/src/array/generic_atomic/iteration.rs +++ b/src/array/generic_atomic/iteration.rs @@ -1,6 +1,8 @@ use crate::array::generic_atomic::*; -use crate::array::iterator::distributed_iterator::{DistIter,DistIteratorLauncher,IndexedDistributedIterator,DistributedIterator}; -use crate::array::iterator::local_iterator::{LocalIter,LocalIteratorLauncher,IndexedLocalIterator,LocalIterator}; +use crate::array::iterator::distributed_iterator::{ + DistIteratorLauncher, DistributedIterator, IndexedDistributedIterator, +}; +use crate::array::iterator::local_iterator::{LocalIterator, LocalIteratorLauncher}; use crate::array::iterator::one_sided_iterator::OneSidedIter; use crate::array::iterator::{LamellarArrayIterators, LamellarArrayMutIterators, Schedule}; use crate::array::*; @@ -252,7 +254,7 @@ impl DistIteratorLauncher for GenericAtomicArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - DistIteratorLauncher::for_each_async(&self.array,iter, op) + DistIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -272,16 +274,21 @@ impl DistIteratorLauncher for GenericAtomicArray { where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self,sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } @@ -292,9 +299,9 @@ impl DistIteratorLauncher for GenericAtomicArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 
'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_async(&self.array, iter, d) } @@ -307,11 +314,11 @@ impl DistIteratorLauncher for GenericAtomicArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - DistIteratorLauncher::collect_async_with_schedule(&self.array, sched,iter, d) + DistIteratorLauncher::collect_async_with_schedule(&self.array, sched, iter, d) } fn team(&self) -> Pin> { self.array.team_rt().clone() @@ -333,7 +340,7 @@ impl LocalIteratorLauncher for GenericAtomicArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each(&self.array,iter, op) + LocalIteratorLauncher::for_each(&self.array, iter, op) } fn for_each_with_schedule( &self, @@ -345,19 +352,15 @@ impl LocalIteratorLauncher for GenericAtomicArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each_with_schedule(&self.array,sched, iter, op) + LocalIteratorLauncher::for_each_with_schedule(&self.array, sched, iter, op) } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - LocalIteratorLauncher::for_each_async(&self.array,iter, op) + LocalIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -382,7 +385,12 @@ impl LocalIteratorLauncher for GenericAtomicArray { LocalIteratorLauncher::reduce(&self.array, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -415,21 +423,26 @@ impl LocalIteratorLauncher for GenericAtomicArray { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } -// fn collect_async( + // fn collect_async( // &self, // iter: &I, // d: Distribution, @@ -460,14 +473,18 @@ impl LocalIteratorLauncher for GenericAtomicArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count(&self.array, iter) } - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: 
LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count_with_schedule(&self.array, sched, iter) } @@ -479,8 +496,12 @@ impl LocalIteratorLauncher for GenericAtomicArray { { LocalIteratorLauncher::sum(&self.array, iter) } - - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, diff --git a/src/array/generic_atomic/operations.rs b/src/array/generic_atomic/operations.rs index 7af58f3f..a69520f7 100644 --- a/src/array/generic_atomic/operations.rs +++ b/src/array/generic_atomic/operations.rs @@ -1,60 +1,5 @@ use crate::array::generic_atomic::*; use crate::array::*; -use std::any::TypeId; -use std::collections::HashMap; - -type BufFn = fn(GenericAtomicByteArrayWeak) -> Arc; -// type MultiMultiFn = fn(GenericAtomicByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; -// type MultiSingleFn = fn(GenericAtomicByteArray,ArrayOpCmd2,Vec,Vec) -> LamellarArcAm; - -lazy_static! { - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - - // pub(crate) static ref MULTIMULTIOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; - - // pub(crate) static ref MULTISINGLEOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; -} - -#[doc(hidden)] -pub struct GenericAtomicArrayOpBuf { - pub id: TypeId, - pub op: BufFn, -} - -// #[doc(hidden)] -// pub struct GenericAtomicArrayMultiMultiOps { -// pub id: TypeId, -// pub op: MultiMultiFn, -// } - -// #[doc(hidden)] -// pub struct GenericAtomicArrayMultiSingleOps { -// pub id: TypeId, -// pub op: MultiSingleFn, -// } - -crate::inventory::collect!(GenericAtomicArrayOpBuf); -// crate::inventory::collect!(GenericAtomicArrayMultiMultiOps); -// crate::inventory::collect!(GenericAtomicArrayMultiSingleOps); - impl ReadOnlyOps for GenericAtomicArray {} @@ -72,123 +17,3 @@ impl CompareExchangeEpsilonOps for GenericAtomicArray { } - -// // impl GenericAtomicArray { -// impl LocalArithmeticOps for GenericAtomicArray { -// fn local_fetch_add(&self, index: usize, val: T) -> T { -// // println!("local_add LocalArithmeticOps for GenericAtomicArray "); -// // let _lock = self.lock.write(); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; //this locks the -// slice[index] += val; -// orig -// } -// fn local_fetch_sub(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GenericAtomicArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] -= val; -// orig -// } -// fn local_fetch_mul(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GenericAtomicArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] *= val; -// orig -// } -// fn local_fetch_div(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GenericAtomicArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] /= val; -// // println!("div i: {:?} {:?} {:?} 
{:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// } -// impl LocalBitWiseOps for GenericAtomicArray { -// fn local_fetch_bit_and(&self, index: usize, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for GenericAtomicArray "); -// let orig = slice[index]; -// slice[index] &= val; -// // println!("and i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// fn local_fetch_bit_or(&self, index: usize, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for GenericAtomicArray "); -// let orig = slice[index]; -// slice[index] |= val; -// orig -// } -// } -// impl LocalAtomicOps for GenericAtomicArray { -// fn local_load(&self, index: usize, _val: T) -> T { -// self.local_as_mut_slice()[index] -// } - -// fn local_store(&self, index: usize, val: T) { -// self.local_as_mut_slice()[index] = val; //this locks the array -// } - -// fn local_swap(&self, index: usize, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] = val; -// orig -// } -// } -// // } - -// #[macro_export] -// macro_rules! GenericAtomicArray_create_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Add,[<$name dist_add>],[<$name local_add>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchAdd,[<$name dist_fetch_add>],[<$name local_add>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Sub,[<$name dist_sub>],[<$name local_sub>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchSub,[<$name dist_fetch_sub>],[<$name local_sub>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Mul,[<$name dist_mul>],[<$name local_mul>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchMul,[<$name dist_fetch_mul>],[<$name local_mul>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Div,[<$name dist_div>],[<$name local_div>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchDiv,[<$name dist_fetch_div>],[<$name local_div>]} - -// } -// } -// } - -// #[macro_export] -// macro_rules! GenericAtomicArray_create_bitwise_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::And,[<$name dist_bit_and>],[<$name local_bit_and>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchAnd,[<$name dist_fetch_bit_and>],[<$name local_bit_and>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Or,[<$name dist_bit_or>],[<$name local_bit_or>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::FetchOr,[<$name dist_fetch_bit_or>],[<$name local_bit_or>]} -// } -// } -// } - -// #[macro_export] -// macro_rules! GenericAtomicArray_create_atomic_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Store,[<$name dist_store>],[<$name local_store>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Load,[<$name dist_load>],[<$name local_load>]} -// $crate::GenericAtomicArray_register!{$a,ArrayOpCmd::Swap,[<$name dist_swap>],[<$name local_swap>]} -// } -// } -// } -// #[macro_export] -// macro_rules! GenericAtomicArray_register { -// ($id:ident, $optype:path, $op:ident, $local:ident) => { -// inventory::submit! 
{
-//             #![crate =$crate]
-//             $crate::array::GenericAtomicArrayOp{
-//                 id: ($optype,std::any::TypeId::of::<$id>()),
-//                 op: $op,
-//             }
-//         }
-//     };
-// }
diff --git a/src/array/global_lock_atomic.rs b/src/array/global_lock_atomic.rs
index 2a761e61..104cb42b 100644
--- a/src/array/global_lock_atomic.rs
+++ b/src/array/global_lock_atomic.rs
@@ -1,7 +1,6 @@
 mod iteration;
 pub(crate) mod operations;
 mod rdma;
-use crate::array::global_lock_atomic::operations::BUFOPS;
 use crate::array::private::LamellarArrayPrivate;
 use crate::array::r#unsafe::{UnsafeByteArray, UnsafeByteArrayWeak};
 use crate::array::*;
@@ -11,11 +10,6 @@ use crate::darc::global_rw_darc::{
 use crate::darc::DarcMode;
 use crate::lamellar_team::{IntoLamellarTeam, LamellarTeamRT};
 use crate::memregion::Dist;
-// use parking_lot::{
-//     lock_api::{ArcRwLockReadGuard, ArcRwLockWriteGuard},
-//     RawRwLock,
-// };
-use std::any::TypeId;
 use std::ops::{Deref, DerefMut};
 
 /// A safe abstraction of a distributed array, providing read/write access protected by locks.
@@ -218,7 +212,7 @@ impl Deref for GlobalLockLocalData<'_, T> {
     }
 }
 
-impl GlobalLockArray {
+impl GlobalLockArray {
     #[doc(alias = "Collective")]
     /// Construct a new GlobalLockArray with a length of `array_size` whose data will be laid out with the provided `distribution` on the PEs specified by the `team`.
     /// `team` is commonly a [LamellarWorld][crate::LamellarWorld] or [LamellarTeam][crate::LamellarTeam] (instance or reference).
@@ -240,18 +234,6 @@ impl GlobalLockArray {
         let array = UnsafeArray::new(team.clone(), array_size, distribution);
         let lock = GlobalRwDarc::new(team, ()).unwrap();
 
-        if let Some(func) = BUFOPS.get(&TypeId::of::()) {
-            let mut op_bufs = array.inner.data.op_buffers.write();
-            let bytearray = GlobalLockByteArray {
-                lock: lock.clone(),
-                array: array.clone().into(),
-            };
-
-            for pe in 0..op_bufs.len() {
-                op_bufs[pe] = func(GlobalLockByteArray::downgrade(&bytearray));
-            }
-        }
-
         GlobalLockArray {
             lock: lock,
             array: array,
@@ -704,8 +686,8 @@ impl GlobalLockArray {
     }
 }
 
-impl TeamFrom<(Vec,Distribution)> for GlobalLockArray {
-    fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self {
+impl TeamFrom<(Vec, Distribution)> for GlobalLockArray {
+    fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self {
         let (vals, distribution) = input;
         let input = (&vals, distribution);
         let array: UnsafeArray = input.team_into(team);
@@ -718,16 +700,7 @@ impl From> for GlobalLockArray {
         // println!("GlobalLock from unsafe");
         array.block_on_outstanding(DarcMode::GlobalLockArray);
         let lock = GlobalRwDarc::new(array.team(), ()).unwrap();
-        if let Some(func) = BUFOPS.get(&TypeId::of::()) {
-            let bytearray = GlobalLockByteArray {
-                lock: lock.clone(),
-                array: array.clone().into(),
-            };
-            let mut op_bufs = array.inner.data.op_buffers.write();
-            for _pe in 0..array.inner.data.num_pes {
-                op_bufs.push(func(GlobalLockByteArray::downgrade(&bytearray)))
-            }
-        }
+
         GlobalLockArray {
             lock: lock,
             array: array,
@@ -781,11 +754,10 @@ impl From> for LamellarByteArray {
     }
 }
 
 impl From for GlobalLockArray {
-    fn from(array:LamellarByteArray) -> Self {
+    fn from(array: LamellarByteArray) -> Self {
         if let LamellarByteArray::GlobalLockArray(array) = array {
             array.into()
-        }
-        else {
+        } else {
             panic!("Expected LamellarByteArray::GlobalLockArray")
         }
     }
diff --git a/src/array/global_lock_atomic/iteration.rs b/src/array/global_lock_atomic/iteration.rs
index 73171dbd..587de65e 100644
--- a/src/array/global_lock_atomic/iteration.rs
+++ b/src/array/global_lock_atomic/iteration.rs
@@ -1,6 +1,10 @@
use crate::array::global_lock_atomic::*; -use crate::array::iterator::distributed_iterator::{DistIter,DistIteratorLauncher,IndexedDistributedIterator,DistributedIterator}; -use crate::array::iterator::local_iterator::{LocalIter,LocalIteratorLauncher,IndexedLocalIterator,LocalIterator}; +use crate::array::iterator::distributed_iterator::{ + DistIteratorLauncher, DistributedIterator, IndexedDistributedIterator, +}; +use crate::array::iterator::local_iterator::{ + IndexedLocalIterator, LocalIterator, LocalIteratorLauncher, +}; use crate::array::iterator::one_sided_iterator::OneSidedIter; use crate::array::iterator::{LamellarArrayIterators, LamellarArrayMutIterators, Schedule}; use crate::array::private::LamellarArrayPrivate; @@ -398,7 +402,7 @@ impl DistIteratorLauncher for GlobalLockArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - DistIteratorLauncher::for_each_async(&self.array,iter, op) + DistIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -418,16 +422,21 @@ impl DistIteratorLauncher for GlobalLockArray { where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self,sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } @@ -438,9 +447,9 @@ impl DistIteratorLauncher for GlobalLockArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_async(&self.array, iter, d) } @@ -453,11 +462,11 @@ impl DistIteratorLauncher for GlobalLockArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - DistIteratorLauncher::collect_async_with_schedule(&self.array, sched,iter, d) + DistIteratorLauncher::collect_async_with_schedule(&self.array, sched, iter, d) } fn team(&self) -> Pin> { self.array.team_rt().clone() @@ -479,7 +488,7 @@ impl LocalIteratorLauncher for GlobalLockArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each(&self.array,iter, op) + LocalIteratorLauncher::for_each(&self.array, iter, op) } fn for_each_with_schedule( &self, @@ -491,19 +500,15 @@ impl LocalIteratorLauncher for GlobalLockArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each_with_schedule(&self.array,sched, iter, op) + LocalIteratorLauncher::for_each_with_schedule(&self.array, sched, iter, op) } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + 
Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - LocalIteratorLauncher::for_each_async(&self.array,iter, op) + LocalIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -528,7 +533,12 @@ impl LocalIteratorLauncher for GlobalLockArray { LocalIteratorLauncher::reduce(&self.array, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -561,21 +571,26 @@ impl LocalIteratorLauncher for GlobalLockArray { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } -// fn collect_async( + // fn collect_async( // &self, // iter: &I, // d: Distribution, @@ -606,14 +621,18 @@ impl LocalIteratorLauncher for GlobalLockArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count(&self.array, iter) } - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count_with_schedule(&self.array, sched, iter) } @@ -625,8 +644,12 @@ impl LocalIteratorLauncher for GlobalLockArray { { LocalIteratorLauncher::sum(&self.array, iter) } - - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, diff --git a/src/array/global_lock_atomic/operations.rs b/src/array/global_lock_atomic/operations.rs index 6d821959..4d30e208 100644 --- a/src/array/global_lock_atomic/operations.rs +++ b/src/array/global_lock_atomic/operations.rs @@ -1,60 +1,5 @@ use crate::array::global_lock_atomic::*; use crate::array::*; -use std::any::TypeId; -use std::collections::HashMap; - -type BufFn = fn(GlobalLockByteArrayWeak) -> Arc; -// type MultiMultiFn = fn(GlobalLockByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; -// type MultiSingleFn = fn(GlobalLockByteArray,ArrayOpCmd2,Vec,Vec) -> LamellarArcAm; - -lazy_static! 
{ - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - - // pub(crate) static ref MULTIMULTIOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; - - // pub(crate) static ref MULTISINGLEOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; -} - -#[doc(hidden)] -pub struct GlobalLockArrayOpBuf { - pub id: TypeId, - pub op: BufFn, -} - -// #[doc(hidden)] -// pub struct GlobalLockArrayMultiMultiOps { -// pub id: TypeId, -// pub op: MultiMultiFn, -// } - -// #[doc(hidden)] -// pub struct GlobalLockArrayMultiSingleOps { -// pub id: TypeId, -// pub op: MultiSingleFn, -// } - -crate::inventory::collect!(GlobalLockArrayOpBuf); -// crate::inventory::collect!(GlobalLockArrayMultiMultiOps); -// crate::inventory::collect!(GlobalLockArrayMultiSingleOps); - impl ReadOnlyOps for GlobalLockArray {} impl AccessOps for GlobalLockArray {} @@ -68,123 +13,3 @@ impl ShiftOps for GlobalLockArray {} impl CompareExchangeOps for GlobalLockArray {} impl CompareExchangeEpsilonOps for GlobalLockArray {} - -// // impl GlobalLockArray { -// impl LocalArithmeticOps for GlobalLockArray { -// fn local_fetch_add(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_add LocalArithmeticOps for GlobalLockArray "); -// // let _lock = self.lock.write(); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; //this locks the -// slice[index] += val; -// orig -// } -// fn local_fetch_sub(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GlobalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] -= val; -// orig -// } -// fn local_fetch_mul(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GlobalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] *= val; -// orig -// } -// fn local_fetch_div(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for GlobalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] /= val; -// // println!("div i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// } -// impl LocalBitWiseOps for GlobalLockArray { -// fn local_fetch_bit_and(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for GlobalLockArray "); -// let orig = slice[index]; -// slice[index] &= val; -// // println!("and i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// fn local_fetch_bit_or(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for GlobalLockArray "); -// let orig = slice[index]; -// slice[index] |= val; -// orig -// } -// } -// impl LocalAtomicOps for GlobalLockArray { -// fn local_load(&self, index: impl OpInput<'a,usize>, _val: T) -> T { -// 
self.local_as_mut_slice()[index] -// } - -// fn local_store(&self, index: impl OpInput<'a,usize>, val: T) { -// self.local_as_mut_slice()[index] = val; //this locks the array -// } - -// fn local_swap(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] = val; -// orig -// } -// } -// // } - -// #[macro_export] -// macro_rules! GlobalLockArray_create_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Add,[<$name dist_add>],[<$name local_add>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchAdd,[<$name dist_fetch_add>],[<$name local_add>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Sub,[<$name dist_sub>],[<$name local_sub>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchSub,[<$name dist_fetch_sub>],[<$name local_sub>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Mul,[<$name dist_mul>],[<$name local_mul>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchMul,[<$name dist_fetch_mul>],[<$name local_mul>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Div,[<$name dist_div>],[<$name local_div>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchDiv,[<$name dist_fetch_div>],[<$name local_div>]} - -// } -// } -// } - -// #[macro_export] -// macro_rules! GlobalLockArray_create_bitwise_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::And,[<$name dist_bit_and>],[<$name local_bit_and>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchAnd,[<$name dist_fetch_bit_and>],[<$name local_bit_and>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Or,[<$name dist_bit_or>],[<$name local_bit_or>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::FetchOr,[<$name dist_fetch_bit_or>],[<$name local_bit_or>]} -// } -// } -// } - -// #[macro_export] -// macro_rules! GlobalLockArray_create_atomic_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Store,[<$name dist_store>],[<$name local_store>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Load,[<$name dist_load>],[<$name local_load>]} -// $crate::GlobalLockArray_register!{$a,ArrayOpCmd::Swap,[<$name dist_swap>],[<$name local_swap>]} -// } -// } -// } -// #[macro_export] -// macro_rules! GlobalLockArray_register { -// ($id:ident, $optype:path, $op:ident, $local:ident) => { -// inventory::submit! 
{ -// #![crate =$crate] -// $crate::array::GlobalLockArrayOp{ -// id: ($optype,std::any::TypeId::of::<$id>()), -// op: $op, -// } -// } -// }; -// } diff --git a/src/array/iterator/consumer.rs b/src/array/iterator/consumer.rs index 6932931f..8a93aa08 100644 --- a/src/array/iterator/consumer.rs +++ b/src/array/iterator/consumer.rs @@ -8,20 +8,17 @@ // pub(crate) use for_each::*; // pub(crate) use reduce::*; +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::LocalIterator; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; - -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize,Ordering}; -use std::pin::Pin; use parking_lot::Mutex; -use rand::thread_rng; use rand::prelude::SliceRandom; - +use rand::thread_rng; +use std::pin::Pin; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; // trait Consumer{ // type Item; @@ -30,7 +27,6 @@ use rand::prelude::SliceRandom; // fn next(&self) -> Self::Item; // } - #[derive(Clone, Debug)] pub(crate) struct IterWorkStealer { pub(crate) range: Arc>, //start, end @@ -53,10 +49,10 @@ impl IterWorkStealer { None } } - fn set_done(&self) { - let mut range = self.range.lock(); - range.0 = range.1; - } + // fn set_done(&self) { + // let mut range = self.range.lock(); + // range.0 = range.1; + // } fn steal(&self) -> Option<(usize, usize)> { let mut range = self.range.lock(); @@ -73,28 +69,32 @@ impl IterWorkStealer { } #[derive(Clone, Debug)] -pub(crate) enum IterSchedule{ - Static(usize,usize), - Dynamic(Arc,usize), - Chunk(Vec<(usize, usize)>, Arc,), - WorkStealing(IterWorkStealer, Vec) +pub(crate) enum IterSchedule { + Static(usize, usize), + Dynamic(Arc, usize), + Chunk(Vec<(usize, usize)>, Arc), + WorkStealing(IterWorkStealer, Vec), } impl IterSchedule { pub(crate) fn init_iter(&self, iter: I) -> IterScheduleIter { match self { - IterSchedule::Static( start, end) => { - IterScheduleIter::Static(iter.init(*start,end-start)) + IterSchedule::Static(start, end) => { + IterScheduleIter::Static(iter.init(*start, end - start)) } IterSchedule::Dynamic(cur_i, max_i) => { IterScheduleIter::Dynamic(iter, cur_i.clone(), *max_i) } IterSchedule::Chunk(ranges, range_i) => { - IterScheduleIter::Chunk(iter.init(0,0), ranges.clone(),range_i.clone()) + IterScheduleIter::Chunk(iter.init(0, 0), ranges.clone(), range_i.clone()) } - IterSchedule::WorkStealing( range, siblings) => { + IterSchedule::WorkStealing(range, siblings) => { let (start, end) = *range.range.lock(); - IterScheduleIter::WorkStealing(iter.init(start, end-start), range.clone(), siblings.clone()) + IterScheduleIter::WorkStealing( + iter.init(start, end - start), + range.clone(), + siblings.clone(), + ) } } } @@ -116,25 +116,23 @@ impl IterSchedule { // } } -pub(crate) enum IterScheduleIter{ +pub(crate) enum IterScheduleIter { Static(I), - Dynamic(I,Arc,usize), - Chunk(I,Vec<(usize, usize)>, Arc), - WorkStealing(I,IterWorkStealer, Vec) + Dynamic(I, Arc, usize), + Chunk(I, Vec<(usize, usize)>, Arc), + WorkStealing(I, IterWorkStealer, Vec), } impl Iterator for IterScheduleIter { type Item = I::Item; fn next(&mut self) -> Option { match self { - IterScheduleIter::Static(iter) => { - iter.next() - } + IterScheduleIter::Static(iter) => iter.next(), IterScheduleIter::Dynamic(iter, cur_i, max_i) => { let mut ci = cur_i.fetch_add(1, Ordering::Relaxed); while ci < *max_i { // println!("ci {:?} maxi {:?} {:?}", 
ci, *max_i, std::thread::current().id()); - *iter = iter.init(ci,1); + *iter = iter.init(ci, 1); if let Some(elem) = iter.next() { return Some(elem); } @@ -145,20 +143,20 @@ impl Iterator for IterScheduleIter { IterScheduleIter::Chunk(iter, ranges, range_i) => { let mut next = iter.next(); // println!("next {:?} {:?}", next.is_none(), std::thread::current().id()); - if next.is_none(){ + if next.is_none() { let ri = range_i.fetch_add(1, Ordering::Relaxed); // println!("range {:?} {:?}", ri, std::thread::current().id()); if ri < ranges.len() { - *iter = iter.init(ranges[ri].0, ranges[ri].1-ranges[ri].0); + *iter = iter.init(ranges[ri].0, ranges[ri].1 - ranges[ri].0); next = iter.next(); } } next } IterScheduleIter::WorkStealing(iter, range, siblings) => { - let mut inner_next = |iter: &mut I| { - while let Some(ri) = range.next(){ - *iter = iter.init(ri,1); + let inner_next = |iter: &mut I| { + while let Some(ri) = range.next() { + *iter = iter.init(ri, 1); if let Some(elem) = iter.next() { return Some(elem); } @@ -167,7 +165,7 @@ impl Iterator for IterScheduleIter { // } } None - }; + }; let mut next = inner_next(iter); if next.is_none() { let mut rng = thread_rng(); @@ -187,8 +185,6 @@ impl Iterator for IterScheduleIter { } } - - pub(crate) trait IterConsumer: SyncSend { type AmOutput; type Output; @@ -196,15 +192,14 @@ pub(crate) trait IterConsumer: SyncSend { fn init(&self, start: usize, cnt: usize) -> Self; fn next(&mut self) -> Option; fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm; - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>; + fn create_handle( + self, + team: Pin>, + reqs: Vec>>, + ) -> Box>; fn max_elems(&self, in_elems: usize) -> usize; - } // pub(crate) trait MonotonicIterConsumer: IterConsumer{ // fn monotonic(&self) -> I; // } - - - - diff --git a/src/array/iterator/distributed_iterator.rs b/src/array/iterator/distributed_iterator.rs index 396ef4d6..ad2dd7c3 100644 --- a/src/array/iterator/distributed_iterator.rs +++ b/src/array/iterator/distributed_iterator.rs @@ -37,10 +37,10 @@ use take::*; pub(crate) use consumer::*; use crate::array::iterator::one_sided_iterator::OneSidedIterator; -use crate::array::iterator::{IterRequest,Schedule}; +use crate::array::iterator::{IterRequest, Schedule}; use crate::array::{ - AtomicArray, Distribution, GenericAtomicArray, LamellarArray, LamellarArrayPut, - NativeAtomicArray, UnsafeArray, operations::ArrayOps, TeamFrom + operations::ArrayOps, AtomicArray, Distribution, GenericAtomicArray, LamellarArray, + LamellarArrayPut, NativeAtomicArray, TeamFrom, UnsafeArray, }; use crate::lamellar_request::LamellarRequest; use crate::memregion::Dist; @@ -55,8 +55,6 @@ use std::marker::PhantomData; use std::pin::Pin; use std::sync::Arc; -use rand::seq::SliceRandom; - #[lamellar_impl::AmLocalDataRT(Clone)] pub(crate) struct Collect where @@ -127,8 +125,6 @@ where } } - - #[doc(hidden)] pub struct DistIterForEachHandle { pub(crate) reqs: Vec>>, @@ -200,7 +196,9 @@ impl> + SyncSend> DistIterCollectHand } } #[async_trait] -impl> + SyncSend> IterRequest for DistIterCollectHandle { +impl> + SyncSend> IterRequest + for DistIterCollectHandle +{ type Output = A; async fn into_future(mut self: Box) -> Self::Output { let mut local_vals = vec![]; @@ -246,7 +244,7 @@ pub trait DistIteratorLauncher { where I: DistributedIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, - Fut: Future + Send + 'static,; + Fut: Future + Send + 'static; fn for_each_async_with_schedule( &self, @@ -257,19 +255,24 @@ pub trait 
DistIteratorLauncher {
     where
         I: DistributedIterator + 'static,
         F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static,
-        Fut: Future + Send + 'static,;
+        Fut: Future + Send + 'static;
 
     fn collect(&self, iter: &I, d: Distribution) -> Pin + Send>>
     where
         I: DistributedIterator + 'static,
         I::Item: Dist + ArrayOps,
-        A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static;
+        A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static;
 
-    fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>>
+    fn collect_with_schedule(
+        &self,
+        sched: Schedule,
+        iter: &I,
+        d: Distribution,
+    ) -> Pin + Send>>
     where
         I: DistributedIterator + 'static,
         I::Item: Dist + ArrayOps,
-        A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static;
+        A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static;
 
     fn collect_async(
         &self,
         iter: &I,
         d: Distribution,
     ) -> Pin + Send>>
     where
         I: DistributedIterator,
-        I::Item: Future + Send + 'static,
+        I::Item: Future + Send + 'static,
         B: Dist + ArrayOps,
-        A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static;
+        A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static;
 
     fn collect_async_with_schedule(
-        &self,
-        sched: Schedule,
-        iter: &I,
-        d: Distribution,
-    ) -> Pin + Send>>
-    where
-        I: DistributedIterator,
-        I::Item: Future + Send + 'static,
-        B: Dist + ArrayOps,
-        A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static;
+        &self,
+        sched: Schedule,
+        iter: &I,
+        d: Distribution,
+    ) -> Pin + Send>>
+    where
+        I: DistributedIterator,
+        I::Item: Future + Send + 'static,
+        B: Dist + ArrayOps,
+        A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static;
 
     #[doc(hidden)]
     fn global_index_from_local(&self, index: usize, chunk_size: usize) -> Option;
@@ -444,7 +447,7 @@ pub trait DistributedIterator: SyncSend + Clone + 'static {
     /// Similar to the Enumerate iterator (which can only be applied to `IndexedLocalIterators`), but the yielded indices are only
     /// guaranteed to be unique and monotonically increasing; they should not be considered to have any relation to the underlying
-    /// location of data in the local array. 
+    /// location of data in the local array.
     ///
     /// # Examples
     ///```
@@ -472,9 +475,8 @@ pub trait DistributedIterator: SyncSend + Clone + 'static {
     /// PE: 2 j: 0 i: 0 elem: 2.0
     /// PE: 2 j: 1 i: 1 elem: 2.0
     ///```
-    fn monotonic(self) -> Monotonic
-    {
-        Monotonic::new(self,0)
+    fn monotonic(self) -> Monotonic {
+        Monotonic::new(self, 0)
     }
 
     /// Calls a closure on each element of a Distributed Iterator in parallel and distributed on each PE (which owns data of the iterated array).
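For orientation, the following is a minimal usage sketch of the `monotonic`/`for_each` pair documented above; it is illustrative only and not part of this patch. The world-builder setup mirrors the crate's doc-example pattern, and the array element type, length, and closure body are assumptions made for the example.

```rust
use lamellar::array::prelude::*;

fn main() {
    let world = lamellar::LamellarWorldBuilder::new().build();
    // An illustrative 8-element block-distributed array, as in the doc examples.
    let array = AtomicArray::<usize>::new(&world, 8, Distribution::Block);

    // `monotonic` pairs each element yielded on this PE with an index that is
    // only guaranteed to be unique and monotonically increasing; it says
    // nothing about the element's position in the global array.
    let req = array
        .dist_iter()
        .monotonic()
        .for_each(move |(i, elem)| println!("i: {:?} elem: {:?}", i, elem));
    // `for_each` returns a future; drive it to completion, then synchronize.
    world.block_on(req);
    world.barrier();
}
```

As the sample output in the doc comment suggests (`PE: 2 j: 0 ...`), these indices restart on each PE, echoing the note above that they bear no relation to the underlying location of the data.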
@@ -574,7 +576,7 @@ pub trait DistributedIterator: SyncSend + Clone + 'static { where // &'static Self: DistributedIterator + 'static, Self::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)>+ SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { self.array().collect(self, d) } @@ -620,7 +622,7 @@ pub trait DistributedIterator: SyncSend + Clone + 'static { // &'static Self: DistributedIterator + 'static, T: Dist + ArrayOps, Self::Item: Future + Send + 'static, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { self.array().collect_async(self, d) } @@ -721,8 +723,6 @@ pub trait IndexedDistributedIterator: DistributedIterator + SyncSend + Clone + ' Enumerate::new(self, 0) } - - /// An iterator that skips the first `n` elements /// /// # Examples diff --git a/src/array/iterator/distributed_iterator/consumer/collect.rs b/src/array/iterator/distributed_iterator/consumer/collect.rs index 76fd3013..fa2bf069 100644 --- a/src/array/iterator/distributed_iterator/consumer/collect.rs +++ b/src/array/iterator/distributed_iterator/consumer/collect.rs @@ -1,38 +1,38 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::distributed_iterator::{DistributedIterator, Monotonic}; use crate::array::iterator::IterRequest; -use crate::array::iterator::distributed_iterator::{DistributedIterator,Monotonic}; -use crate::array::iterator::one_sided_iterator::OneSidedIterator; -use crate::array::{LamellarArray,Distribution,TeamFrom,TeamInto}; -use crate::array::operations::ArrayOps; -use crate::memregion::Dist; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; +use crate::array::operations::ArrayOps; +use crate::array::{Distribution, TeamFrom, TeamInto}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; +use crate::memregion::Dist; use async_trait::async_trait; -use futures::Future; use core::marker::PhantomData; +use futures::Future; use std::pin::Pin; use std::sync::Arc; -#[derive(Clone,Debug)] -pub struct Collect{ +#[derive(Clone, Debug)] +pub struct Collect { pub(crate) iter: Monotonic, pub(crate) distribution: Distribution, - pub(crate) _phantom: PhantomData + pub(crate) _phantom: PhantomData, } -impl IterConsumer for Collect +impl IterConsumer for Collect where I: DistributedIterator, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static,{ - type AmOutput = Vec<(usize,I::Item)>; + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, +{ + type AmOutput = Vec<(usize, I::Item)>; type Output = A; - type Item = (usize,I::Item); - fn init(&self, start: usize, cnt: usize) -> Self{ - Collect{ - iter: self.iter.init(start,cnt), + type Item = (usize, I::Item); + fn init(&self, start: usize, cnt: usize) -> Self { + Collect { + iter: self.iter.init(start, cnt), distribution: self.distribution.clone(), _phantom: self._phantom.clone(), } @@ -41,12 +41,16 @@ where self.iter.next() } fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { - Arc::new(CollectAm{ + Arc::new(CollectAm { iter: self.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box> { + fn create_handle( + self, + team: Pin>, + reqs: Vec>>, + ) -> Box> { Box::new(DistIterCollectHandle { reqs, distribution: self.distribution, @@ -54,31 +58,31 @@ 
where _phantom: self._phantom, }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } #[derive(Debug)] -pub struct CollectAsync{ +pub struct CollectAsync { pub(crate) iter: Monotonic, pub(crate) distribution: Distribution, - pub(crate) _phantom: PhantomData<(A,B)> + pub(crate) _phantom: PhantomData<(A, B)>, } -impl IterConsumer for CollectAsync +impl IterConsumer for CollectAsync where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, - { - type AmOutput = Vec<(usize,B)>; + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, +{ + type AmOutput = Vec<(usize, B)>; type Output = A; - type Item = (usize,I::Item); - fn init(&self, start: usize, cnt: usize) -> Self{ - CollectAsync{ - iter: self.iter.init(start,cnt), + type Item = (usize, I::Item); + fn init(&self, start: usize, cnt: usize) -> Self { + CollectAsync { + iter: self.iter.init(start, cnt), distribution: self.distribution.clone(), _phantom: self._phantom.clone(), } @@ -87,12 +91,16 @@ where self.iter.next() } fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { - Arc::new(CollectAsyncAm{ + Arc::new(CollectAsyncAm { iter: self.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box> { + fn create_handle( + self, + team: Pin>, + reqs: Vec>>, + ) -> Box> { Box::new(DistIterCollectHandle { reqs, distribution: self.distribution, @@ -100,19 +108,20 @@ where _phantom: PhantomData, }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } -impl Clone for CollectAsync +impl Clone for CollectAsync where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static,{ + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, +{ fn clone(&self) -> Self { - CollectAsync{ + CollectAsync { iter: self.iter.clone(), distribution: self.distribution.clone(), _phantom: self._phantom.clone(), @@ -121,70 +130,76 @@ where } #[doc(hidden)] -pub struct DistIterCollectHandle TeamFrom<(&'a Vec,Distribution)> + SyncSend> { - pub(crate) reqs: Vec>>>, +pub struct DistIterCollectHandle< + T: Dist + ArrayOps, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend, +> { + pub(crate) reqs: Vec>>>, pub(crate) distribution: Distribution, pub(crate) team: Pin>, pub(crate) _phantom: PhantomData, } -impl TeamFrom<(&'a Vec,Distribution)> + SyncSend> DistIterCollectHandle { +impl TeamFrom<(&'a Vec, Distribution)> + SyncSend> + DistIterCollectHandle +{ fn create_array(&self, local_vals: &Vec) -> A { let input = (local_vals, self.distribution); input.team_into(&self.team) } } #[async_trait] -impl TeamFrom<(&'a Vec,Distribution)> + SyncSend> IterRequest +impl TeamFrom<(&'a Vec, Distribution)> + SyncSend> IterRequest for DistIterCollectHandle { type Output = A; async fn into_future(mut self: Box) -> Self::Output { let mut temp_vals = vec![]; - println!("num collect reqs: {:?}",self.reqs.len()); + println!("num collect reqs: {:?}", self.reqs.len()); for req in self.reqs.drain(0..) 
{ let v = req.into_future().await; - println!("num vals in req: {:?}",v.len()); + println!("num vals in req: {:?}", v.len()); temp_vals.extend(v); } temp_vals.sort_by(|a, b| a.0.cmp(&b.0)); - let mut local_vals = temp_vals.into_iter().map(|v| { - println!("local_val idx: {:?}",v.0); - v.1 - }).collect::>(); - println!("local_val len {:?}",local_vals.len()); + let local_vals = temp_vals + .into_iter() + .map(|v| { + println!("local_val idx: {:?}", v.0); + v.1 + }) + .collect::>(); + println!("local_val len {:?}", local_vals.len()); self.create_array(&local_vals) } fn wait(mut self: Box) -> Self::Output { - let mut num_local_vals = 0; + // let mut num_local_vals = 0; let mut temp_vals = vec![]; for req in self.reqs.drain(0..) { let v = req.get(); temp_vals.extend(v); } temp_vals.sort_by(|a, b| a.0.cmp(&b.0)); - let mut local_vals = temp_vals.into_iter().map(|v| v.1).collect(); + let local_vals = temp_vals.into_iter().map(|v| v.1).collect(); self.create_array(&local_vals) } } #[lamellar_impl::AmLocalDataRT(Clone)] -pub(crate) struct CollectAm -{ - pub(crate) iter: Collect, +pub(crate) struct CollectAm { + pub(crate) iter: Collect, pub(crate) schedule: IterSchedule, } - #[lamellar_impl::rt_am_local] -impl LamellarAm for CollectAm +impl LamellarAm for CollectAm where I: DistributedIterator, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { async fn exec(&self) -> Vec { - let mut iter = self.schedule.init_iter(self.iter.clone()); + let iter = self.schedule.init_iter(self.iter.clone()); iter.collect::>() } } @@ -193,32 +208,28 @@ where pub(crate) struct CollectAsyncAm where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - pub(crate) iter: CollectAsync, + pub(crate) iter: CollectAsync, pub(crate) schedule: IterSchedule, } - #[lamellar_impl::rt_am_local] impl LamellarAm for CollectAsyncAm where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - async fn exec(&self) -> Vec<(usize,B)> { + async fn exec(&self) -> Vec<(usize, B)> { let mut iter = self.schedule.init_iter(self.iter.clone()); let mut res = vec![]; - while let Some((index,elem)) = iter.next(){ - res.push((index,elem.await)); + while let Some((index, elem)) = iter.next() { + res.push((index, elem.await)); } res } } - - - diff --git a/src/array/iterator/distributed_iterator/consumer/for_each.rs b/src/array/iterator/distributed_iterator/consumer/for_each.rs index 7f1d3fa1..4fc41c00 100644 --- a/src/array/iterator/distributed_iterator/consumer/for_each.rs +++ b/src/array/iterator/distributed_iterator/consumer/for_each.rs @@ -1,7 +1,7 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::distributed_iterator::DistributedIterator; use crate::array::iterator::IterRequest; -use crate::array::iterator::distributed_iterator::{DistributedIterator}; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; @@ -10,9 
+10,8 @@ use futures::Future; use std::pin::Pin; use std::sync::Arc; - #[derive(Clone, Debug)] -pub struct ForEach +pub struct ForEach where I: DistributedIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, @@ -21,42 +20,44 @@ where pub(crate) op: F, } -impl IterConsumer for ForEach +impl IterConsumer for ForEach where I: DistributedIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, - { +{ type AmOutput = (); type Output = (); type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - ForEach{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + ForEach { + iter: self.iter.init(start, cnt), op: self.op.clone(), } } fn next(&mut self) -> Option { self.iter.next() } - fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm{ - Arc::new(ForEachAm{ + fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { + Arc::new(ForEachAm { iter: self.clone(), op: self.op.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>{ - Box::new(DistIterForEachHandle { - reqs - }) + fn create_handle( + self, + _team: Pin>, + reqs: Vec>>, + ) -> Box> { + Box::new(DistIterForEachHandle { reqs }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } #[derive(Debug)] -pub struct ForEachAsync +pub struct ForEachAsync where I: DistributedIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, @@ -67,51 +68,52 @@ where // pub(crate) _phantom: PhantomData, } -impl IterConsumer for ForEachAsync +impl IterConsumer for ForEachAsync where -I: DistributedIterator + 'static, -F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -Fut: Future + Send + 'static, - { + I: DistributedIterator + 'static, + F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, + Fut: Future + Send + 'static, +{ type AmOutput = (); type Output = (); type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - ForEachAsync{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + ForEachAsync { + iter: self.iter.init(start, cnt), op: self.op.clone(), } } fn next(&mut self) -> Option { self.iter.next() } - fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm{ - - Arc::new(ForEachAsyncAm{ + fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { + Arc::new(ForEachAsyncAm { iter: self.clone(), op: self.op.clone(), schedule, // _phantom: self._phantom.clone(), }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>{ - Box::new(DistIterForEachHandle { - reqs - }) + fn create_handle( + self, + _team: Pin>, + reqs: Vec>>, + ) -> Box> { + Box::new(DistIterForEachHandle { reqs }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } // fn clone(&self) -> Self{ - + // } } -impl Clone for ForEachAsync +impl Clone for ForEachAsync where -I: DistributedIterator + 'static, -F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -Fut: Future + Send + 'static, + I: DistributedIterator + 'static, + F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, + Fut: Future + Send + 'static, { fn clone(&self) -> Self { ForEachAsync { @@ -121,7 +123,6 @@ Fut: Future + Send + 'static, } } - #[doc(hidden)] pub struct DistIterForEachHandle { pub(crate) reqs: Vec>>, @@ -144,28 +145,26 @@ impl IterRequest for DistIterForEachHandle { } #[lamellar_impl::AmLocalDataRT(Clone)] -pub(crate) struct ForEachAm +pub(crate) struct 
ForEachAm where I: DistributedIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { pub(crate) op: F, - pub(crate) iter: ForEach, - pub(crate) schedule: IterSchedule + pub(crate) iter: ForEach, + pub(crate) schedule: IterSchedule, } - - #[lamellar_impl::rt_am_local] impl LamellarAm for ForEachAm where - I: DistributedIterator + 'static, + I: DistributedIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { async fn exec(&self) { // println!("foreacham: {:?}", std::thread::current().id()); let mut iter = self.schedule.init_iter(self.iter.clone()); - while let Some(elem) = iter.next(){ + while let Some(elem) = iter.next() { (&self.op)(elem); } } @@ -179,7 +178,7 @@ where Fut: Future + Send + 'static, { pub(crate) op: F, - pub(crate) iter: ForEachAsync, + pub(crate) iter: ForEachAsync, pub(crate) schedule: IterSchedule, // pub(crate) _phantom: PhantomData } @@ -187,13 +186,13 @@ where #[lamellar_impl::rt_am_local] impl LamellarAm for ForEachAsyncAm where - I: DistributedIterator + 'static, + I: DistributedIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { async fn exec(&self) { let mut iter = self.schedule.init_iter(self.iter.clone()); - while let Some(elem) = iter.next(){ + while let Some(elem) = iter.next() { (&self.op)(elem).await; } } @@ -297,9 +296,6 @@ where // } // } - - - // #[lamellar_impl::AmLocalDataRT(Clone, Debug)] // pub(crate) struct ForEachWorkStealing // where diff --git a/src/array/iterator/local_iterator.rs b/src/array/iterator/local_iterator.rs index b2165d53..1f58f3bc 100644 --- a/src/array/iterator/local_iterator.rs +++ b/src/array/iterator/local_iterator.rs @@ -21,7 +21,6 @@ mod zip; pub(crate) mod consumer; - use chunks::*; use enumerate::*; use filter::*; @@ -35,10 +34,8 @@ use zip::*; pub(crate) use consumer::*; -use crate::array::iterator::one_sided_iterator::OneSidedIterator; -use crate::array::iterator::{Schedule,IterRequest}; -use crate::array::{AtomicArray, Distribution, LamellarArray, LamellarArrayPut,operations::ArrayOps, TeamFrom}; -use crate::lamellar_request::LamellarRequest; +use crate::array::iterator::Schedule; +use crate::array::{operations::ArrayOps, AtomicArray, Distribution, LamellarArray, TeamFrom}; use crate::memregion::Dist; use crate::LamellarTeamRT; @@ -50,8 +47,6 @@ use std::marker::PhantomData; use std::pin::Pin; use std::sync::Arc; -use rand::seq::SliceRandom; - #[doc(hidden)] #[enum_dispatch] pub trait LocalIteratorLauncher { @@ -78,7 +73,7 @@ pub trait LocalIteratorLauncher { where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, - Fut: Future + Send + 'static,; + Fut: Future + Send + 'static; fn for_each_async_with_schedule( &self, @@ -89,15 +84,24 @@ pub trait LocalIteratorLauncher { where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, - Fut: Future + Send + 'static,; - - fn reduce(&self, iter: &I, op: F) -> Pin> + Send>> + Fut: Future + Send + 'static; + + fn reduce( + &self, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, F: Fn(I::Item, I::Item) -> I::Item + SyncSend + Clone + 'static; - - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -109,7 +113,7 @@ pub trait LocalIteratorLauncher { // I::Item: SyncSend, // F: Fn(I::Item, I::Item) -> Fut + SyncSend + Clone + 
'static, // Fut: Future + SyncSend + Clone + 'static; - + // fn reduce_async_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> // where // I: LocalIterator + 'static, @@ -121,13 +125,18 @@ pub trait LocalIteratorLauncher { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static; + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static; - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static; + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static; // fn collect_async( // &self, @@ -139,7 +148,7 @@ pub trait LocalIteratorLauncher { // I::Item: Future + Send + 'static, // B: Dist + ArrayOps, // A: From> + SyncSend + Clone + 'static; - + // fn collect_async_with_schedule( // &self, // sched: Schedule, @@ -155,8 +164,12 @@ pub trait LocalIteratorLauncher { fn count(&self, iter: &I) -> Pin + Send>> where I: LocalIterator + 'static; - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static; @@ -165,7 +178,11 @@ pub trait LocalIteratorLauncher { I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum; - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum; @@ -185,7 +202,7 @@ pub trait LocalIteratorLauncher { /// The functions in this trait are available on all local iterators. /// Additonaly functionality can be found in the [IndexedLocalIterator] trait: /// these methods are only available for local iterators where the number of elements is known in advance (e.g. after invoking `filter` these methods would be unavailable) -pub trait LocalIterator: SyncSend + Clone + 'static { +pub trait LocalIterator: SyncSend + Clone + 'static { /// The type of item this distributed iterator produces type Item: Send; @@ -278,7 +295,6 @@ pub trait LocalIterator: SyncSend + Clone + 'static { FilterMap::new(self, op) } - /// Applies `op` to each element producing a new iterator with the results /// /// # Examples @@ -313,7 +329,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static { /// Similar to the Enumerate iterator (which can only be applied to `IndexedLocalIterators`), but the yielded indicies are only /// guaranteed to be unique and monotonically increasing, they should not be considered to have any relation to the underlying - /// location of data in the local array. + /// location of data in the local array. /// /// # Examples ///``` @@ -341,12 +357,10 @@ pub trait LocalIterator: SyncSend + Clone + 'static { /// PE: 2 j: 0 i: 0 elem: 2.0 /// PE: 2 j: 1 i: 1 elem: 2.0 ///``` - fn monotonic(self) -> Monotonic - { - Monotonic::new(self,0) + fn monotonic(self) -> Monotonic { + Monotonic::new(self, 0) } - /// Calls a closure on each element of a Local Iterator in parallel on the calling PE (the PE must have some local data of the array). /// /// This call utilizes the [Schedule::Static][crate::array::iterator::Schedule] policy. 
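// A minimal usage sketch of the `monotonic` adapter and `for_each` documented
// above -- not part of the patch itself. It mirrors the doc examples; `array` is
// a placeholder handle, and the exact item type yielded by `local_iter()` (and
// whether `filter` takes its item by reference) may differ by array flavor.
fn monotonic_sketch(array: &lamellar::array::ReadOnlyArray<f32>) {
    let req = array
        .local_iter()
        .filter(|elem| **elem < 10.0) // filtering leaves gaps in the original indices
        .monotonic() // re-index: yields (i, elem) with unique, monotonically increasing i
        .for_each(|(i, elem)| println!("i: {} elem: {}", i, elem));
    array.block_on(req); // iteration is launched eagerly, but only guaranteed complete once driven
}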
@@ -374,7 +388,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
         self.array().for_each(self, op)
     }

-    /// Calls a closure on each element of a Local Iterator in parallel on the calling PE (the PE must have some local data of the array) using the specififed schedule policy. 
+    /// Calls a closure on each element of a Local Iterator in parallel on the calling PE (the PE must have some local data of the array) using the specified schedule policy.
     ///
     /// This function returns a future which can be used to poll for completion of the iteration.
     /// Note calling this function launches the iteration regardless of if the returned future is used or not.
@@ -438,7 +452,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
         self.array().for_each_async(self, op)
     }

-    /// Calls a closure on each element of a Local Iterator in parallel on the calling PE (the PE must have some local data of the array) using the specififed schedule policy. 
+    /// Calls a closure on each element of a Local Iterator in parallel on the calling PE (the PE must have some local data of the array) using the specified schedule policy.
     ///
     /// The supplied closure must return a future.
     ///
@@ -469,11 +483,9 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
         F: Fn(Self::Item) -> Fut + SyncSend + Clone + 'static,
         Fut: Future<Output = ()> + Send + 'static,
     {
-        self.array()
-            .for_each_async_with_schedule(sched, self, op)
+        self.array().for_each_async_with_schedule(sched, self, op)
     }

-
     /// Reduces the elements of the local iterator using the provided closure
     ///
    /// This function returns a future which needs to be driven to completion to retrieve the new container.
@@ -492,7 +504,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
    where
        // &'static Self: LocalIterator + 'static,
        Self::Item: SyncSend,
-        F: Fn(Self::Item,Self::Item) -> Self::Item + SyncSend + Clone + 'static,
+        F: Fn(Self::Item, Self::Item) -> Self::Item + SyncSend + Clone + 'static,
    {
        self.array().reduce(self, op)
    }
@@ -511,11 +523,15 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
    /// let req = array.local_iter().reduce(|acc,elem| acc+elem);
    /// let sum = array.block_on(req); //wait on the collect request to get the new array
    ///```
-    fn reduce_with_schedule<F>(&self, sched: Schedule, op: F) -> Pin<Box<dyn Future<Output = Option<Self::Item>> + Send>>
+    fn reduce_with_schedule<F>(
+        &self,
+        sched: Schedule,
+        op: F,
+    ) -> Pin<Box<dyn Future<Output = Option<Self::Item>> + Send>>
    where
        // &'static Self: LocalIterator + 'static,
        Self::Item: SyncSend,
-        F: Fn(Self::Item,Self::Item) -> Self::Item + SyncSend + Clone + 'static,
+        F: Fn(Self::Item, Self::Item) -> Self::Item + SyncSend + Clone + 'static,
    {
        self.array().reduce_with_schedule(sched, self, op)
    }
@@ -544,7 +560,6 @@ pub trait LocalIterator: SyncSend + Clone + 'static {
    //     self.array().reduce_async(self, op)
    // }

-
    /// Collects the elements of the local iterator into the specified container type
    ///
    /// This function returns a future which needs to be driven to completion to retrieve the new container.
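// A minimal usage sketch of `reduce`/`reduce_with_schedule` as documented above
// -- not part of the patch. It mirrors the doc example (`|acc, elem| acc + elem`);
// note the result is an Option (an empty local iterator reduces to None).
// `array` is a placeholder name; the `Schedule` path follows the doc link above.
use lamellar::array::iterator::Schedule;

fn reduce_sketch(array: &lamellar::array::ReadOnlyArray<usize>) {
    let req = array
        .local_iter()
        .map(|elem| *elem)
        .reduce(|acc, elem| acc + elem);
    let sum = array.block_on(req); // Option<usize>
    let req = array
        .local_iter()
        .map(|elem| *elem)
        .reduce_with_schedule(Schedule::Static, |acc, elem| acc + elem);
    assert_eq!(sum, array.block_on(req)); // same result under an explicit schedule
}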
@@ -556,7 +571,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static { where // &'static Self: LocalIterator + 'static, Self::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { self.array().collect(self, d) } @@ -568,17 +583,19 @@ pub trait LocalIterator: SyncSend + Clone + 'static { /// # Examples ///``` ///``` - fn collect_with_schedule(&self,sched: Schedule, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + d: Distribution, + ) -> Pin + Send>> where // &'static Self: LocalIterator + 'static, Self::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { self.array().collect_with_schedule(sched, self, d) } - - // /// Collects the elements of the local iterator into the specified container type // /// Each element from the iterator must return a Future // /// @@ -606,7 +623,6 @@ pub trait LocalIterator: SyncSend + Clone + 'static { // self.array().collect_async(self, d) // } - /// Counts the number of the elements of the local iterator /// /// This function returns a future which needs to be driven to completion to retrieve the new container. @@ -621,8 +637,7 @@ pub trait LocalIterator: SyncSend + Clone + 'static { /// let req = array.local_iter().filter(|elem| elem < 10).collect::>(Distribution::Block); /// let new_vec = array.block_on(req); //wait on the collect request to get the new array ///``` - fn count(&self) -> Pin + Send>> - { + fn count(&self) -> Pin + Send>> { self.array().count(self) } @@ -640,15 +655,14 @@ pub trait LocalIterator: SyncSend + Clone + 'static { /// let req = array.local_iter().filter(|elem| elem < 10).collect::>(Distribution::Block); /// let new_vec = array.block_on(req); //wait on the collect request to get the new array ///``` - fn count_with_schedule(&self,sched: Schedule) -> Pin + Send>> - { - self.array().count_with_schedule(sched,self) + fn count_with_schedule(&self, sched: Schedule) -> Pin + Send>> { + self.array().count_with_schedule(sched, self) } /// Sums the elements of the local iterator. - /// + /// /// Takes each element, adds them together, and returns the result. - /// + /// /// An empty iterator returns the zero value of the type. /// /// This function returns a future which needs to be driven to completion to retrieve the sum @@ -664,9 +678,9 @@ pub trait LocalIterator: SyncSend + Clone + 'static { } /// Sums the elements of the local iterator, using the specified schedule - /// + /// /// Takes each element, adds them together, and returns the result. - /// + /// /// An empty iterator returns the zero value of the type. 
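// A minimal usage sketch of `count` and `sum` as documented above -- not part of
// the patch. (The `# Examples` blocks above still show a `collect` call; a direct
// count/sum call looks like the following.) `array` is a placeholder name.
fn count_sum_sketch(array: &lamellar::array::ReadOnlyArray<usize>) {
    // count(): the future resolves to the number of items yielded on this PE
    let evens = array.block_on(array.local_iter().filter(|elem| **elem % 2 == 0).count());
    // sum(): folds the items with std::iter::Sum; an empty iterator gives the zero value
    let total = array.block_on(array.local_iter().map(|elem| *elem).sum());
    println!("evens: {} total: {}", evens, total);
}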
/// /// This function returns a future which needs to be driven to completion to retrieve the sum @@ -678,15 +692,12 @@ pub trait LocalIterator: SyncSend + Clone + 'static { where Self::Item: SyncSend + std::iter::Sum, { - self.array().sum_with_schedule(sched,self) + self.array().sum_with_schedule(sched, self) } - - } /// An interface for dealing with local iterators which are indexable, meaning it returns an iterator of known length pub trait IndexedLocalIterator: LocalIterator + SyncSend + Clone + 'static { - /// yields the local (to the calling PE) index along with each element /// /// # Examples diff --git a/src/array/iterator/local_iterator/consumer/collect.rs b/src/array/iterator/local_iterator/consumer/collect.rs index 24606ec5..0aabcade 100644 --- a/src/array/iterator/local_iterator/consumer/collect.rs +++ b/src/array/iterator/local_iterator/consumer/collect.rs @@ -1,52 +1,56 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::local_iterator::{LocalIterator, Monotonic}; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::{LocalIterator,Monotonic}; -use crate::array::iterator::one_sided_iterator::OneSidedIterator; -use crate::array::{LamellarArray,Distribution,TeamFrom,TeamInto}; -use crate::array::operations::ArrayOps; -use crate::memregion::Dist; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; +use crate::array::operations::ArrayOps; +use crate::array::{Distribution, TeamFrom, TeamInto}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; +use crate::memregion::Dist; use async_trait::async_trait; use core::marker::PhantomData; use std::pin::Pin; use std::sync::Arc; -#[derive(Clone,Debug)] -pub struct Collect{ +#[derive(Clone, Debug)] +pub struct Collect { pub(crate) iter: Monotonic, pub(crate) distribution: Distribution, - pub(crate) _phantom: PhantomData + pub(crate) _phantom: PhantomData, } -impl IterConsumer for Collect +impl IterConsumer for Collect where I: LocalIterator, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static,{ - type AmOutput = Vec<(usize,I::Item)>; + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, +{ + type AmOutput = Vec<(usize, I::Item)>; type Output = A; - type Item = (usize,I::Item); - fn init(&self, start: usize, cnt: usize) -> Self{ - Collect{ - iter: self.iter.init(start,cnt), + type Item = (usize, I::Item); + fn init(&self, start: usize, cnt: usize) -> Self { + Collect { + iter: self.iter.init(start, cnt), distribution: self.distribution.clone(), _phantom: self._phantom.clone(), } } - + fn next(&mut self) -> Option { self.iter.next() } fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { - Arc::new(CollectAm{ + Arc::new(CollectAm { iter: self.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box> { + fn create_handle( + self, + team: Pin>, + reqs: Vec>>, + ) -> Box> { Box::new(LocalIterCollectHandle { reqs, distribution: self.distribution, @@ -54,12 +58,12 @@ where _phantom: self._phantom, }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } -// impl MonotonicIterConsumer for Collect +// impl MonotonicIterConsumer for Collect // where // I: LocalIterator, // I::Item: Dist + ArrayOps, @@ -72,7 +76,6 @@ where // } // } // } - // #[derive(Clone,Debug)] // pub struct CollectAsync{ @@ 
-81,7 +84,7 @@ where // pub(crate) _phantom: PhantomData<(A,B)> // } -// impl IterConsumer for CollectAsync +// impl IterConsumer for CollectAsync // where // I: LocalIterator, // I::Item: Future + Send + 'static, @@ -109,21 +112,26 @@ where // } #[doc(hidden)] -pub struct LocalIterCollectHandle TeamFrom<(&'a Vec,Distribution)> + SyncSend> { - pub(crate) reqs: Vec>>>, +pub struct LocalIterCollectHandle< + T: Dist + ArrayOps, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend, +> { + pub(crate) reqs: Vec>>>, pub(crate) distribution: Distribution, pub(crate) team: Pin>, pub(crate) _phantom: PhantomData, } -impl TeamFrom<(&'a Vec,Distribution)> + SyncSend> LocalIterCollectHandle { +impl TeamFrom<(&'a Vec, Distribution)> + SyncSend> + LocalIterCollectHandle +{ fn create_array(&self, local_vals: &Vec) -> A { let input = (local_vals, self.distribution); input.team_into(&self.team) } } #[async_trait] -impl TeamFrom<(&'a Vec,Distribution)> + SyncSend> IterRequest +impl TeamFrom<(&'a Vec, Distribution)> + SyncSend> IterRequest for LocalIterCollectHandle { type Output = A; @@ -134,39 +142,37 @@ impl TeamFrom<(&'a Vec,Distribution)> + SyncS temp_vals.extend(v); } temp_vals.sort_by(|a, b| a.0.cmp(&b.0)); - let mut local_vals = temp_vals.into_iter().map(|v| v.1).collect(); + let local_vals = temp_vals.into_iter().map(|v| v.1).collect(); self.create_array(&local_vals) } fn wait(mut self: Box) -> Self::Output { - let mut num_local_vals = 0; + // let mut num_local_vals = 0; let mut temp_vals = vec![]; for req in self.reqs.drain(0..) { let v = req.get(); temp_vals.extend(v); } temp_vals.sort_by(|a, b| a.0.cmp(&b.0)); - let mut local_vals = temp_vals.into_iter().map(|v| v.1).collect(); + let local_vals = temp_vals.into_iter().map(|v| v.1).collect(); self.create_array(&local_vals) } } #[lamellar_impl::AmLocalDataRT(Clone)] -pub(crate) struct CollectAm -{ - pub(crate) iter: Collect, +pub(crate) struct CollectAm { + pub(crate) iter: Collect, pub(crate) schedule: IterSchedule, } - #[lamellar_impl::rt_am_local] -impl LamellarAm for CollectAm +impl LamellarAm for CollectAm where I: LocalIterator, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - async fn exec(&self) -> Vec { - let mut iter = self.schedule.init_iter(self.iter.clone()); + async fn exec(&self) -> Vec { + let iter = self.schedule.init_iter(self.iter.clone()); iter.collect::>() } } @@ -180,7 +186,6 @@ where // pub(crate) schedule: IterSchedule, // } - // #[lamellar_impl::rt_am_local] // impl LamellarAm for CollectAsyncAm // where @@ -196,6 +201,3 @@ where // res // } // } - - - diff --git a/src/array/iterator/local_iterator/consumer/count.rs b/src/array/iterator/local_iterator/consumer/count.rs index 02f62919..b86d0ad8 100644 --- a/src/array/iterator/local_iterator/consumer/count.rs +++ b/src/array/iterator/local_iterator/consumer/count.rs @@ -1,7 +1,7 @@ +use crate::active_messaging::LamellarArcLocalAm; use crate::array::iterator::consumer::*; +use crate::array::iterator::local_iterator::LocalIterator; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::{LocalIterator}; -use crate::active_messaging::LamellarArcLocalAm; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; @@ -14,73 +14,79 @@ pub struct Count { pub(crate) iter: I, } -impl IterConsumer for Count +impl IterConsumer for Count where I: LocalIterator, { type AmOutput = usize; type 
Output = usize; type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - Count{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + Count { + iter: self.iter.init(start, cnt), } } fn next(&mut self) -> Option { self.iter.next() } - fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm{ - Arc::new(CountAm{ + fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { + Arc::new(CountAm { iter: self.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>{ - Box::new(LocalIterCountHandle { - reqs - }) + fn create_handle( + self, + _team: Pin>, + reqs: Vec>>, + ) -> Box> { + Box::new(LocalIterCountHandle { reqs }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } -} - +} #[doc(hidden)] pub struct LocalIterCountHandle { pub(crate) reqs: Vec>>, } - #[doc(hidden)] #[async_trait] -impl IterRequest for LocalIterCountHandle{ +impl IterRequest for LocalIterCountHandle { type Output = usize; async fn into_future(mut self: Box) -> Self::Output { - let count = futures::future::join_all(self.reqs.drain(..).map(|req| req.into_future())).await.into_iter().sum::(); + let count = futures::future::join_all(self.reqs.drain(..).map(|req| req.into_future())) + .await + .into_iter() + .sum::(); // println!("count: {} {:?}", count, std::thread::current().id()); count } - fn wait(mut self: Box) -> Self::Output { - self.reqs.drain(..).map(|req| req.get()).into_iter().sum::() + fn wait(mut self: Box) -> Self::Output { + self.reqs + .drain(..) + .map(|req| req.get()) + .into_iter() + .sum::() } } - #[lamellar_impl::AmLocalDataRT(Clone)] -pub(crate) struct CountAm{ +pub(crate) struct CountAm { pub(crate) iter: Count, - pub(crate) schedule: IterSchedule + pub(crate) schedule: IterSchedule, } #[lamellar_impl::rt_am_local] impl LamellarAm for CountAm where - I: LocalIterator + 'static, + I: LocalIterator + 'static, { - async fn exec(&self) -> usize{ + async fn exec(&self) -> usize { let mut iter = self.schedule.init_iter(self.iter.clone()); - let mut count: usize = 0; + let mut count: usize = 0; while let Some(_) = iter.next() { count += 1; } @@ -88,6 +94,3 @@ where count } } - - - diff --git a/src/array/iterator/local_iterator/consumer/for_each.rs b/src/array/iterator/local_iterator/consumer/for_each.rs index f066f058..23e4ecc8 100644 --- a/src/array/iterator/local_iterator/consumer/for_each.rs +++ b/src/array/iterator/local_iterator/consumer/for_each.rs @@ -1,7 +1,7 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::local_iterator::LocalIterator; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::{LocalIterator}; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; @@ -10,9 +10,8 @@ use futures::Future; use std::pin::Pin; use std::sync::Arc; - #[derive(Clone, Debug)] -pub struct ForEach +pub struct ForEach where I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, @@ -21,18 +20,18 @@ where pub(crate) op: F, } -impl IterConsumer for ForEach +impl IterConsumer for ForEach where I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, - { +{ type AmOutput = (); type Output = (); type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ + fn init(&self, start: usize, cnt: 
usize) -> Self { // println!("ForEach before init start {:?} cnt {:?}", start,cnt); - let iter = ForEach{ - iter: self.iter.init(start,cnt), + let iter = ForEach { + iter: self.iter.init(start, cnt), op: self.op.clone(), }; // println!("ForEach after init start {:?} cnt {:?}", start,cnt); @@ -41,25 +40,27 @@ where fn next(&mut self) -> Option { self.iter.next() } - fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm{ - Arc::new(ForEachAm{ + fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { + Arc::new(ForEachAm { iter: self.clone(), op: self.op.clone(), - schedule + schedule, }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>{ - Box::new(LocalIterForEachHandle { - reqs - }) + fn create_handle( + self, + _team: Pin>, + reqs: Vec>>, + ) -> Box> { + Box::new(LocalIterForEachHandle { reqs }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } #[derive(Debug)] -pub struct ForEachAsync +pub struct ForEachAsync where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, @@ -70,47 +71,49 @@ where // pub(crate) _phantom: PhantomData, } -impl IterConsumer for ForEachAsync +impl IterConsumer for ForEachAsync where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, - { +{ type AmOutput = (); type Output = (); type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - ForEachAsync{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + ForEachAsync { + iter: self.iter.init(start, cnt), op: self.op.clone(), } } fn next(&mut self) -> Option { self.iter.next() } - fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm{ - Arc::new(ForEachAsyncAm{ + fn into_am(&self, schedule: IterSchedule) -> LamellarArcLocalAm { + Arc::new(ForEachAsyncAm { iter: self.clone(), op: self.op.clone(), schedule, // _phantom: self._phantom.clone(), }) } - fn create_handle(self, team: Pin>, reqs: Vec>>) -> Box>{ - Box::new(LocalIterForEachHandle { - reqs - }) + fn create_handle( + self, + _team: Pin>, + reqs: Vec>>, + ) -> Box> { + Box::new(LocalIterForEachHandle { reqs }) } - fn max_elems(&self, in_elems: usize) -> usize{ + fn max_elems(&self, in_elems: usize) -> usize { self.iter.elems(in_elems) } } -impl Clone for ForEachAsync +impl Clone for ForEachAsync where -I: LocalIterator + 'static, -F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -Fut: Future + Send + 'static, + I: LocalIterator + 'static, + F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, + Fut: Future + Send + 'static, { fn clone(&self) -> Self { ForEachAsync { @@ -142,18 +145,16 @@ impl IterRequest for LocalIterForEachHandle { } #[lamellar_impl::AmLocalDataRT(Clone)] -pub(crate) struct ForEachAm +pub(crate) struct ForEachAm where I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { pub(crate) op: F, - pub(crate) iter: ForEach, - pub(crate) schedule: IterSchedule + pub(crate) iter: ForEach, + pub(crate) schedule: IterSchedule, } - - #[lamellar_impl::rt_am_local] impl LamellarAm for ForEachAm where @@ -163,7 +164,7 @@ where async fn exec(&self) { // println!("foreacham: {:?}", std::thread::current().id()); let mut iter = self.schedule.init_iter(self.iter.clone()); - while let Some(elem) = iter.next(){ + while let Some(elem) = iter.next() { (&self.op)(elem); } } @@ -177,7 +178,7 @@ where Fut: Future + Send + 'static, { pub(crate) op: F, - pub(crate) iter: ForEachAsync, + pub(crate) 
iter: ForEachAsync, pub(crate) schedule: IterSchedule, // pub(crate) _phantom: PhantomData } @@ -185,13 +186,13 @@ where #[lamellar_impl::rt_am_local] impl LamellarAm for ForEachAsyncAm where - I: LocalIterator + 'static, + I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { async fn exec(&self) { let mut iter = self.schedule.init_iter(self.iter.clone()); - while let Some(elem) = iter.next(){ + while let Some(elem) = iter.next() { (&self.op)(elem).await; } } @@ -295,9 +296,6 @@ where // } // } - - - // #[lamellar_impl::AmLocalDataRT(Clone, Debug)] // pub(crate) struct ForEachWorkStealing // where diff --git a/src/array/iterator/local_iterator/consumer/reduce.rs b/src/array/iterator/local_iterator/consumer/reduce.rs index b72bc038..f928a2c2 100644 --- a/src/array/iterator/local_iterator/consumer/reduce.rs +++ b/src/array/iterator/local_iterator/consumer/reduce.rs @@ -1,13 +1,11 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::local_iterator::LocalIterator; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::{LocalIterator}; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; use async_trait::async_trait; -use futures::Future; -use core::marker::PhantomData; use std::pin::Pin; use std::sync::Arc; @@ -26,9 +24,9 @@ where type AmOutput = Option; type Output = Option; type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - Reduce{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + Reduce { + iter: self.iter.init(start, cnt), op: self.op.clone(), } } @@ -44,7 +42,7 @@ where } fn create_handle( self, - team: Pin>, + _team: Pin>, reqs: Vec>>, ) -> Box> { Box::new(LocalIterReduceHandle { op: self.op, reqs }) @@ -96,7 +94,6 @@ pub struct LocalIterReduceHandle { pub(crate) op: F, } - #[doc(hidden)] #[async_trait] impl IterRequest for LocalIterReduceHandle @@ -106,9 +103,7 @@ where { type Output = Option; async fn into_future(mut self: Box) -> Self::Output { - futures::future::join_all(self.reqs - .drain(..) 
- .map(|req| req.into_future())) + futures::future::join_all(self.reqs.drain(..).map(|req| req.into_future())) .await .into_iter() .filter_map(|res| res) @@ -125,21 +120,21 @@ where #[lamellar_impl::AmLocalDataRT(Clone)] pub(crate) struct ReduceAm { pub(crate) op: F, - pub(crate) iter: Reduce, + pub(crate) iter: Reduce, pub(crate) schedule: IterSchedule, } #[lamellar_impl::rt_am_local] impl LamellarAm for ReduceAm where - I: LocalIterator + 'static, + I: LocalIterator + 'static, I::Item: SyncSend, F: Fn(I::Item, I::Item) -> I::Item + SyncSend + Clone + 'static, { async fn exec(&self) -> Option { let mut iter = self.schedule.init_iter(self.iter.clone()); - match iter.next(){ - Some(mut accum) =>{ + match iter.next() { + Some(mut accum) => { while let Some(elem) = iter.next() { accum = (self.op)(accum, elem); } @@ -177,334 +172,3 @@ where // // println!("thread {:?} elems processed {:?}",std::thread::current().id(), cnt); // } // } - -// we may want to support different scheduling strategies for fold -// -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachDynamic -// where -// I: LocalIterator, -// F: Fn(I::Item), -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) cur_i: Arc, -// pub(crate) max_i: usize, -// } - -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachDynamic -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) + SyncSend + 'static, -// { -// async fn exec(&self) { -// // println!("in for each {:?} {:?}", self.start_i, self.end_i); -// let mut cur_i = self.cur_i.fetch_add(1, Ordering::Relaxed); - -// while cur_i < self.max_i { -// // println!("in for each {:?} {:?} {:?}", range_i, start_i, end_i); -// let mut iter = self.data.init(cur_i, 1); -// while let Some(item) = iter.next() { -// (self.op)(item); -// } -// cur_i = self.cur_i.fetch_add(1, Ordering::Relaxed); -// } -// // println!("done in for each"); -// } -// } - -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachChunk -// where -// I: LocalIterator, -// F: Fn(I::Item), -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) ranges: Vec<(usize, usize)>, -// pub(crate) range_i: Arc, -// } - -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachChunk -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) + SyncSend + 'static, -// { -// async fn exec(&self) { -// // println!("in for each {:?} {:?}", self.start_i, self.end_i); -// let mut range_i = self.range_i.fetch_add(1, Ordering::Relaxed); -// while range_i < self.ranges.len() { -// let (start_i, end_i) = self.ranges[range_i]; -// // println!("in for each {:?} {:?} {:?}", range_i, start_i, end_i); -// let mut iter = self.data.init(start_i, end_i - start_i); -// while let Some(item) = iter.next() { -// (self.op)(item); -// } -// range_i = self.range_i.fetch_add(1, Ordering::Relaxed); -// } -// // println!("done in for each"); -// } -// } - -// #[derive(Clone, Debug)] -// pub(crate) struct ForEachWorkStealer { -// pub(crate) range: Arc>, //start, end -// } - -// impl ForEachWorkStealer { -// fn set_range(&self, start: usize, end: usize) { -// let mut range = self.range.lock(); -// range.0 = start; -// range.1 = end; -// // println!("{:?} set range {:?}", std::thread::current().id(), range); -// } - -// fn next(&self) -> Option { -// let mut range = self.range.lock(); -// range.0 += 1; -// if range.0 <= range.1 { -// Some(range.0) -// } else { -// None -// } -// } -// fn set_done(&self) { -// let mut range = self.range.lock(); -// range.0 = range.1; -// } - 
-// fn steal(&self) -> Option<(usize, usize)> { -// let mut range = self.range.lock(); -// let start = range.0; -// let end = range.1; -// // println!("{:?} stealing {:?}", std::thread::current().id(), range); -// if end > start && end - start > 2 { -// let new_end = (start + end) / 2; -// range.1 = new_end; -// Some((new_end, end)) -// } else { -// None -// } -// } -// } - -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachWorkStealing -// where -// I: LocalIterator, -// F: Fn(I::Item), -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) range: ForEachWorkStealer, -// // pub(crate) ranges: Vec<(usize, usize)>, -// // pub(crate) range_i: Arc, -// pub(crate) siblings: Vec, -// } -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachWorkStealing -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) + SyncSend + 'static, -// { -// async fn exec(&self) { -// let (start, end) = *self.range.range.lock(); -// // println!("{:?} ForEachWorkStealing {:?} {:?}",std::thread::current().id(), start, end); -// let mut iter = self.data.init(start, end - start); -// while self.range.next().is_some() { -// if let Some(elem) = iter.next() { -// (&self.op)(elem); -// } else { -// self.range.set_done(); -// } -// } -// // println!("{:?} ForEachWorkStealing done with my range",std::thread::current().id()); -// let mut rng = thread_rng(); -// let mut workers = (0..self.siblings.len()).collect::>(); -// workers.shuffle(&mut rng); -// while let Some(worker) = workers.pop() { -// // println!("{:?} ForEachWorkStealing stealing from sibling",std::thread::current().id()); -// if let Some((start, end)) = self.siblings[worker].steal() { -// let mut iter = self.data.init(start, end - start); -// self.range.set_range(start, end); -// while self.range.next().is_some() { -// if let Some(elem) = iter.next() { -// (&self.op)(elem); -// } else { -// self.range.set_done(); -// } -// } -// workers = (0..self.siblings.len()).collect::>(); -// workers.shuffle(&mut rng); -// } -// } -// // println!("{:?} ForEachWorkStealing done",std::thread::current().id()); -// } -// } - -// //-------------------------async for each------------------------------- - -// #[lamellar_impl::AmLocalDataRT(Clone)] -// pub(crate) struct ForEachAsyncStatic -// where -// I: LocalIterator, -// F: Fn(I::Item) -> Fut + SyncSend + Clone, -// Fut: Future + Send, -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) start_i: usize, -// pub(crate) end_i: usize, -// } - -// impl std::fmt::Debug for ForEachAsyncStatic -// where -// I: LocalIterator, -// F: Fn(I::Item) -> Fut + SyncSend + Clone, -// Fut: Future + Send, -// { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!( -// f, -// "ForEachAsync {{ start_i: {:?}, end_i: {:?} }}", -// self.start_i, self.end_i -// ) -// } -// } - -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachAsyncStatic -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -// Fut: Future + Send + 'static, -// { -// async fn exec(&self) { -// let mut iter = self.data.init(self.start_i, self.end_i - self.start_i); -// while let Some(elem) = iter.next() { -// (&self.op)(elem).await; -// } -// } -// } - -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachAsyncDynamic -// where -// I: LocalIterator, -// F: Fn(I::Item) -> Fut + SyncSend + Clone, -// Fut: Future + Send, -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) cur_i: Arc, -// 
pub(crate) max_i: usize, -// } - -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachAsyncDynamic -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -// Fut: Future + Send + 'static, -// { -// async fn exec(&self) { -// // println!("in for each {:?} {:?}", self.start_i, self.end_i); -// let mut cur_i = self.cur_i.fetch_add(1, Ordering::Relaxed); - -// while cur_i < self.max_i { -// // println!("in for each {:?} {:?} {:?}", range_i, start_i, end_i); -// let mut iter = self.data.init(cur_i, 1); -// while let Some(item) = iter.next() { -// (self.op)(item).await; -// } -// cur_i = self.cur_i.fetch_add(1, Ordering::Relaxed); -// } -// // println!("done in for each"); -// } -// } - -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachAsyncChunk -// where -// I: LocalIterator, -// F: Fn(I::Item) -> Fut + SyncSend + Clone, -// Fut: Future + Send, -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) ranges: Vec<(usize, usize)>, -// pub(crate) range_i: Arc, -// } - -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachAsyncChunk -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -// Fut: Future + Send + 'static, -// { -// async fn exec(&self) { -// // println!("in for each {:?} {:?}", self.start_i, self.end_i); -// let mut range_i = self.range_i.fetch_add(1, Ordering::Relaxed); -// while range_i < self.ranges.len() { -// let (start_i, end_i) = self.ranges[range_i]; -// // println!("in for each {:?} {:?} {:?}", range_i, start_i, end_i); -// let mut iter = self.data.init(start_i, end_i - start_i); -// while let Some(item) = iter.next() { -// (self.op)(item).await; -// } -// range_i = self.range_i.fetch_add(1, Ordering::Relaxed); -// } -// // println!("done in for each"); -// } -// } - -// #[lamellar_impl::AmLocalDataRT(Clone, Debug)] -// pub(crate) struct ForEachAsyncWorkStealing -// where -// I: LocalIterator, -// F: Fn(I::Item) -> Fut + SyncSend + Clone, -// Fut: Future + Send, -// { -// pub(crate) op: F, -// pub(crate) data: I, -// pub(crate) range: ForEachWorkStealer, -// pub(crate) siblings: Vec, -// } -// #[lamellar_impl::rt_am_local] -// impl LamellarAm for ForEachAsyncWorkStealing -// where -// I: LocalIterator + 'static, -// F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, -// Fut: Future + Send + 'static, -// { -// async fn exec(&self) { -// // println!("in for each {:?} {:?}", self.start_i, self.end_i); -// let (start, end) = *self.range.range.lock(); -// let mut iter = self.data.init(start, end - start); -// while self.range.next().is_some() { -// if let Some(elem) = iter.next() { -// (&self.op)(elem); -// } -// } -// // let mut rng = thread_rng().gen(); -// let mut workers = (0..self.siblings.len()).collect::>(); -// workers.shuffle(&mut thread_rng()); -// while let Some(worker) = workers.pop() { -// if let Some((start, end)) = self.siblings[worker].steal() { -// let mut iter = self.data.init(start, end - start); -// self.range.set_range(start, end); -// while self.range.next().is_some() { -// if let Some(elem) = iter.next() { -// (&self.op)(elem).await; -// } -// } -// workers = (0..self.siblings.len()).collect::>(); -// workers.shuffle(&mut thread_rng()); -// } -// } -// // println!("done in for each"); -// } -// } diff --git a/src/array/iterator/local_iterator/consumer/sum.rs b/src/array/iterator/local_iterator/consumer/sum.rs index 568f9ddd..18ed88ec 100644 --- a/src/array/iterator/local_iterator/consumer/sum.rs +++ 
b/src/array/iterator/local_iterator/consumer/sum.rs @@ -1,19 +1,16 @@ +use crate::active_messaging::{LamellarArcLocalAm, SyncSend}; use crate::array::iterator::consumer::*; +use crate::array::iterator::local_iterator::LocalIterator; use crate::array::iterator::IterRequest; -use crate::array::iterator::local_iterator::{LocalIterator}; -use crate::active_messaging::{SyncSend,LamellarArcLocalAm}; use crate::lamellar_request::LamellarRequest; use crate::lamellar_team::LamellarTeamRT; use async_trait::async_trait; -use futures::Future; -use core::marker::PhantomData; use std::pin::Pin; use std::sync::Arc; #[derive(Clone, Debug)] -pub(crate) struct Sum -{ +pub(crate) struct Sum { pub(crate) iter: I, } @@ -25,9 +22,9 @@ where type AmOutput = I::Item; type Output = I::Item; type Item = I::Item; - fn init(&self, start: usize, cnt: usize) -> Self{ - Sum{ - iter: self.iter.init(start,cnt), + fn init(&self, start: usize, cnt: usize) -> Self { + Sum { + iter: self.iter.init(start, cnt), } } fn next(&mut self) -> Option { @@ -41,7 +38,7 @@ where } fn create_handle( self, - team: Pin>, + _team: Pin>, reqs: Vec>>, ) -> Box> { Box::new(LocalIterSumHandle { reqs }) @@ -56,7 +53,6 @@ pub struct LocalIterSumHandle { pub(crate) reqs: Vec>>, } - #[doc(hidden)] #[async_trait] impl IterRequest for LocalIterSumHandle @@ -65,9 +61,7 @@ where { type Output = T; async fn into_future(mut self: Box) -> Self::Output { - futures::future::join_all(self.reqs - .drain(..) - .map(|req| req.into_future())) + futures::future::join_all(self.reqs.drain(..).map(|req| req.into_future())) .await .into_iter() .sum::() @@ -80,7 +74,6 @@ where } } - #[lamellar_impl::AmLocalDataRT(Clone)] pub(crate) struct SumAm { pub(crate) iter: Sum, @@ -90,11 +83,11 @@ pub(crate) struct SumAm { #[lamellar_impl::rt_am_local] impl LamellarAm for SumAm where - I: LocalIterator + 'static, + I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, { async fn exec(&self) -> Option { - let mut iter = self.schedule.init_iter(self.iter.clone()); + let iter = self.schedule.init_iter(self.iter.clone()); iter.sum::() } } @@ -123,4 +116,3 @@ where // // println!("thread {:?} elems processed {:?}",std::thread::current().id(), cnt); // } // } - diff --git a/src/array/iterator/mod.rs b/src/array/iterator/mod.rs index 5548dee4..eece8f3c 100644 --- a/src/array/iterator/mod.rs +++ b/src/array/iterator/mod.rs @@ -6,7 +6,6 @@ use local_iterator::LocalIterator; pub mod one_sided_iterator; use one_sided_iterator::OneSidedIterator; pub mod consumer; -use consumer::IterConsumer; use crate::memregion::Dist; @@ -20,7 +19,6 @@ pub trait IterRequest { fn wait(self: Box) -> Self::Output; } - /// The Schedule type controls how elements of a LamellarArray are distributed to threads when /// calling `for_each_with_schedule` on a local or distributed iterator. /// diff --git a/src/array/local_lock_atomic.rs b/src/array/local_lock_atomic.rs index 6bdd26b0..e8a2003c 100644 --- a/src/array/local_lock_atomic.rs +++ b/src/array/local_lock_atomic.rs @@ -1,7 +1,6 @@ mod iteration; pub(crate) mod operations; mod rdma; -use crate::array::local_lock_atomic::operations::BUFOPS; use crate::array::private::LamellarArrayPrivate; use crate::array::r#unsafe::{UnsafeByteArray, UnsafeByteArrayWeak}; use crate::array::*; @@ -13,7 +12,6 @@ use parking_lot::{ lock_api::{ArcRwLockReadGuard, ArcRwLockWriteGuard}, RawRwLock, }; -use std::any::TypeId; use std::ops::{Deref, DerefMut}; /// A safe abstraction of a distributed array, providing read/write access protected by locks. 
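// A minimal usage sketch of the lock-protected access described above -- not part
// of the patch. The accessor names (`read_local_data`/`write_local_data`) are
// assumptions based on the surrounding code; per the Deref impls in this file,
// the guards dereference to the PE's local data.
fn local_lock_sketch() {
    let world = lamellar::LamellarWorldBuilder::new().build();
    let array: lamellar::array::LocalLockArray<usize> =
        lamellar::array::LocalLockArray::new(&world, 100, lamellar::array::Distribution::Block);
    {
        let mut local = array.write_local_data(); // exclusive local write lock
        for elem in local.iter_mut() {
            *elem = world.my_pe();
        }
    } // write lock released at end of scope
    let local = array.read_local_data(); // shared read lock
    println!("PE {}: first local elem = {}", world.my_pe(), local[0]);
}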
@@ -183,7 +181,7 @@ impl Deref for LocalLockLocalData<'_, T> { } } -impl LocalLockArray { +impl LocalLockArray { #[doc(alias = "Collective")] /// Construct a new LocalLockArray with a length of `array_size` whose data will be layed out with the provided `distribution` on the PE's specified by the `team`. /// `team` is commonly a [LamellarWorld][crate::LamellarWorld] or [LamellarTeam][crate::LamellarTeam] (instance or reference). @@ -206,18 +204,6 @@ impl LocalLockArray { array.block_on_outstanding(DarcMode::LocalLockArray); let lock = LocalRwDarc::new(team, ()).unwrap(); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let mut op_bufs = array.inner.data.op_buffers.write(); - let bytearray = LocalLockByteArray { - lock: lock.clone(), - array: array.clone().into(), - }; - - for _pe in 0..array.num_pes() { - op_bufs.push(func(LocalLockByteArray::downgrade(&bytearray))); - } - } - LocalLockArray { lock: lock, array: array, @@ -574,8 +560,8 @@ impl LocalLockArray { } } -impl TeamFrom<(Vec,Distribution)> for LocalLockArray { - fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(Vec, Distribution)> for LocalLockArray { + fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self { let (vals, distribution) = input; let input = (&vals, distribution); let array: UnsafeArray = input.team_into(team); @@ -588,16 +574,7 @@ impl From> for LocalLockArray { // println!("locallock from unsafe"); array.block_on_outstanding(DarcMode::LocalLockArray); let lock = LocalRwDarc::new(array.team(), ()).unwrap(); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let bytearray = LocalLockByteArray { - lock: lock.clone(), - array: array.clone().into(), - }; - let mut op_bufs = array.inner.data.op_buffers.write(); - for _pe in 0..array.inner.data.num_pes { - op_bufs.push(func(LocalLockByteArray::downgrade(&bytearray))) - } - } + LocalLockArray { lock: lock, array: array, @@ -651,11 +628,10 @@ impl From> for LamellarByteArray { } impl From for LocalLockArray { - fn from(array:LamellarByteArray) -> Self { + fn from(array: LamellarByteArray) -> Self { if let LamellarByteArray::LocalLockArray(array) = array { array.into() - } - else { + } else { panic!("Expected LamellarByteArray::LocalLockArray") } } diff --git a/src/array/local_lock_atomic/iteration.rs b/src/array/local_lock_atomic/iteration.rs index c8cfddf7..ccb9931a 100644 --- a/src/array/local_lock_atomic/iteration.rs +++ b/src/array/local_lock_atomic/iteration.rs @@ -1,8 +1,12 @@ -use crate::array::local_lock_atomic::*; -use crate::array::iterator::distributed_iterator::{DistIter,DistIteratorLauncher,IndexedDistributedIterator,DistributedIterator}; -use crate::array::iterator::local_iterator::{LocalIter,LocalIteratorLauncher,IndexedLocalIterator,LocalIterator}; +use crate::array::iterator::distributed_iterator::{ + DistIteratorLauncher, DistributedIterator, IndexedDistributedIterator, +}; +use crate::array::iterator::local_iterator::{ + IndexedLocalIterator, LocalIterator, LocalIteratorLauncher, +}; use crate::array::iterator::one_sided_iterator::OneSidedIter; use crate::array::iterator::{LamellarArrayIterators, LamellarArrayMutIterators, Schedule}; +use crate::array::local_lock_atomic::*; use crate::array::private::LamellarArrayPrivate; use crate::array::*; use crate::memregion::Dist; @@ -366,7 +370,6 @@ impl LamellarArrayMutIterators for LocalLockArray { } } - impl DistIteratorLauncher for LocalLockArray { fn global_index_from_local(&self, index: usize, chunk_size: usize) -> Option { self.array.global_index_from_local(index, 
chunk_size) @@ -405,7 +408,7 @@ impl DistIteratorLauncher for LocalLockArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - DistIteratorLauncher::for_each_async(&self.array,iter, op) + DistIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -425,16 +428,21 @@ impl DistIteratorLauncher for LocalLockArray { where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self,sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } @@ -445,9 +453,9 @@ impl DistIteratorLauncher for LocalLockArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_async(&self.array, iter, d) } @@ -460,11 +468,11 @@ impl DistIteratorLauncher for LocalLockArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - DistIteratorLauncher::collect_async_with_schedule(&self.array, sched,iter, d) + DistIteratorLauncher::collect_async_with_schedule(&self.array, sched, iter, d) } fn team(&self) -> Pin> { self.array.team_rt().clone() @@ -486,7 +494,7 @@ impl LocalIteratorLauncher for LocalLockArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each(&self.array,iter, op) + LocalIteratorLauncher::for_each(&self.array, iter, op) } fn for_each_with_schedule( &self, @@ -498,19 +506,15 @@ impl LocalIteratorLauncher for LocalLockArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each_with_schedule(&self.array,sched, iter, op) + LocalIteratorLauncher::for_each_with_schedule(&self.array, sched, iter, op) } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - LocalIteratorLauncher::for_each_async(&self.array,iter, op) + LocalIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -535,7 +539,12 @@ impl LocalIteratorLauncher for LocalLockArray { LocalIteratorLauncher::reduce(&self.array, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -568,21 
+577,26 @@ impl LocalIteratorLauncher for LocalLockArray { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } -// fn collect_async( + // fn collect_async( // &self, // iter: &I, // d: Distribution, @@ -613,14 +627,18 @@ impl LocalIteratorLauncher for LocalLockArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count(&self.array, iter) } - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count_with_schedule(&self.array, sched, iter) } @@ -632,8 +650,12 @@ impl LocalIteratorLauncher for LocalLockArray { { LocalIteratorLauncher::sum(&self.array, iter) } - - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, @@ -645,4 +667,3 @@ impl LocalIteratorLauncher for LocalLockArray { self.array.team_rt().clone() } } - diff --git a/src/array/local_lock_atomic/operations.rs b/src/array/local_lock_atomic/operations.rs index a85c3ff4..b9c42841 100644 --- a/src/array/local_lock_atomic/operations.rs +++ b/src/array/local_lock_atomic/operations.rs @@ -1,56 +1,5 @@ use crate::array::local_lock_atomic::*; use crate::array::*; -use std::any::TypeId; -use std::collections::HashMap; - -type BufFn = fn(LocalLockByteArrayWeak) -> Arc; -// type MultiMultiFn = fn(LocalLockByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; -// type MultiSingleFn = fn(LocalLockByteArray,ArrayOpCmd2,Vec,Vec) -> LamellarArcAm; - -lazy_static! 
{ - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - // pub(crate) static ref MULTIMULTIOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // // }; - // pub(crate) static ref MULTISINGLEOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; - -} - -#[doc(hidden)] -pub struct LocalLockArrayOpBuf { - pub id: TypeId, - pub op: BufFn, -} -// #[doc(hidden)] -// pub struct LocalLockArrayMultiMultiOps { -// pub id: TypeId, -// pub op: MultiMultiFn, -// } -// #[doc(hidden)] -// pub struct LocalLockArrayMultiSingleOps { -// pub id: TypeId, -// pub op: MultiSingleFn, -// } - -crate::inventory::collect!(LocalLockArrayOpBuf); -// crate::inventory::collect!(LocalLockArrayMultiMultiOps); -// crate::inventory::collect!(LocalLockArrayMultiSingleOps); impl ReadOnlyOps for LocalLockArray {} @@ -65,123 +14,3 @@ impl ShiftOps for LocalLockArray {} impl CompareExchangeOps for LocalLockArray {} impl CompareExchangeEpsilonOps for LocalLockArray {} - -// // impl LocalLockArray { -// impl LocalArithmeticOps for LocalLockArray { -// fn local_fetch_add(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_add LocalArithmeticOps for LocalLockArray "); -// // let _lock = self.lock.write(); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; //this locks the -// slice[index] += val; -// orig -// } -// fn local_fetch_sub(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for LocalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] -= val; -// orig -// } -// fn local_fetch_mul(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for LocalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] *= val; -// orig -// } -// fn local_fetch_div(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for LocalLockArray "); -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] /= val; -// // println!("div i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// } -// impl LocalBitWiseOps for LocalLockArray { -// fn local_fetch_bit_and(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for LocalLockArray "); -// let orig = slice[index]; -// slice[index] &= val; -// // println!("and i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// fn local_fetch_bit_or(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// // println!("local_sub LocalArithmeticOps for LocalLockArray "); -// let orig = slice[index]; -// slice[index] |= val; -// orig -// } -// } -// impl LocalAtomicOps for LocalLockArray { -// fn local_load(&self, index: impl OpInput<'a,usize>, _val: T) -> T { -// self.local_as_mut_slice()[index] -// } - -// fn local_store(&self, index: impl 
OpInput<'a,usize>, val: T) { -// self.local_as_mut_slice()[index] = val; //this locks the array -// } - -// fn local_swap(&self, index: impl OpInput<'a,usize>, val: T) -> T { -// let mut slice = self.local_as_mut_slice(); //this locks the array -// let orig = slice[index]; -// slice[index] = val; -// orig -// } -// } -// // } - -// #[macro_export] -// macro_rules! LocalLockArray_create_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Add,[<$name dist_add>],[<$name local_add>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchAdd,[<$name dist_fetch_add>],[<$name local_add>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Sub,[<$name dist_sub>],[<$name local_sub>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchSub,[<$name dist_fetch_sub>],[<$name local_sub>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Mul,[<$name dist_mul>],[<$name local_mul>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchMul,[<$name dist_fetch_mul>],[<$name local_mul>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Div,[<$name dist_div>],[<$name local_div>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchDiv,[<$name dist_fetch_div>],[<$name local_div>]} - -// } -// } -// } - -// #[macro_export] -// macro_rules! LocalLockArray_create_bitwise_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::And,[<$name dist_bit_and>],[<$name local_bit_and>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchAnd,[<$name dist_fetch_bit_and>],[<$name local_bit_and>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Or,[<$name dist_bit_or>],[<$name local_bit_or>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::FetchOr,[<$name dist_fetch_bit_or>],[<$name local_bit_or>]} -// } -// } -// } - -// #[macro_export] -// macro_rules! LocalLockArray_create_atomic_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Store,[<$name dist_store>],[<$name local_store>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Load,[<$name dist_load>],[<$name local_load>]} -// $crate::LocalLockArray_register!{$a,ArrayOpCmd::Swap,[<$name dist_swap>],[<$name local_swap>]} -// } -// } -// } -// #[macro_export] -// macro_rules! LocalLockArray_register { -// ($id:ident, $optype:path, $op:ident, $local:ident) => { -// inventory::submit! { -// #![crate =$crate] -// $crate::array::LocalLockArrayOp{ -// id: ($optype,std::any::TypeId::of::<$id>()), -// op: $op, -// } -// } -// }; -// } diff --git a/src/array/native_atomic.rs b/src/array/native_atomic.rs index 84c2255b..f80543e1 100644 --- a/src/array/native_atomic.rs +++ b/src/array/native_atomic.rs @@ -2,7 +2,6 @@ pub(crate) mod iteration; pub(crate) mod operations; mod rdma; use crate::array::atomic::AtomicElement; -use crate::array::native_atomic::operations::BUFOPS; use crate::array::private::LamellarArrayPrivate; use crate::array::r#unsafe::{UnsafeByteArray, UnsafeByteArrayWeak}; use crate::array::*; @@ -11,9 +10,6 @@ use crate::darc::DarcMode; use crate::lamellar_team::{IntoLamellarTeam, LamellarTeamRT}; use crate::memregion::Dist; use serde::ser::SerializeSeq; -// use parking_lot::{ -// Mutex,MutexGuard -// }; use std::any::TypeId; use std::ops::{ AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign, @@ -173,7 +169,7 @@ macro_rules! slice_as_atomic{ macro_rules! 
as_type{ { $val:ident,$A:ty } => { { - *(&$val as *const T as *mut $A) + *(&$val as *const T as *const $A) } } } @@ -915,7 +911,7 @@ impl Iterator for NativeAtomicLocalDataIter { } } -impl NativeAtomicArray { +impl NativeAtomicArray { // Send + Copy == Dist pub(crate) fn new_internal>( team: U, @@ -925,17 +921,6 @@ impl NativeAtomicArray { // println!("new native atomic array 1"); let array = UnsafeArray::new(team.clone(), array_size, distribution); array.block_on_outstanding(DarcMode::NativeAtomicArray); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let mut op_bufs = array.inner.data.op_buffers.write(); - let bytearray = NativeAtomicByteArray { - array: array.clone().into(), - orig_t: NativeAtomicType::from::(), - }; - - for _pe in 0..array.num_pes() { - op_bufs.push(func(NativeAtomicByteArray::downgrade(&bytearray))); - } - } NativeAtomicArray { array: array, @@ -1006,8 +991,8 @@ impl NativeAtomicArray { } } -impl TeamFrom<(Vec,Distribution)> for NativeAtomicArray { - fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(Vec, Distribution)> for NativeAtomicArray { + fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self { let (vals, distribution) = input; let input = (&vals, distribution); let array: UnsafeArray = input.team_into(team); @@ -1020,16 +1005,7 @@ impl From> for NativeAtomicArray { fn from(array: UnsafeArray) -> Self { // println!("native from unsafe"); array.block_on_outstanding(DarcMode::NativeAtomicArray); - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let bytearray = NativeAtomicByteArray { - array: array.clone().into(), - orig_t: NativeAtomicType::from::(), - }; - let mut op_bufs = array.inner.data.op_buffers.write(); - for _pe in 0..array.inner.data.num_pes { - op_bufs.push(func(NativeAtomicByteArray::downgrade(&bytearray))) - } - } + NativeAtomicArray { array: array, orig_t: NativeAtomicType::from::(), @@ -1059,11 +1035,10 @@ impl From> for LamellarByteArray { #[doc(hidden)] impl From for NativeAtomicArray { - fn from(array:LamellarByteArray) -> Self { + fn from(array: LamellarByteArray) -> Self { if let LamellarByteArray::NativeAtomicArray(array) = array { array.into() - } - else { + } else { panic!("Expected LamellarByteArray::NativeAtomicArray") } } diff --git a/src/array/native_atomic/iteration.rs b/src/array/native_atomic/iteration.rs index 83fa0b8a..4bc0d36a 100644 --- a/src/array/native_atomic/iteration.rs +++ b/src/array/native_atomic/iteration.rs @@ -1,8 +1,10 @@ -use crate::array::native_atomic::*; -use crate::array::iterator::distributed_iterator::{DistIter,DistIteratorLauncher,IndexedDistributedIterator,DistributedIterator}; -use crate::array::iterator::local_iterator::{LocalIter,LocalIteratorLauncher,IndexedLocalIterator,LocalIterator}; +use crate::array::iterator::distributed_iterator::{ + DistIteratorLauncher, DistributedIterator, IndexedDistributedIterator, +}; +use crate::array::iterator::local_iterator::{LocalIterator, LocalIteratorLauncher}; use crate::array::iterator::one_sided_iterator::OneSidedIter; use crate::array::iterator::{LamellarArrayIterators, LamellarArrayMutIterators, Schedule}; +use crate::array::native_atomic::*; // use crate::array::private::LamellarArrayPrivate; use crate::array::*; use crate::memregion::Dist; @@ -228,7 +230,7 @@ impl DistIteratorLauncher for NativeAtomicArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - DistIteratorLauncher::for_each_async(&self.array,iter, op) + DistIteratorLauncher::for_each_async(&self.array, iter, op) } fn 
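// Note on the `as_type!` hunk above: the read-only reinterpret cast now goes
// through `*const $A` instead of `*mut $A`; minting a mutable pointer from a
// shared borrow just to read through it was needlessly permissive. The same
// read as a standalone sketch (assumes T and A have identical size and
// compatible alignment):
unsafe fn reinterpret_read<T, A: Copy>(val: &T) -> A {
    debug_assert_eq!(std::mem::size_of::<T>(), std::mem::size_of::<A>());
    *(val as *const T as *const A)
}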
for_each_async_with_schedule( &self, @@ -248,16 +250,21 @@ impl DistIteratorLauncher for NativeAtomicArray { where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self,sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } @@ -268,9 +275,9 @@ impl DistIteratorLauncher for NativeAtomicArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_async(&self.array, iter, d) } @@ -283,11 +290,11 @@ impl DistIteratorLauncher for NativeAtomicArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - DistIteratorLauncher::collect_async_with_schedule(&self.array, sched,iter, d) + DistIteratorLauncher::collect_async_with_schedule(&self.array, sched, iter, d) } fn team(&self) -> Pin> { self.array.team_rt().clone() @@ -309,7 +316,7 @@ impl LocalIteratorLauncher for NativeAtomicArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each(&self.array,iter, op) + LocalIteratorLauncher::for_each(&self.array, iter, op) } fn for_each_with_schedule( &self, @@ -321,19 +328,15 @@ impl LocalIteratorLauncher for NativeAtomicArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each_with_schedule(&self.array,sched, iter, op) + LocalIteratorLauncher::for_each_with_schedule(&self.array, sched, iter, op) } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - LocalIteratorLauncher::for_each_async(&self.array,iter, op) + LocalIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -358,7 +361,12 @@ impl LocalIteratorLauncher for NativeAtomicArray { LocalIteratorLauncher::reduce(&self.array, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -391,21 +399,26 @@ impl LocalIteratorLauncher for NativeAtomicArray { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { 
LocalIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } -// fn collect_async( + // fn collect_async( // &self, // iter: &I, // d: Distribution, @@ -436,14 +449,18 @@ impl LocalIteratorLauncher for NativeAtomicArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count(&self.array, iter) } - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count_with_schedule(&self.array, sched, iter) } @@ -455,8 +472,12 @@ impl LocalIteratorLauncher for NativeAtomicArray { { LocalIteratorLauncher::sum(&self.array, iter) } - - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, diff --git a/src/array/native_atomic/operations.rs b/src/array/native_atomic/operations.rs index 861677c4..09f4c36e 100644 --- a/src/array/native_atomic/operations.rs +++ b/src/array/native_atomic/operations.rs @@ -1,61 +1,5 @@ use crate::array::native_atomic::*; use crate::array::*; -use std::any::TypeId; -use std::collections::HashMap; - -type BufFn = fn(NativeAtomicByteArrayWeak) -> Arc; -// type MultiMultiFn = fn(NativeAtomicByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; -// type MultiSingleFn = fn(NativeAtomicByteArray,ArrayOpCmd2,Vec,Vec) -> LamellarArcAm; - -lazy_static! 
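// A hedged usage sketch (not part of this patch) for the collect/collect_async
// launchers above. `AtomicArray` is used here because it dispatches to
// `NativeAtomicArray` for natively atomic element types; the adapter names
// (`dist_iter`, `map`, `collect`) are assumed to mirror these launcher
// signatures.
fn dist_collect_sketch(world: &lamellar::LamellarWorld) {
    use lamellar::array::prelude::*;
    let src = AtomicArray::<usize>::new(world, 64, Distribution::Block);
    // Collect a transformed distributed iterator into a fresh read-only array.
    let doubled = world.block_on(
        src.dist_iter()
            .map(|e| e.load() * 2)
            .collect::<ReadOnlyArray<usize>>(Distribution::Block),
    );
    let _ = doubled;
}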
{ - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - - // pub(crate) static ref MULTIMULTIOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; - - // pub(crate) static ref MULTISINGLEOPS: HashMap = { - // let mut map = HashMap::new(); - // for op in crate::inventory::iter:: { - // map.insert(op.id.clone(), op.op); - // } - // map - // }; - -} - -#[doc(hidden)] -pub struct NativeAtomicArrayOpBuf { - pub id: TypeId, - pub op: BufFn, -} - -// #[doc(hidden)] -// pub struct NativeAtomicArrayMultiMultiOps { -// pub id: TypeId, -// pub op: MultiMultiFn, -// } - -// #[doc(hidden)] -// pub struct NativeAtomicArrayMultiSingleOps { -// pub id: TypeId, -// pub op: MultiSingleFn, -// } - -crate::inventory::collect!(NativeAtomicArrayOpBuf); -// crate::inventory::collect!(NativeAtomicArrayMultiMultiOps); -// crate::inventory::collect!(NativeAtomicArrayMultiSingleOps); - impl ReadOnlyOps for NativeAtomicArray {} diff --git a/src/array/operations.rs b/src/array/operations.rs index 01d09578..3513e0e0 100644 --- a/src/array/operations.rs +++ b/src/array/operations.rs @@ -2,7 +2,6 @@ use crate::array::atomic::*; use crate::array::generic_atomic::*; use crate::array::local_lock_atomic::*; use crate::array::native_atomic::*; -use crate::array::r#unsafe::*; use crate::array::*; @@ -39,8 +38,7 @@ use std::u8; #[doc(hidden)] pub static OPS_BUFFER_SIZE: usize = 10_000_000; - -/// A marker trait for types that can be used as an array +/// A marker trait for types that can be used as an array /// Users should not implement this directly, rather they should use the [macro@ArrayOps] derive macro /// by passing it as an argument to the [macro@AmData] attribute macro to automatically derive this trait. 
/// @@ -68,48 +66,6 @@ pub static OPS_BUFFER_SIZE: usize = 10_000_000; /// } pub trait ArrayOps {} -#[doc(hidden)] -#[derive( - serde::Serialize, - serde::Deserialize, - Hash, - std::cmp::PartialEq, - std::cmp::Eq, - Clone, - Debug, - Copy, -)] -#[serde(bound = "T: Dist + serde::Serialize + serde::de::DeserializeOwned")] -pub enum ArrayOpCmd { - Add, - FetchAdd, - Sub, - FetchSub, - Mul, - FetchMul, - Div, - FetchDiv, - Rem, - FetchRem, - And, - FetchAnd, - Or, - FetchOr, - Xor, - FetchXor, - Store, - Load, - Swap, - Put, - Get, - CompareExchange(T), - CompareExchangeEps(T, T), - Shl, - FetchShl, - Shr, - FetchShr, -} - #[doc(hidden)] #[derive( serde::Serialize, @@ -122,7 +78,7 @@ pub enum ArrayOpCmd { Copy, )] #[serde(bound = "T: AmDist + serde::Serialize + serde::de::DeserializeOwned")] -pub enum ArrayOpCmd2 { +pub enum ArrayOpCmd { Add, FetchAdd, Sub, @@ -145,336 +101,106 @@ pub enum ArrayOpCmd2 { Put, Get, CompareExchange(T), - CompareExchangeEps(T,T), + CompareExchangeEps(T, T), Shl, FetchShl, Shr, FetchShr, } -impl From> for ArrayOpCmd2> { - fn from(cmd: ArrayOpCmd2) -> Self { +impl From> for ArrayOpCmd> { + fn from(cmd: ArrayOpCmd) -> Self { match cmd { - ArrayOpCmd2::Add => ArrayOpCmd2::Add, - ArrayOpCmd2::FetchAdd => ArrayOpCmd2::FetchAdd, - ArrayOpCmd2::Sub => ArrayOpCmd2::Sub, - ArrayOpCmd2::FetchSub => ArrayOpCmd2::FetchSub, - ArrayOpCmd2::Mul => ArrayOpCmd2::Mul, - ArrayOpCmd2::FetchMul => ArrayOpCmd2::FetchMul, - ArrayOpCmd2::Div => ArrayOpCmd2::Div, - ArrayOpCmd2::FetchDiv => ArrayOpCmd2::FetchDiv, - ArrayOpCmd2::Rem => ArrayOpCmd2::Rem, - ArrayOpCmd2::FetchRem => ArrayOpCmd2::FetchRem, - ArrayOpCmd2::And => ArrayOpCmd2::And, - ArrayOpCmd2::FetchAnd => ArrayOpCmd2::FetchAnd, - ArrayOpCmd2::Or => ArrayOpCmd2::Or, - ArrayOpCmd2::FetchOr => ArrayOpCmd2::FetchOr, - ArrayOpCmd2::Xor => ArrayOpCmd2::Xor, - ArrayOpCmd2::FetchXor => ArrayOpCmd2::FetchXor, - ArrayOpCmd2::Store => ArrayOpCmd2::Store, - ArrayOpCmd2::Load => ArrayOpCmd2::Load, - ArrayOpCmd2::Swap => ArrayOpCmd2::Swap, - ArrayOpCmd2::Put => ArrayOpCmd2::Put, - ArrayOpCmd2::Get => ArrayOpCmd2::Get, - ArrayOpCmd2::CompareExchange(old) => { + ArrayOpCmd::Add => ArrayOpCmd::Add, + ArrayOpCmd::FetchAdd => ArrayOpCmd::FetchAdd, + ArrayOpCmd::Sub => ArrayOpCmd::Sub, + ArrayOpCmd::FetchSub => ArrayOpCmd::FetchSub, + ArrayOpCmd::Mul => ArrayOpCmd::Mul, + ArrayOpCmd::FetchMul => ArrayOpCmd::FetchMul, + ArrayOpCmd::Div => ArrayOpCmd::Div, + ArrayOpCmd::FetchDiv => ArrayOpCmd::FetchDiv, + ArrayOpCmd::Rem => ArrayOpCmd::Rem, + ArrayOpCmd::FetchRem => ArrayOpCmd::FetchRem, + ArrayOpCmd::And => ArrayOpCmd::And, + ArrayOpCmd::FetchAnd => ArrayOpCmd::FetchAnd, + ArrayOpCmd::Or => ArrayOpCmd::Or, + ArrayOpCmd::FetchOr => ArrayOpCmd::FetchOr, + ArrayOpCmd::Xor => ArrayOpCmd::Xor, + ArrayOpCmd::FetchXor => ArrayOpCmd::FetchXor, + ArrayOpCmd::Store => ArrayOpCmd::Store, + ArrayOpCmd::Load => ArrayOpCmd::Load, + ArrayOpCmd::Swap => ArrayOpCmd::Swap, + ArrayOpCmd::Put => ArrayOpCmd::Put, + ArrayOpCmd::Get => ArrayOpCmd::Get, + ArrayOpCmd::CompareExchange(old) => { let old_u8 = &old as *const T as *const u8; - let old_u8_vec = unsafe { std::slice::from_raw_parts(old_u8, std::mem::size_of::()).to_vec() }; - ArrayOpCmd2::CompareExchange(old_u8_vec) - }, - ArrayOpCmd2::CompareExchangeEps(old,eps) => { + let old_u8_vec = unsafe { + std::slice::from_raw_parts(old_u8, std::mem::size_of::()).to_vec() + }; + ArrayOpCmd::CompareExchange(old_u8_vec) + } + ArrayOpCmd::CompareExchangeEps(old, eps) => { let old_u8 = &old as *const T as *const u8; - let 
old_u8_vec = unsafe { std::slice::from_raw_parts(old_u8, std::mem::size_of::()).to_vec() }; + let old_u8_vec = unsafe { + std::slice::from_raw_parts(old_u8, std::mem::size_of::()).to_vec() + }; let eps_u8 = &eps as *const T as *const u8; - let eps_u8_vec = unsafe { std::slice::from_raw_parts(eps_u8, std::mem::size_of::()).to_vec() }; - ArrayOpCmd2::CompareExchangeEps(old_u8_vec,eps_u8_vec) - - } - ArrayOpCmd2::Shl => ArrayOpCmd2::Shl, - ArrayOpCmd2::FetchShl => ArrayOpCmd2::FetchShl, - ArrayOpCmd2::Shr => ArrayOpCmd2::Shr, - ArrayOpCmd2::FetchShr => ArrayOpCmd2::FetchShr, - + let eps_u8_vec = unsafe { + std::slice::from_raw_parts(eps_u8, std::mem::size_of::()).to_vec() + }; + ArrayOpCmd::CompareExchangeEps(old_u8_vec, eps_u8_vec) + } + ArrayOpCmd::Shl => ArrayOpCmd::Shl, + ArrayOpCmd::FetchShl => ArrayOpCmd::FetchShl, + ArrayOpCmd::Shr => ArrayOpCmd::Shr, + ArrayOpCmd::FetchShr => ArrayOpCmd::FetchShr, } } } -impl From>> for ArrayOpCmd2 { - fn from(cmd: ArrayOpCmd2>) -> Self { +impl From>> for ArrayOpCmd { + fn from(cmd: ArrayOpCmd>) -> Self { match cmd { - ArrayOpCmd2::Add => ArrayOpCmd2::Add, - ArrayOpCmd2::FetchAdd => ArrayOpCmd2::FetchAdd, - ArrayOpCmd2::Sub => ArrayOpCmd2::Sub, - ArrayOpCmd2::FetchSub => ArrayOpCmd2::FetchSub, - ArrayOpCmd2::Mul => ArrayOpCmd2::Mul, - ArrayOpCmd2::FetchMul => ArrayOpCmd2::FetchMul, - ArrayOpCmd2::Div => ArrayOpCmd2::Div, - ArrayOpCmd2::FetchDiv => ArrayOpCmd2::FetchDiv, - ArrayOpCmd2::Rem => ArrayOpCmd2::Rem, - ArrayOpCmd2::FetchRem => ArrayOpCmd2::FetchRem, - ArrayOpCmd2::And => ArrayOpCmd2::And, - ArrayOpCmd2::FetchAnd => ArrayOpCmd2::FetchAnd, - ArrayOpCmd2::Or => ArrayOpCmd2::Or, - ArrayOpCmd2::FetchOr => ArrayOpCmd2::FetchOr, - ArrayOpCmd2::Xor => ArrayOpCmd2::Xor, - ArrayOpCmd2::FetchXor => ArrayOpCmd2::FetchXor, - ArrayOpCmd2::Store => ArrayOpCmd2::Store, - ArrayOpCmd2::Load => ArrayOpCmd2::Load, - ArrayOpCmd2::Swap => ArrayOpCmd2::Swap, - ArrayOpCmd2::Put => ArrayOpCmd2::Put, - ArrayOpCmd2::Get => ArrayOpCmd2::Get, - ArrayOpCmd2::CompareExchange(old) => { - let old_t = unsafe { std::slice::from_raw_parts(old.as_ptr() as *const T, std::mem::size_of::()) }; - ArrayOpCmd2::CompareExchange(old_t[0]) - }, - ArrayOpCmd2::CompareExchangeEps(old,eps) => { - let old_t = unsafe { std::slice::from_raw_parts(old.as_ptr() as *const T, std::mem::size_of::()) }; - let eps_t = unsafe { std::slice::from_raw_parts(eps.as_ptr() as *const T, std::mem::size_of::()) }; - ArrayOpCmd2::CompareExchangeEps(old_t[0],eps_t[0]) - }, - ArrayOpCmd2::Shl => ArrayOpCmd2::Shl, - ArrayOpCmd2::FetchShl => ArrayOpCmd2::FetchShl, - ArrayOpCmd2::Shr => ArrayOpCmd2::Shr, - ArrayOpCmd2::FetchShr => ArrayOpCmd2::FetchShr, - } - } -} - -impl ArrayOpCmd { - #[tracing::instrument(skip_all)] - pub fn result_size(&self) -> usize { - match self { - ArrayOpCmd::CompareExchange(_) | ArrayOpCmd::CompareExchangeEps(_, _) => { - std::mem::size_of::() + 1 - } //plus one to indicate this requires a result (0 for okay, 1 for error) - ArrayOpCmd::FetchAdd - | ArrayOpCmd::FetchSub - | ArrayOpCmd::FetchMul - | ArrayOpCmd::FetchDiv - | ArrayOpCmd::FetchRem - | ArrayOpCmd::FetchAnd - | ArrayOpCmd::FetchOr - | ArrayOpCmd::FetchXor - | ArrayOpCmd::FetchShl - | ArrayOpCmd::FetchShr - | ArrayOpCmd::Load - | ArrayOpCmd::Swap - | ArrayOpCmd::Get => std::mem::size_of::(), //just return value, assume never fails - ArrayOpCmd::Add - | ArrayOpCmd::Sub - | ArrayOpCmd::Mul - | ArrayOpCmd::Div - | ArrayOpCmd::Rem - | ArrayOpCmd::And - | ArrayOpCmd::Or - | ArrayOpCmd::Xor - | ArrayOpCmd::Shl - | ArrayOpCmd::Shr - | 
ArrayOpCmd::Store - | ArrayOpCmd::Put => 0, //we dont return anything - } - } - - pub fn to_bytes(&self, buf: &mut [u8]) -> usize { - match self { - ArrayOpCmd::Add => { - buf[0] = 0; - 1 - } - ArrayOpCmd::FetchAdd => { - buf[0] = 1; - 1 - } - ArrayOpCmd::Sub => { - buf[0] = 2; - 1 - } - ArrayOpCmd::FetchSub => { - buf[0] = 3; - 1 - } - ArrayOpCmd::Mul => { - buf[0] = 4; - 1 - } - ArrayOpCmd::FetchMul => { - buf[0] = 5; - 1 - } - ArrayOpCmd::Div => { - buf[0] = 6; - 1 - } - ArrayOpCmd::FetchDiv => { - buf[0] = 7; - 1 - } - ArrayOpCmd::Rem => { - buf[0] = 8; - 1 - } - ArrayOpCmd::FetchRem => { - buf[0] = 9; - 1 - } - ArrayOpCmd::And => { - buf[0] = 10; - 1 - } - ArrayOpCmd::FetchAnd => { - buf[0] = 11; - 1 - } - ArrayOpCmd::Or => { - buf[0] = 12; - 1 - } - ArrayOpCmd::FetchOr => { - buf[0] = 13; - 1 - } - ArrayOpCmd::Xor => { - buf[0] = 14; - 1 - } - ArrayOpCmd::FetchXor => { - buf[0] = 15; - 1 - } - ArrayOpCmd::Store => { - buf[0] = 16; - 1 - } - ArrayOpCmd::Load => { - buf[0] = 17; - 1 - } - ArrayOpCmd::Swap => { - buf[0] = 18; - 1 - } - ArrayOpCmd::Put => { - buf[0] = 19; - 1 - } - ArrayOpCmd::Get => { - buf[0] = 20; - 1 - } - ArrayOpCmd::CompareExchange(val) => { - buf[0] = 21; - unsafe { - std::ptr::copy_nonoverlapping(val as *const T, buf[1..].as_ptr() as *mut T, 1); - } - 1 + std::mem::size_of::() - } - ArrayOpCmd::CompareExchangeEps(val, eps) => { - buf[0] = 22; - let t_size = std::mem::size_of::(); - unsafe { - std::ptr::copy_nonoverlapping(val as *const T, buf[1..].as_ptr() as *mut T, 1); - std::ptr::copy_nonoverlapping( - eps as *const T, - buf[(1 + t_size)..].as_ptr() as *mut T, - 1, - ); - } - 1 + 2 * std::mem::size_of::() - } - ArrayOpCmd::Shl => { - buf[0] = 23; - 1 - } - ArrayOpCmd::FetchShl => { - buf[0] = 24; - 1 - } - ArrayOpCmd::Shr => { - buf[0] = 25; - 1 - } - ArrayOpCmd::FetchShr => { - buf[0] = 26; - 1 - } - } - } - - pub fn num_bytes(&self) -> usize { - match self { - ArrayOpCmd::Add => 1, - ArrayOpCmd::FetchAdd => 1, - ArrayOpCmd::Sub => 1, - ArrayOpCmd::FetchSub => 1, - ArrayOpCmd::Mul => 1, - ArrayOpCmd::FetchMul => 1, - ArrayOpCmd::Div => 1, - ArrayOpCmd::FetchDiv => 1, - ArrayOpCmd::Rem => 1, - ArrayOpCmd::FetchRem => 1, - ArrayOpCmd::And => 1, - ArrayOpCmd::FetchAnd => 1, - ArrayOpCmd::Or => 1, - ArrayOpCmd::FetchOr => 1, - ArrayOpCmd::Xor => 1, - ArrayOpCmd::FetchXor => 1, - ArrayOpCmd::Store => 1, - ArrayOpCmd::Load => 1, - ArrayOpCmd::Swap => 1, - ArrayOpCmd::Put => 1, - ArrayOpCmd::Get => 1, - ArrayOpCmd::CompareExchange(_val) => 1 + std::mem::size_of::(), - ArrayOpCmd::CompareExchangeEps(_val, _eps) => 1 + 2 * std::mem::size_of::(), - ArrayOpCmd::Shl => 1, - ArrayOpCmd::FetchShl => 1, - ArrayOpCmd::Shr => 1, - ArrayOpCmd::FetchShr => 1, - } - } - - pub fn from_bytes(buf: &[u8]) -> (Self, usize) { - let variant = buf[0]; - match variant { - 0 => (ArrayOpCmd::Add, 1), - 1 => (ArrayOpCmd::FetchAdd, 1), - 2 => (ArrayOpCmd::Sub, 1), - 3 => (ArrayOpCmd::FetchSub, 1), - 4 => (ArrayOpCmd::Mul, 1), - 5 => (ArrayOpCmd::FetchMul, 1), - 6 => (ArrayOpCmd::Div, 1), - 7 => (ArrayOpCmd::FetchDiv, 1), - 8 => (ArrayOpCmd::Div, 1), - 9 => (ArrayOpCmd::FetchDiv, 1), - 10 => (ArrayOpCmd::And, 1), - 11 => (ArrayOpCmd::FetchAnd, 1), - 12 => (ArrayOpCmd::Or, 1), - 13 => (ArrayOpCmd::FetchOr, 1), - 14 => (ArrayOpCmd::Or, 1), - 15 => (ArrayOpCmd::FetchOr, 1), - 16 => (ArrayOpCmd::Store, 1), - 17 => (ArrayOpCmd::Load, 1), - 18 => (ArrayOpCmd::Swap, 1), - 19 => (ArrayOpCmd::Put, 1), - 20 => (ArrayOpCmd::Get, 1), - 21 => { - let val = unsafe { *(buf[1..].as_ptr() as *const T) }; - ( 
- ArrayOpCmd::CompareExchange(val), - 1 + std::mem::size_of::(), - ) - } - 22 => { - let t_size = std::mem::size_of::(); - let val = unsafe { *(buf[1..].as_ptr() as *const T) }; - let eps = unsafe { *(buf[(1 + t_size)..].as_ptr() as *const T) }; - (ArrayOpCmd::CompareExchangeEps(val, eps), 1 + 2 * t_size) - } - 23 => (ArrayOpCmd::Shl, 1), - 24 => (ArrayOpCmd::FetchShl, 1), - 25 => (ArrayOpCmd::Shr, 1), - 26 => (ArrayOpCmd::FetchShr, 1), - _ => { - panic!("unrecognized Array Op Type"); - } + ArrayOpCmd::Add => ArrayOpCmd::Add, + ArrayOpCmd::FetchAdd => ArrayOpCmd::FetchAdd, + ArrayOpCmd::Sub => ArrayOpCmd::Sub, + ArrayOpCmd::FetchSub => ArrayOpCmd::FetchSub, + ArrayOpCmd::Mul => ArrayOpCmd::Mul, + ArrayOpCmd::FetchMul => ArrayOpCmd::FetchMul, + ArrayOpCmd::Div => ArrayOpCmd::Div, + ArrayOpCmd::FetchDiv => ArrayOpCmd::FetchDiv, + ArrayOpCmd::Rem => ArrayOpCmd::Rem, + ArrayOpCmd::FetchRem => ArrayOpCmd::FetchRem, + ArrayOpCmd::And => ArrayOpCmd::And, + ArrayOpCmd::FetchAnd => ArrayOpCmd::FetchAnd, + ArrayOpCmd::Or => ArrayOpCmd::Or, + ArrayOpCmd::FetchOr => ArrayOpCmd::FetchOr, + ArrayOpCmd::Xor => ArrayOpCmd::Xor, + ArrayOpCmd::FetchXor => ArrayOpCmd::FetchXor, + ArrayOpCmd::Store => ArrayOpCmd::Store, + ArrayOpCmd::Load => ArrayOpCmd::Load, + ArrayOpCmd::Swap => ArrayOpCmd::Swap, + ArrayOpCmd::Put => ArrayOpCmd::Put, + ArrayOpCmd::Get => ArrayOpCmd::Get, + ArrayOpCmd::CompareExchange(old) => { + let old_t = unsafe { + std::slice::from_raw_parts(old.as_ptr() as *const T, std::mem::size_of::()) + }; + ArrayOpCmd::CompareExchange(old_t[0]) + } + ArrayOpCmd::CompareExchangeEps(old, eps) => { + let old_t = unsafe { + std::slice::from_raw_parts(old.as_ptr() as *const T, std::mem::size_of::()) + }; + let eps_t = unsafe { + std::slice::from_raw_parts(eps.as_ptr() as *const T, std::mem::size_of::()) + }; + ArrayOpCmd::CompareExchangeEps(old_t[0], eps_t[0]) + } + ArrayOpCmd::Shl => ArrayOpCmd::Shl, + ArrayOpCmd::FetchShl => ArrayOpCmd::FetchShl, + ArrayOpCmd::Shr => ArrayOpCmd::Shr, + ArrayOpCmd::FetchShr => ArrayOpCmd::FetchShr, } } } @@ -482,357 +208,345 @@ impl ArrayOpCmd { #[doc(hidden)] #[repr(C)] //required as we reinterpret as bytes #[lamellar_impl::AmLocalDataRT] -pub struct IdxVal{ - pub index: usize, +pub struct IdxVal { + pub index: I, pub val: T, } -impl IdxVal { +impl IdxVal { pub fn as_bytes(&self) -> &[u8] { - unsafe { std::slice::from_raw_parts(self as *const Self as *const u8, std::mem::size_of::()) } - } -} - -#[doc(hidden)] -#[derive(serde::Serialize, Clone, Debug)] -pub enum InputToValue<'a, T: Dist> { - OneToOne(usize, T), - OneToMany(usize, OpInputEnum<'a, T>), - ManyToOne(OpInputEnum<'a, usize>, T), - ManyToMany(OpInputEnum<'a, usize>, OpInputEnum<'a, T>), -} - -impl<'a, T: Dist> InputToValue<'a, T> { - #[tracing::instrument(skip_all)] - pub(crate) fn len(&self) -> usize { - match self { - InputToValue::OneToOne(_, _) => 1, - InputToValue::OneToMany(_, vals) => vals.len(), - InputToValue::ManyToOne(indices, _) => indices.len(), - InputToValue::ManyToMany(indices, _) => indices.len(), + unsafe { + std::slice::from_raw_parts( + self as *const Self as *const u8, + std::mem::size_of::(), + ) } } - // fn num_bytes(&self) -> usize{ - // match self{ - // InputToValue::OneToOne(_,_) => std::mem::size_of::<(usize,T)>(), - // InputToValue::OneToMany(_,vals) => std::mem::size_of::()+ vals.len() * std::mem::size_of::(), - // InputToValue::ManyToOne(indices,_) => indices.len() * std::mem::size_of::() + std::mem::size_of::(), - // InputToValue::ManyToMany(indices,vals) => indices.len() * 
std::mem::size_of::() + vals.len() * std::mem::size_of::(), - // } - // } - #[tracing::instrument(skip_all)] - pub(crate) fn to_pe_offsets( - self, - array: &UnsafeArray, - ) -> ( - HashMap>, - HashMap>, - usize, - ) { - let mut pe_offsets = HashMap::new(); - let mut req_ids = HashMap::new(); - match self { - InputToValue::OneToOne(index, value) => { - let (pe, local_index) = array - .pe_and_offset_for_global_index(index) - .expect("array index out of bounds"); - pe_offsets.insert(pe, InputToValue::OneToOne(local_index, value)); - req_ids.insert(pe, vec![0]); - (pe_offsets, req_ids, 1) - } - InputToValue::OneToMany(index, values) => { - let (pe, local_index) = array - .pe_and_offset_for_global_index(index) - .expect("array index out of bounds"); - let vals_len = values.len(); - req_ids.insert(pe, (0..vals_len).collect()); - pe_offsets.insert(pe, InputToValue::OneToMany(local_index, values)); - - (pe_offsets, req_ids, vals_len) - } - InputToValue::ManyToOne(indices, value) => { - let mut temp_pe_offsets = HashMap::new(); - let mut req_cnt = 0; - for index in indices.iter() { - let (pe, local_index) = array - .pe_and_offset_for_global_index(index) - .expect("array index out of bounds"); - temp_pe_offsets - .entry(pe) - .or_insert(vec![]) - .push(local_index); - req_ids.entry(pe).or_insert(vec![]).push(req_cnt); - req_cnt += 1; - } +} - for (pe, local_indices) in temp_pe_offsets { - pe_offsets.insert( - pe, - InputToValue::ManyToOne(OpInputEnum::Vec(local_indices), value), - ); - } +// #[doc(hidden)] +// #[derive(serde::Serialize, Clone, Debug)] +// pub enum InputToValue<'a, T: Dist> { +// OneToOne(usize, T), +// OneToMany(usize, OpInputEnum<'a, T>), +// ManyToOne(OpInputEnum<'a, usize>, T), +// ManyToMany(OpInputEnum<'a, usize>, OpInputEnum<'a, T>), +// } - (pe_offsets, req_ids, indices.len()) - } - InputToValue::ManyToMany(indices, values) => { - let mut temp_pe_offsets = HashMap::new(); - let mut req_cnt = 0; - for (index, val) in indices.iter().zip(values.iter()) { - let (pe, local_index) = array - .pe_and_offset_for_global_index(index) - .expect("array index out of bounds"); - let data = temp_pe_offsets.entry(pe).or_insert((vec![], vec![])); - data.0.push(local_index); - data.1.push(val); - req_ids.entry(pe).or_insert(vec![]).push(req_cnt); - req_cnt += 1; - } - for (pe, (local_indices, vals)) in temp_pe_offsets { - pe_offsets.insert( - pe, - InputToValue::ManyToMany( - OpInputEnum::Vec(local_indices), - OpInputEnum::Vec(vals), - ), - ); - } - (pe_offsets, req_ids, indices.len()) - } - } - } -} -impl<'a, T: Dist + serde::Serialize + serde::de::DeserializeOwned> InputToValue<'a, T> { - #[tracing::instrument(skip_all)] - pub fn as_op_am_input(&self) -> OpAmInputToValue { - match self { - InputToValue::OneToOne(index, value) => OpAmInputToValue::OneToOne(*index, *value), - InputToValue::OneToMany(index, values) => { - OpAmInputToValue::OneToMany(*index, values.iter().collect()) - } - InputToValue::ManyToOne(indices, value) => { - OpAmInputToValue::ManyToOne(indices.iter().collect(), *value) - } - InputToValue::ManyToMany(indices, values) => { - OpAmInputToValue::ManyToMany(indices.iter().collect(), values.iter().collect()) - } - } - } -} +// impl<'a, T: Dist> InputToValue<'a, T> { +// #[tracing::instrument(skip_all)] +// pub(crate) fn len(&self) -> usize { +// match self { +// InputToValue::OneToOne(_, _) => 1, +// InputToValue::OneToMany(_, vals) => vals.len(), +// InputToValue::ManyToOne(indices, _) => indices.len(), +// InputToValue::ManyToMany(indices, _) => indices.len(), +// } +// } 
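// The now-generic `IdxVal` earlier in this file (index field `I` instead of a
// fixed `usize`) carries the point of this patch: the index width can be
// chosen from the array length instead of always shipping `usize`. A
// standalone sketch of the payoff, with a hypothetical packing helper
// (assumes a 64-bit `usize`):
fn pack_batch_u16(indices: &[usize], vals: &[i32]) -> Vec<u8> {
    debug_assert!(indices.iter().all(|&i| i <= u16::MAX as usize)); // array len < 2^16
    let mut buf = Vec::with_capacity(indices.len() * (2 + 4));
    for (&i, &v) in indices.iter().zip(vals) {
        buf.extend_from_slice(&(i as u16).to_ne_bytes()); // 2-byte index
        buf.extend_from_slice(&v.to_ne_bytes()); // 4-byte value
    }
    buf // 6 bytes per element instead of 12 with usize indices
}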
+// // fn num_bytes(&self) -> usize{ +// // match self{ +// // InputToValue::OneToOne(_,_) => std::mem::size_of::<(usize,T)>(), +// // InputToValue::OneToMany(_,vals) => std::mem::size_of::()+ vals.len() * std::mem::size_of::(), +// // InputToValue::ManyToOne(indices,_) => indices.len() * std::mem::size_of::() + std::mem::size_of::(), +// // InputToValue::ManyToMany(indices,vals) => indices.len() * std::mem::size_of::() + vals.len() * std::mem::size_of::(), +// // } +// // } +// #[tracing::instrument(skip_all)] +// pub(crate) fn to_pe_offsets( +// self, +// array: &UnsafeArray, +// ) -> ( +// HashMap>, +// HashMap>, +// usize, +// ) { +// let mut pe_offsets = HashMap::new(); +// let mut req_ids = HashMap::new(); +// match self { +// InputToValue::OneToOne(index, value) => { +// let (pe, local_index) = array +// .pe_and_offset_for_global_index(index) +// .expect("array index out of bounds"); +// pe_offsets.insert(pe, InputToValue::OneToOne(local_index, value)); +// req_ids.insert(pe, vec![0]); +// (pe_offsets, req_ids, 1) +// } +// InputToValue::OneToMany(index, values) => { +// let (pe, local_index) = array +// .pe_and_offset_for_global_index(index) +// .expect("array index out of bounds"); +// let vals_len = values.len(); +// req_ids.insert(pe, (0..vals_len).collect()); +// pe_offsets.insert(pe, InputToValue::OneToMany(local_index, values)); + +// (pe_offsets, req_ids, vals_len) +// } +// InputToValue::ManyToOne(indices, value) => { +// let mut temp_pe_offsets = HashMap::new(); +// let mut req_cnt = 0; +// for index in indices.iter() { +// let (pe, local_index) = array +// .pe_and_offset_for_global_index(index) +// .expect("array index out of bounds"); +// temp_pe_offsets +// .entry(pe) +// .or_insert(vec![]) +// .push(local_index); +// req_ids.entry(pe).or_insert(vec![]).push(req_cnt); +// req_cnt += 1; +// } + +// for (pe, local_indices) in temp_pe_offsets { +// pe_offsets.insert( +// pe, +// InputToValue::ManyToOne(OpInputEnum::Vec(local_indices), value), +// ); +// } + +// (pe_offsets, req_ids, indices.len()) +// } +// InputToValue::ManyToMany(indices, values) => { +// let mut temp_pe_offsets = HashMap::new(); +// let mut req_cnt = 0; +// for (index, val) in indices.iter().zip(values.iter()) { +// let (pe, local_index) = array +// .pe_and_offset_for_global_index(index) +// .expect("array index out of bounds"); +// let data = temp_pe_offsets.entry(pe).or_insert((vec![], vec![])); +// data.0.push(local_index); +// data.1.push(val); +// req_ids.entry(pe).or_insert(vec![]).push(req_cnt); +// req_cnt += 1; +// } +// for (pe, (local_indices, vals)) in temp_pe_offsets { +// pe_offsets.insert( +// pe, +// InputToValue::ManyToMany( +// OpInputEnum::Vec(local_indices), +// OpInputEnum::Vec(vals), +// ), +// ); +// } +// (pe_offsets, req_ids, indices.len()) +// } +// } +// } +// } -impl OpAmInputToValue { - #[tracing::instrument(skip_all)] - pub fn len(&self) -> usize { - match self { - OpAmInputToValue::OneToOne(_, _) => 1, - OpAmInputToValue::OneToMany(_, vals) => vals.len(), - OpAmInputToValue::ManyToOne(indices, _) => indices.len(), - OpAmInputToValue::ManyToMany(indices, _) => indices.len(), - } - } -} +// impl OpAmInputToValue { +// #[tracing::instrument(skip_all)] +// pub fn len(&self) -> usize { +// match self { +// OpAmInputToValue::OneToOne(_, _) => 1, +// OpAmInputToValue::OneToMany(_, vals) => vals.len(), +// OpAmInputToValue::ManyToOne(indices, _) => indices.len(), +// OpAmInputToValue::ManyToMany(indices, _) => indices.len(), +// } +// } +// } -#[doc(hidden)] 
-#[derive(serde::Serialize, serde::Deserialize, Debug)] -#[serde(bound = "T: Dist + serde::Serialize + serde::de::DeserializeOwned")] -pub enum OpAmInputToValue { - OneToOne(usize, T), - OneToMany(usize, Vec), - ManyToOne(Vec, T), - ManyToMany(Vec, Vec), -} +// #[doc(hidden)] +// #[derive(serde::Serialize, serde::Deserialize, Debug)] +// #[serde(bound = "T: Dist + serde::Serialize + serde::de::DeserializeOwned")] +// pub enum OpAmInputToValue { +// OneToOne(usize, T), +// OneToMany(usize, Vec), +// ManyToOne(Vec, T), +// ManyToMany(Vec, Vec), +// } -impl OpAmInputToValue { - pub fn embed_vec(data: &Vec, buf: &mut [u8]) -> usize { - let mut size = 0; - // embed the data length - let len = data.len(); - unsafe { - std::ptr::copy_nonoverlapping( - &len as *const usize, - buf[size..].as_ptr() as *mut usize, - 1, - ) - }; - size += std::mem::size_of::(); - // ---- end data length ---- - // embed the data - unsafe { - std::ptr::copy_nonoverlapping(data.as_ptr(), buf[size..].as_ptr() as *mut U, len) - }; - size += len * std::mem::size_of::(); - // ---- end data ==== - size - } - pub fn embed_single_val(val: U, buf: &mut [u8]) -> usize { - // embed the val - unsafe { std::ptr::copy_nonoverlapping(&val as *const U, buf.as_ptr() as *mut U, 1) }; - std::mem::size_of::() - // ---- end val ---- - } - pub fn to_bytes(self, buf: &mut [u8]) -> usize { - match self { - OpAmInputToValue::OneToOne(idx, val) => { - // embed the enum type - let mut size = 0; - buf[size] = 0; - size += 1; - // ----- end type ----- - // embed the index - size += OpAmInputToValue::::embed_single_val(idx, &mut buf[size..]); - // ---- end index ---- - // embed the value - size += OpAmInputToValue::::embed_single_val(val, &mut buf[size..]); - // -- end value -- - size - } - OpAmInputToValue::OneToMany(idx, vals) => { - // embed the enum type - let mut size = 0; - buf[size] = 1; - size += 1; - // ----- end type ----- - // embed the index - size += OpAmInputToValue::::embed_single_val(idx, &mut buf[size..]); - // ---- end index ---- - // embed the vals - size += OpAmInputToValue::::embed_vec(&vals, &mut buf[size..]); - // ---- end vals ---- - size - } - OpAmInputToValue::ManyToOne(idxs, val) => { - // embed the enum type - let mut size = 0; - buf[size] = 2; - size += 1; - // ----- end type ----- - // embed the indices - size += OpAmInputToValue::::embed_vec(&idxs, &mut buf[size..]); - // ---- end indices ---- - // embed the val - size += OpAmInputToValue::::embed_single_val(val, &mut buf[size..]); - // ---- end val ---- - size - } - OpAmInputToValue::ManyToMany(idxs, vals) => { - // embed the enum type - let mut size = 0; - buf[size] = 3; - size += 1; - // ----- end type ----- - // embed the indices - size += OpAmInputToValue::::embed_vec(&idxs, &mut buf[size..]); - // ---- end indices ---- - // embed the vals - size += OpAmInputToValue::::embed_vec(&vals, &mut buf[size..]); - // ---- end vals ---- - size - } - } - } - pub fn vec_size(data: &Vec) -> usize { - let mut size = 0; - let len = data.len(); - size += std::mem::size_of::(); //the length - size += len * std::mem::size_of::(); - size - } - pub fn single_val_size(_val: U) -> usize { - std::mem::size_of::() - } - pub fn num_bytes(&self) -> usize { - match self { - OpAmInputToValue::OneToOne(idx, val) => { - let mut size = 0; - size += 1; - size += OpAmInputToValue::::single_val_size(idx); - size += OpAmInputToValue::::single_val_size(val); - size - } - OpAmInputToValue::OneToMany(idx, vals) => { - let mut size = 0; - size += 1; - size += OpAmInputToValue::::single_val_size(idx); 
- size += OpAmInputToValue::::vec_size(&vals); - size - } - OpAmInputToValue::ManyToOne(idxs, val) => { - let mut size = 0; - size += 1; - size += OpAmInputToValue::::vec_size(&idxs); - size += OpAmInputToValue::::single_val_size(val); - size - } - OpAmInputToValue::ManyToMany(idxs, vals) => { - let mut size = 0; - size += 1; - size += OpAmInputToValue::::vec_size(&idxs); - size += OpAmInputToValue::::vec_size(&vals); - size - } - } - } -} +// impl OpAmInputToValue { +// pub fn embed_vec(data: &Vec, buf: &mut [u8]) -> usize { +// let mut size = 0; +// // embed the data length +// let len = data.len(); +// unsafe { +// std::ptr::copy_nonoverlapping( +// &len as *const usize, +// buf[size..].as_ptr() as *mut usize, +// 1, +// ) +// }; +// size += std::mem::size_of::(); +// // ---- end data length ---- +// // embed the data +// unsafe { +// std::ptr::copy_nonoverlapping(data.as_ptr(), buf[size..].as_ptr() as *mut U, len) +// }; +// size += len * std::mem::size_of::(); +// // ---- end data ==== +// size +// } +// pub fn embed_single_val(val: U, buf: &mut [u8]) -> usize { +// // embed the val +// unsafe { std::ptr::copy_nonoverlapping(&val as *const U, buf.as_ptr() as *mut U, 1) }; +// std::mem::size_of::() +// // ---- end val ---- +// } +// pub fn to_bytes(self, buf: &mut [u8]) -> usize { +// match self { +// OpAmInputToValue::OneToOne(idx, val) => { +// // embed the enum type +// let mut size = 0; +// buf[size] = 0; +// size += 1; +// // ----- end type ----- +// // embed the index +// size += OpAmInputToValue::::embed_single_val(idx, &mut buf[size..]); +// // ---- end index ---- +// // embed the value +// size += OpAmInputToValue::::embed_single_val(val, &mut buf[size..]); +// // -- end value -- +// size +// } +// OpAmInputToValue::OneToMany(idx, vals) => { +// // embed the enum type +// let mut size = 0; +// buf[size] = 1; +// size += 1; +// // ----- end type ----- +// // embed the index +// size += OpAmInputToValue::::embed_single_val(idx, &mut buf[size..]); +// // ---- end index ---- +// // embed the vals +// size += OpAmInputToValue::::embed_vec(&vals, &mut buf[size..]); +// // ---- end vals ---- +// size +// } +// OpAmInputToValue::ManyToOne(idxs, val) => { +// // embed the enum type +// let mut size = 0; +// buf[size] = 2; +// size += 1; +// // ----- end type ----- +// // embed the indices +// size += OpAmInputToValue::::embed_vec(&idxs, &mut buf[size..]); +// // ---- end indices ---- +// // embed the val +// size += OpAmInputToValue::::embed_single_val(val, &mut buf[size..]); +// // ---- end val ---- +// size +// } +// OpAmInputToValue::ManyToMany(idxs, vals) => { +// // embed the enum type +// let mut size = 0; +// buf[size] = 3; +// size += 1; +// // ----- end type ----- +// // embed the indices +// size += OpAmInputToValue::::embed_vec(&idxs, &mut buf[size..]); +// // ---- end indices ---- +// // embed the vals +// size += OpAmInputToValue::::embed_vec(&vals, &mut buf[size..]); +// // ---- end vals ---- +// size +// } +// } +// } +// pub fn vec_size(data: &Vec) -> usize { +// let mut size = 0; +// let len = data.len(); +// size += std::mem::size_of::(); //the length +// size += len * std::mem::size_of::(); +// size +// } +// pub fn single_val_size(_val: U) -> usize { +// std::mem::size_of::() +// } +// pub fn num_bytes(&self) -> usize { +// match self { +// OpAmInputToValue::OneToOne(idx, val) => { +// let mut size = 0; +// size += 1; +// size += OpAmInputToValue::::single_val_size(idx); +// size += OpAmInputToValue::::single_val_size(val); +// size +// } +// 
OpAmInputToValue::OneToMany(idx, vals) => { +// let mut size = 0; +// size += 1; +// size += OpAmInputToValue::::single_val_size(idx); +// size += OpAmInputToValue::::vec_size(&vals); +// size +// } +// OpAmInputToValue::ManyToOne(idxs, val) => { +// let mut size = 0; +// size += 1; +// size += OpAmInputToValue::::vec_size(&idxs); +// size += OpAmInputToValue::::single_val_size(val); +// size +// } +// OpAmInputToValue::ManyToMany(idxs, vals) => { +// let mut size = 0; +// size += 1; +// size += OpAmInputToValue::::vec_size(&idxs); +// size += OpAmInputToValue::::vec_size(&vals); +// size +// } +// } +// } +// } -#[doc(hidden)] -pub enum RemoteOpAmInputToValue<'a, T: Dist> { - OneToOne(&'a usize, &'a T), - OneToMany(&'a usize, &'a [T]), - ManyToOne(&'a [usize], &'a T), - ManyToMany(&'a [usize], &'a [T]), -} +// #[doc(hidden)] +// pub enum RemoteOpAmInputToValue<'a, T: Dist> { +// OneToOne(&'a usize, &'a T), +// OneToMany(&'a usize, &'a [T]), +// ManyToOne(&'a [usize], &'a T), +// ManyToMany(&'a [usize], &'a [T]), +// } -impl<'a, T: Dist> RemoteOpAmInputToValue<'a, T> { - pub fn unpack_slice(buf: &[u8]) -> (&[U], usize) { - let mut size = 0; - let len = unsafe { &*(buf[size..].as_ptr() as *const usize) }; - size += std::mem::size_of::(); - let vals = unsafe { std::slice::from_raw_parts(buf[size..].as_ptr() as *const U, *len) }; - size += len * std::mem::size_of::(); - (vals, size) - } - pub fn from_bytes(buf: &'a [u8]) -> (Self, usize) { - let mut size = 0; - let variant = buf[size]; - size += 1; - match variant { - 0 => { - let idx = unsafe { &*(buf[size..].as_ptr() as *const usize) }; - size += std::mem::size_of::(); - let val = unsafe { &*(buf[size..].as_ptr() as *const T) }; - size += std::mem::size_of::(); - (RemoteOpAmInputToValue::OneToOne(idx, val), size) - } - 1 => { - let idx = unsafe { &*(buf[size..].as_ptr() as *const usize) }; - size += std::mem::size_of::(); - let (vals, vals_bytes) = RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); - size += vals_bytes; - (RemoteOpAmInputToValue::OneToMany(idx, vals), size) - } - 2 => { - let (idxs, idxs_bytes) = - RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); - size += idxs_bytes; - let val = unsafe { &*(buf[size..].as_ptr() as *const T) }; - size += std::mem::size_of::(); - - (RemoteOpAmInputToValue::ManyToOne(idxs, val), size) - } - 3 => { - let (idxs, idxs_bytes) = - RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); - size += idxs_bytes; - let (vals, vals_bytes) = RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); - size += vals_bytes; - - (RemoteOpAmInputToValue::ManyToMany(idxs, vals), size) - } - _ => { - panic!("unrecognized OpAmInputToValue Type"); - } - } - } -} +// impl<'a, T: Dist> RemoteOpAmInputToValue<'a, T> { +// pub fn unpack_slice(buf: &[u8]) -> (&[U], usize) { +// let mut size = 0; +// let len = unsafe { &*(buf[size..].as_ptr() as *const usize) }; +// size += std::mem::size_of::(); +// let vals = unsafe { std::slice::from_raw_parts(buf[size..].as_ptr() as *const U, *len) }; +// size += len * std::mem::size_of::(); +// (vals, size) +// } +// pub fn from_bytes(buf: &'a [u8]) -> (Self, usize) { +// let mut size = 0; +// let variant = buf[size]; +// size += 1; +// match variant { +// 0 => { +// let idx = unsafe { &*(buf[size..].as_ptr() as *const usize) }; +// size += std::mem::size_of::(); +// let val = unsafe { &*(buf[size..].as_ptr() as *const T) }; +// size += std::mem::size_of::(); +// (RemoteOpAmInputToValue::OneToOne(idx, val), size) +// } +// 1 => { +// let idx = unsafe { &*(buf[size..].as_ptr() as 
*const usize) }; +// size += std::mem::size_of::(); +// let (vals, vals_bytes) = RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); +// size += vals_bytes; +// (RemoteOpAmInputToValue::OneToMany(idx, vals), size) +// } +// 2 => { +// let (idxs, idxs_bytes) = +// RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); +// size += idxs_bytes; +// let val = unsafe { &*(buf[size..].as_ptr() as *const T) }; +// size += std::mem::size_of::(); + +// (RemoteOpAmInputToValue::ManyToOne(idxs, val), size) +// } +// 3 => { +// let (idxs, idxs_bytes) = +// RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); +// size += idxs_bytes; +// let (vals, vals_bytes) = RemoteOpAmInputToValue::::unpack_slice(&buf[size..]); +// size += vals_bytes; + +// (RemoteOpAmInputToValue::ManyToMany(idxs, vals), size) +// } +// _ => { +// panic!("unrecognized OpAmInputToValue Type"); +// } +// } +// } +// } #[doc(hidden)] #[derive(Clone, serde::Serialize, Debug)] @@ -914,55 +628,8 @@ impl<'a, T: Dist> OpInputEnum<'_, T> { } } - // #[tracing::instrument(skip_all)] - // pub(crate) fn as_vec_chunks(&self, chunk_size: usize) -> Vec> { - - // match self { - // OpInputEnum::Val(v) => vec![vec![*v]], - // OpInputEnum::Slice(s) => s.chunks(chunk_size).map(|chunk| chunk.to_vec()).collect(), - // OpInputEnum::Vec(v) => v.chunks(chunk_size).map(|chunk| chunk.to_vec()).collect(), - // OpInputEnum::NativeAtomicLocalData(a) => { - // let mut vecs = vec![]; - // let mut data = Vec::with_capacity(chunk_size); - // for elem in a.iter() { - // data.push(elem.load()); - // if data.len() == chunk_size { - // vecs.push(data); - // data = Vec::with_capacity(chunk_size); - // } - // } - // if !data.is_empty() { - // vecs.push(data); - // } - // vecs - // }, - // OpInputEnum::GenericAtomicLocalData(a) => { - // let mut vecs = vec![]; - // let mut data = Vec::with_capacity(chunk_size); - // for elem in a.iter() { - // data.push(elem.load()); - // if data.len() == chunk_size { - // vecs.push(data); - // data = Vec::with_capacity(chunk_size); - // } - // } - // if !data.is_empty() { - // vecs.push(data); - // } - // vecs - // }, - // OpInputEnum::LocalLockLocalData(a) => a.data.chunks(chunk_size).map(|chunk| chunk.to_vec()).collect(), - // OpInputEnum::GlobalLockLocalData(a) => a.data.chunks(chunk_size).map(|chunk| chunk.to_vec()).collect(), - // // OpInputEnum::MemoryRegion(mr) => *unsafe { mr.as_slice() } - // // .expect("memregion not local") - // // .first() - // // .expect("memregion is empty"), - // } - // } - // #[tracing::instrument(skip_all)] pub(crate) fn as_vec_chunks(&self, chunk_size: usize) -> Box> + '_> { - match self { OpInputEnum::Val(v) => Box::new(vec![vec![*v]].into_iter()), OpInputEnum::Slice(s) => Box::new(s.chunks(chunk_size).map(|chunk| chunk.to_vec())), @@ -970,35 +637,37 @@ impl<'a, T: Dist> OpInputEnum<'_, T> { OpInputEnum::NativeAtomicLocalData(a) => { let mut data = Vec::with_capacity(chunk_size); - Box::new(a.iter().enumerate().filter_map(move |(i,elem)| { + Box::new(a.iter().enumerate().filter_map(move |(i, elem)| { data.push(elem.load()); if data.len() == chunk_size || i == a.len() - 1 { let mut new_data = Vec::with_capacity(chunk_size); std::mem::swap(&mut data, &mut new_data); Some(new_data) - } - else { + } else { None } })) - }, + } OpInputEnum::GenericAtomicLocalData(a) => { let mut data = Vec::with_capacity(chunk_size); - Box::new(a.iter().enumerate().filter_map(move |(i,elem)| { + Box::new(a.iter().enumerate().filter_map(move |(i, elem)| { data.push(elem.load()); if data.len() == chunk_size || i == a.len() - 1 
{ let mut new_data = Vec::with_capacity(chunk_size); std::mem::swap(&mut data, &mut new_data); Some(new_data) - } - else { + } else { None } })) - }, - OpInputEnum::LocalLockLocalData(a) => Box::new(a.data.chunks(chunk_size).map(|chunk| chunk.to_vec())), - OpInputEnum::GlobalLockLocalData(a) => Box::new(a.data.chunks(chunk_size).map(|chunk| chunk.to_vec())), + } + OpInputEnum::LocalLockLocalData(a) => { + Box::new(a.data.chunks(chunk_size).map(|chunk| chunk.to_vec())) + } + OpInputEnum::GlobalLockLocalData(a) => { + Box::new(a.data.chunks(chunk_size).map(|chunk| chunk.to_vec())) + } // OpInputEnum::MemoryRegion(mr) => *unsafe { mr.as_slice() } // .expect("memregion not local") // .first() @@ -1091,20 +760,19 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a [T] { // Err(_) => 10000, //+ 1 to account for main thread // }; // let num = len / num_per_batch; - let num = if len <1000 { + let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } - }; + }; let num_per_batch = len / num; for i in 0..num { let temp = &self[(i * num_per_batch)..((i + 1) * num_per_batch)]; @@ -1119,7 +787,7 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a [T] { } } -impl<'a, T: Dist> OpInput<'a, T> for &'a mut dyn Iterator { +impl<'a, T: Dist> OpInput<'a, T> for &'a mut dyn Iterator { fn as_op_input(self) -> (Vec>, usize) { self.collect::>().as_op_input() } @@ -1156,14 +824,13 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a mut [T] { let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1235,14 +902,13 @@ impl<'a, T: Dist> OpInput<'a, T> for Vec { // let num = len / num_per_batch; let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1366,14 +1032,13 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a LocalLockLocalData<'_, T> { // let num = len / num_per_batch; let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + 
Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1390,9 +1055,7 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a LocalLockLocalData<'_, T> { let rem = len % num_per_batch; if rem > 0 { // let sub_array = self.sub_array((start_index+(num*num_per_batch))..(start_index+(num*num_per_batch) + rem)); - let sub_data = self - .clone() - .into_sub_data(num * num_per_batch, len); + let sub_data = self.clone().into_sub_data(num * num_per_batch, len); iters.push(OpInputEnum::LocalLockLocalData(sub_data)); } } @@ -1418,14 +1081,13 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a GlobalLockLocalData<'_, T> { // let num = len / num_per_batch; let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1442,9 +1104,7 @@ impl<'a, T: Dist> OpInput<'a, T> for &'a GlobalLockLocalData<'_, T> { let rem = len % num_per_batch; if rem > 0 { // let sub_array = self.sub_array((start_index+(num*num_per_batch))..(start_index+(num*num_per_batch) + rem)); - let sub_data = self - .clone() - .into_sub_data(num * num_per_batch, len); + let sub_data = self.clone().into_sub_data(num * num_per_batch, len); iters.push(OpInputEnum::GlobalLockLocalData(sub_data)); } } @@ -1502,14 +1162,13 @@ impl<'a, T: Dist + ElementOps> OpInput<'a, T> for &GenericAtomicLocalData { // let num = len / num_per_batch; let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1523,8 +1182,7 @@ impl<'a, T: Dist + ElementOps> OpInput<'a, T> for &GenericAtomicLocalData { let rem = len % num_per_batch; if rem > 0 { // let sub_array = self.sub_array((start_index+(num*num_per_batch))..(start_index+(num*num_per_batch) + rem)); - let sub_data = - local_data.sub_data(num * num_per_batch, len); + let sub_data = local_data.sub_data(num * num_per_batch, len); iters.push(OpInputEnum::GenericAtomicLocalData(sub_data)); } } @@ -1557,14 +1215,13 @@ impl<'a, T: Dist + ElementOps> OpInput<'a, T> for &NativeAtomicLocalData { // let num = len / num_per_batch; let num = if len < 1000 { 1 - } - else { - match std::env::var("LAMELLAR_BATCH_OP_THREADS"){ + } else { + match std::env::var("LAMELLAR_BATCH_OP_THREADS") { Ok(n) => n.parse::().unwrap(), Err(_) => { match std::env::var("LAMELLAR_THREADS") { - Ok(n) => std::cmp::max(1,(n.parse::().unwrap() + 1)/4), //+ 1 to account for main thread - Err(_) => 4, //+ 1 to account for main thread + Ok(n) => std::cmp::max(1, (n.parse::().unwrap() + 1) / 4), //+ 1 to account for main thread + Err(_) => 4, //+ 1 to account for main thread } } } @@ -1582,8 +1239,7 @@ impl<'a, T: Dist + ElementOps> OpInput<'a, T> for 
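// The batch-count heuristic repeated across these `OpInput` impls, factored
// into one helper for clarity (a faithful restatement of the code above, not
// an addition to the API). Small inputs stay in one batch; otherwise
// LAMELLAR_BATCH_OP_THREADS wins, falling back to roughly a quarter of
// LAMELLAR_THREADS (the +1 accounts for the main thread), and finally to 4.
fn num_batches(len: usize) -> usize {
    if len < 1000 {
        1
    } else {
        match std::env::var("LAMELLAR_BATCH_OP_THREADS") {
            Ok(n) => n.parse::<usize>().unwrap(),
            Err(_) => match std::env::var("LAMELLAR_THREADS") {
                // e.g. LAMELLAR_THREADS=16 -> max(1, 17 / 4) = 4 batches
                Ok(n) => std::cmp::max(1, (n.parse::<usize>().unwrap() + 1) / 4),
                Err(_) => 4,
            },
        }
    }
}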
&NativeAtomicLocalData { let rem = len % num_per_batch; if rem > 0 { // let sub_array = self.sub_array((start_index+(num*num_per_batch))..(start_index+(num*num_per_batch) + rem)); - let sub_data = - local_data.sub_data(num * num_per_batch, len); + let sub_data = local_data.sub_data(num * num_per_batch, len); iters.push(OpInputEnum::NativeAtomicLocalData(sub_data)); } } diff --git a/src/array/operations/access.rs b/src/array/operations/access.rs index e253bed7..22d39585 100644 --- a/src/array/operations/access.rs +++ b/src/array/operations/access.rs @@ -90,8 +90,12 @@ pub trait AccessOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn store<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array() - .initiate_batch_op(val, index, ArrayOpCmd2::Store, self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Store, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [store][AccessOps::store] function, @@ -125,9 +129,12 @@ pub trait AccessOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin + Send>> { - // self.inner_array() - // .initiate_op(val, index, ArrayOpCmd::Store) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Store,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Store, + self.as_lamellar_byte_array(), + ) } /// This call swaps the supplied `val` into the element specified by `index`, returning the old value @@ -156,11 +163,13 @@ pub trait AccessOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn swap<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::Swap,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::Swap, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [swap][AccessOps::swap] function, @@ -195,8 +204,12 @@ pub trait AccessOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::Swap,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::Swap, + self.as_lamellar_byte_array(), + ) } } diff --git a/src/array/operations/arithmetic.rs b/src/array/operations/arithmetic.rs index 04b7c353..c6c86552 100644 --- a/src/array/operations/arithmetic.rs +++ b/src/array/operations/arithmetic.rs @@ -119,7 +119,12 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn add(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Add,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Add, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [add][ArithmeticOps::add] function, @@ -154,7 +159,12 @@ pub trait ArithmeticOps: private::LamellarArrayP val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Add) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Add,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, 
+ index, + ArrayOpCmd::Add, + self.as_lamellar_byte_array(), + ) } /// This call adds the supplied `val` into the element specified by `index`, returning the old value @@ -183,11 +193,13 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn fetch_add(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchAdd,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchAdd, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_add][ArithmeticOps::fetch_add] function, @@ -222,8 +234,12 @@ pub trait ArithmeticOps: private::LamellarArrayP index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchAdd,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchAdd, + self.as_lamellar_byte_array(), + ) } /// This call subtracts the supplied `val` from the element specified by `index` @@ -251,7 +267,12 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn sub<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Sub,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Sub, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [sub][ArithmeticOps::sub] function, @@ -286,7 +307,12 @@ pub trait ArithmeticOps: private::LamellarArrayP val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Sub) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Sub,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Sub, + self.as_lamellar_byte_array(), + ) } /// This call subtracts the supplied `val` from the element specified by `index`, returning the old value @@ -315,11 +341,13 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn fetch_sub<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchSub,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchSub, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_sub][ArithmeticOps::fetch_sub] function, @@ -354,8 +382,12 @@ pub trait ArithmeticOps: private::LamellarArrayP index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchSub,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchSub, + self.as_lamellar_byte_array(), + ) } /// This call multiplies the supplied `val` by the element specified by `index` and stores the result. 
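Every single-element arithmetic method in this file now funnels through the same `initiate_batch_op` / `initiate_batch_fetch_op_2` entry points as the batched variants, passing an `ArrayOpCmd` tag plus the byte-array handle, so the public trait surface is unchanged while the transport is shared. A minimal usage sketch of that surface (the world/array setup follows the crate's doc examples; treat the exact import paths as assumptions):

```rust
use lamellar::array::prelude::*; // assumed prelude path, as in the crate's doc examples

fn main() {
    let world = lamellar::LamellarWorldBuilder::new().build();
    let array = AtomicArray::<usize>::new(&world, 1_000, Distribution::Block);

    // Single-element op: resolves once the (possibly remote) update completes.
    world.block_on(array.add(10, 2));

    // Batched op: a Vec of indices and a scalar val are both accepted as `OpInput`,
    // so this is the "one val, many indices" case handled by `initiate_batch_op`.
    world.block_on(array.batch_add(vec![0, 17, 23, 512], 1));

    // Fetch variants resolve to the old value(s).
    let old: usize = world.block_on(array.fetch_add(10, 5));
    let olds: Vec<usize> = world.block_on(array.batch_fetch_add(vec![1, 2, 3], 1));
    assert_eq!(olds.len(), 3);
    let _ = old;
}
```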
@@ -383,7 +415,12 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn mul<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Mul,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Mul, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [mul][ArithmeticOps::mul] function, @@ -418,7 +455,12 @@ pub trait ArithmeticOps: private::LamellarArrayP val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Mul) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Mul,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Mul, + self.as_lamellar_byte_array(), + ) } /// This call multiplies the supplied `val` with the element specified by `index`, returning the old value @@ -447,11 +489,13 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn fetch_mul<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchMul,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchMul, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_mul][ArithmeticOps::fetch_mul] function, @@ -486,8 +530,12 @@ pub trait ArithmeticOps: private::LamellarArrayP index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchMul,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchMul, + self.as_lamellar_byte_array(), + ) } /// This call divides the element specified by `index` with the supplied `val` and stores the result @@ -515,7 +563,12 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn div<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Div,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Div, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [div][ArithmeticOps::div] function, @@ -550,7 +603,12 @@ pub trait ArithmeticOps: private::LamellarArrayP val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Div) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Div,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Div, + self.as_lamellar_byte_array(), + ) } /// This call divides the element specified by `index` with the supplied `val`, returning the old value @@ -579,11 +637,13 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn fetch_div<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchDiv,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchDiv, + self.as_lamellar_byte_array(), + ); + Box::pin(async move 
{ result.await[0] }) } /// This call performs a batched vesion of the [fetch_div][ArithmeticOps::fetch_div] function, @@ -618,8 +678,12 @@ pub trait ArithmeticOps: private::LamellarArrayP index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchDiv,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchDiv, + self.as_lamellar_byte_array(), + ) } /// This call divides the element specified by `index` with the supplied `val` and stores the result @@ -647,7 +711,12 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn rem<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Rem,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Rem, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [rem][ArithmeticOps::rem] function, @@ -682,7 +751,12 @@ pub trait ArithmeticOps: private::LamellarArrayP val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Rem) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Rem,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Rem, + self.as_lamellar_byte_array(), + ) } /// This call divides the element specified by `index` with the supplied `val`, returning the old value @@ -711,11 +785,13 @@ pub trait ArithmeticOps: private::LamellarArrayP ///``` #[tracing::instrument(skip_all)] fn fetch_rem<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchRem,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchRem, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_rem][ArithmeticOps::fetch_rem] function, @@ -750,8 +826,12 @@ pub trait ArithmeticOps: private::LamellarArrayP index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchRem,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchRem, + self.as_lamellar_byte_array(), + ) } } diff --git a/src/array/operations/bitwise.rs b/src/array/operations/bitwise.rs index 438e4555..e36aac5f 100644 --- a/src/array/operations/bitwise.rs +++ b/src/array/operations/bitwise.rs @@ -105,7 +105,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn bit_and<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::And,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::And, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [bit_and][BitWiseOps::bit_and] function, @@ -140,7 +145,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::And) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::And,self.as_lamellar_byte_array()) + 
self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::And, + self.as_lamellar_byte_array(), + ) } /// This call performs a bitwise `and` with the element specified by `index` and the supplied `val`, returning the old value @@ -169,11 +179,13 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn fetch_bit_and<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchAnd,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchAnd, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_bit_and][BitWiseOps::fetch_bit_and] function, @@ -208,8 +220,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchAnd,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchAnd, + self.as_lamellar_byte_array(), + ) } /// This call performs a bitwise `or` with the element specified by `index` and the supplied `val`. @@ -237,7 +253,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn bit_or<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Or,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Or, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [bit_or][BitWiseOps::bit_or] function, @@ -272,7 +293,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Or) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Or,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Or, + self.as_lamellar_byte_array(), + ) } /// This call performs a bitwise `or` with the element specified by `index` and the supplied `val`, returning the old value @@ -301,11 +327,13 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn fetch_bit_or<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchOr,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchOr, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_bit_or][BitWiseOps::fetch_bit_or] function, @@ -340,8 +368,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchOr,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchOr, + self.as_lamellar_byte_array(), + ) } /// This call performs a bitwise `xor` with the element specified by `index` and the supplied `val`. 
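One pattern repeats in every fetch-style method in these traits: the single-index form is expressed as a batch of one, calling the batched initiator and then taking element 0 of the returned `Vec` inside a pinned async block (`Box::pin(async move { result.await[0] })`). Schematically, the adapter looks like the following free-standing sketch (a hypothetical helper; in the patch the equivalent code is inlined in each method body):

```rust
use std::future::Future;
use std::pin::Pin;

// Turn a batched future that yields a Vec of old values into a future yielding
// just the first (and only) old value, mirroring the inlined bodies in this patch.
fn first_of<T: Send + 'static>(
    batched: Pin<Box<dyn Future<Output = Vec<T>> + Send>>,
) -> Pin<Box<dyn Future<Output = T> + Send>> {
    Box::pin(async move {
        batched
            .await
            .into_iter()
            .next()
            .expect("a single-element op always returns a batch of one")
    })
}
```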
@@ -369,7 +401,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn bit_xor<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Xor,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Xor, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [bit_xor][BitWiseOps::bit_xor] function, @@ -404,7 +441,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { val: impl OpInput<'a, T>, ) -> Pin + Send>> { // self.inner_array().initiate_op(val, index, ArrayOpCmd::Xor) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Xor,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Xor, + self.as_lamellar_byte_array(), + ) } /// This call performs a bitwise `xor` with the element specified by `index` and the supplied `val`, returning the old value @@ -433,11 +475,13 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn fetch_bit_xor<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchXor,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchXor, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_bit_xor][BitWiseOps::fetch_bit_xor] function, @@ -472,8 +516,12 @@ pub trait BitWiseOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchXor,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchXor, + self.as_lamellar_byte_array(), + ) } } diff --git a/src/array/operations/compare_exchange.rs b/src/array/operations/compare_exchange.rs index 6740bc52..b6877929 100644 --- a/src/array/operations/compare_exchange.rs +++ b/src/array/operations/compare_exchange.rs @@ -125,11 +125,13 @@ pub trait CompareExchangeOps: private::LamellarArrayPriv current: T, new: T, ) -> Pin> + Send>> { - let result = self.inner_array() - .initiate_batch_result_op_2(new, index, ArrayOpCmd2::CompareExchange(current), self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_result_op_2( + new, + index, + ArrayOpCmd::CompareExchange(current), + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [compare_exchange][CompareExchangeOps::compare_exchange] function, @@ -170,8 +172,8 @@ pub trait CompareExchangeOps: private::LamellarArrayPriv self.inner_array().initiate_batch_result_op_2( new, index, - ArrayOpCmd2::CompareExchange(current), - self.as_lamellar_byte_array() + ArrayOpCmd::CompareExchange(current), + self.as_lamellar_byte_array(), ) } } @@ -296,11 +298,10 @@ pub trait CompareExchangeEpsilonOps: let result = self.inner_array().initiate_batch_result_op_2( new, index, - ArrayOpCmd2::CompareExchangeEps(current, eps), - self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + ArrayOpCmd::CompareExchangeEps(current, eps), + self.as_lamellar_byte_array(), + ); + Box::pin(async move { 
result.await[0] }) } /// This call performs a batched vesion of the [compare_exchange_epsilon][CompareExchangeEpsilonOps::compare_exchange_epsilon] function, @@ -343,7 +344,7 @@ pub trait CompareExchangeEpsilonOps: self.inner_array().initiate_batch_result_op_2( new, index, - ArrayOpCmd2::CompareExchangeEps(current, eps), + ArrayOpCmd::CompareExchangeEps(current, eps), self.as_lamellar_byte_array(), ) } diff --git a/src/array/operations/read_only.rs b/src/array/operations/read_only.rs index c41a76b7..35154066 100644 --- a/src/array/operations/read_only.rs +++ b/src/array/operations/read_only.rs @@ -70,11 +70,14 @@ pub trait ReadOnlyOps: private::LamellarArrayPrivate { #[tracing::instrument(skip_all)] fn load<'a>(&self, index: usize) -> Pin + Send>> { let dummy_val = self.inner_array().dummy_val(); //we dont actually do anything with this except satisfy apis; - // let array = self.inner_array(); - let result = self.inner_array().initiate_batch_fetch_op_2(dummy_val, index, ArrayOpCmd2::Load, self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + // let array = self.inner_array(); + let result = self.inner_array().initiate_batch_fetch_op_2( + dummy_val, + index, + ArrayOpCmd::Load, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [load][ReadOnlyOps::load] function, @@ -111,7 +114,11 @@ pub trait ReadOnlyOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, ) -> Pin> + Send>> { let dummy_val = self.inner_array().dummy_val(); //we dont actually do anything with this except satisfy apis; - self.inner_array() - .initiate_batch_fetch_op_2(dummy_val, index, ArrayOpCmd2::Load,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + dummy_val, + index, + ArrayOpCmd::Load, + self.as_lamellar_byte_array(), + ) } } diff --git a/src/array/operations/shift.rs b/src/array/operations/shift.rs index 5ead858f..e5f39e64 100644 --- a/src/array/operations/shift.rs +++ b/src/array/operations/shift.rs @@ -72,7 +72,12 @@ pub trait ShiftOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn shl(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shl,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Shl, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [shl][ShiftOps::shl] function, @@ -106,8 +111,13 @@ pub trait ShiftOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin + Send>> { - // self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shl) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shl,self.as_lamellar_byte_array()) + // self.inner_array().initiate_batch_op(val, index, ArrayOpCmd::Shl) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Shl, + self.as_lamellar_byte_array(), + ) } /// This call performs an in place left shift of `val` bits on the element specified by `index`, returning the old value @@ -136,11 +146,13 @@ pub trait ShiftOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn fetch_shl(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchShl,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, 
+ index, + ArrayOpCmd::FetchShl, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_shl][ShiftOps::fetch_shl] function, @@ -175,8 +187,12 @@ pub trait ShiftOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchShl,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchShl, + self.as_lamellar_byte_array(), + ) } /// This call performs an in place right shift of `val` bits on the element specified by `index`. @@ -204,7 +220,12 @@ pub trait ShiftOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn shr<'a>(&self, index: usize, val: T) -> Pin + Send>> { - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shr,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Shr, + self.as_lamellar_byte_array(), + ) } /// This call performs a batched vesion of the [shl][ShiftOps::shl] function, @@ -238,8 +259,13 @@ pub trait ShiftOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin + Send>> { - // self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shr) - self.inner_array().initiate_batch_op(val, index, ArrayOpCmd2::Shr,self.as_lamellar_byte_array()) + // self.inner_array().initiate_batch_op(val, index, ArrayOpCmd::Shr) + self.inner_array().initiate_batch_op( + val, + index, + ArrayOpCmd::Shr, + self.as_lamellar_byte_array(), + ) } /// This call performs an in place right shift of `val` bits on the element specified by `index`, returning the old value @@ -268,11 +294,13 @@ pub trait ShiftOps: private::LamellarArrayPrivate { ///``` #[tracing::instrument(skip_all)] fn fetch_shr<'a>(&self, index: usize, val: T) -> Pin + Send>> { - let result = self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchShr,self.as_lamellar_byte_array()); - Box::pin(async move{ - result.await[0] - }) + let result = self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchShr, + self.as_lamellar_byte_array(), + ); + Box::pin(async move { result.await[0] }) } /// This call performs a batched vesion of the [fetch_shr][ShiftOps::fetch_shr] function, @@ -307,8 +335,12 @@ pub trait ShiftOps: private::LamellarArrayPrivate { index: impl OpInput<'a, usize>, val: impl OpInput<'a, T>, ) -> Pin> + Send>> { - self.inner_array() - .initiate_batch_fetch_op_2(val, index, ArrayOpCmd2::FetchShr,self.as_lamellar_byte_array()) + self.inner_array().initiate_batch_fetch_op_2( + val, + index, + ArrayOpCmd::FetchShr, + self.as_lamellar_byte_array(), + ) } } diff --git a/src/array/read_only.rs b/src/array/read_only.rs index 4cd0ff22..c67218d5 100644 --- a/src/array/read_only.rs +++ b/src/array/read_only.rs @@ -10,8 +10,8 @@ use std::sync::Arc; type BufFn = fn(ReadOnlyByteArrayWeak) -> Arc; -// type MultiMultiFn = fn(ReadOnlyByteArray,ArrayOpCmd2,Vec) -> LamellarArcAm; -// type MultiSingleFn = fn(ReadOnlyByteArray,ArrayOpCmd2,Vec,Vec) -> LamellarArcAm; +// type MultiMultiFn = fn(ReadOnlyByteArray,ArrayOpCmd,Vec) -> LamellarArcAm; +// type MultiSingleFn = fn(ReadOnlyByteArray,ArrayOpCmd,Vec,Vec) -> LamellarArcAm; lazy_static! 
{ pub(crate) static ref BUFOPS: HashMap<TypeId, BufFn> = { @@ -103,7 +103,7 @@ impl ReadOnlyByteArrayWeak { /// Thanks to this guarantee there is the potential for increased performance when reading remote data in this /// array type as locking or atomic access is unneeded. For certain operations like `get()` it is possible to /// directly do an RDMA transfer. -impl ReadOnlyArray { +impl ReadOnlyArray { #[doc(alias = "Collective")] /// Construct a new ReadOnlyArray with a length of `array_size` whose data will be laid out with the provided `distribution` on the PEs specified by the `team`. /// `team` is commonly a [LamellarWorld][crate::LamellarWorld] or [LamellarTeam][crate::LamellarTeam] (instance or reference). @@ -126,16 +126,7 @@ impl ReadOnlyArray { ) -> ReadOnlyArray { let array = UnsafeArray::new(team, array_size, distribution); array.block_on_outstanding(DarcMode::ReadOnlyArray); - if let Some(func) = BUFOPS.get(&TypeId::of::<T>()) { - let mut op_bufs = array.inner.data.op_buffers.write(); - let bytearray = ReadOnlyByteArray { - array: array.clone().into(), - }; - - for _pe in 0..array.num_pes() { - op_bufs.push(func(ReadOnlyByteArray::downgrade(&bytearray))); - } - } + ReadOnlyArray { array: array } } @@ -390,8 +381,8 @@ impl ReadOnlyArray { } } -impl TeamFrom<(Vec,Distribution)> for ReadOnlyArray { - fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(Vec, Distribution)> for ReadOnlyArray { + fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self { let (vals, distribution) = input; let input = (&vals, distribution); let array: UnsafeArray = input.team_into(team); @@ -399,8 +390,8 @@ impl TeamFrom<(Vec,Distribution)> for ReadOnlyArray { } } -impl TeamFrom<(&Vec,Distribution)> for ReadOnlyArray { - fn team_from(input: (&Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(&Vec, Distribution)> for ReadOnlyArray { + fn team_from(input: (&Vec, Distribution), team: &Pin>) -> Self { let array: UnsafeArray = input.team_into(team); array.into() } @@ -410,15 +401,7 @@ impl From> for ReadOnlyArray { fn from(array: UnsafeArray) -> Self { // println!("readonly from UnsafeArray"); array.block_on_outstanding(DarcMode::ReadOnlyArray); - if let Some(func) = BUFOPS.get(&TypeId::of::<T>()) { - let bytearray = ReadOnlyByteArray { - array: array.clone().into(), - }; - let mut op_bufs = array.inner.data.op_buffers.write(); - for _pe in 0..array.inner.data.num_pes { - op_bufs.push(func(ReadOnlyByteArray::downgrade(&bytearray))); - } - } + ReadOnlyArray { array: array } } } @@ -474,11 +457,10 @@ impl From> for LamellarByteArray { } impl From for ReadOnlyArray { - fn from(array:LamellarByteArray) -> Self { + fn from(array: LamellarByteArray) -> Self { if let LamellarByteArray::ReadOnlyArray(array) = array { array.into() - } - else { + } else { panic!("Expected LamellarByteArray::ReadOnlyArray") } } diff --git a/src/array/read_only/iteration.rs b/src/array/read_only/iteration.rs index 48e96981..c43bf579 100644 --- a/src/array/read_only/iteration.rs +++ b/src/array/read_only/iteration.rs @@ -1,7 +1,9 @@ use crate::array::read_only::*; -use crate::array::iterator::distributed_iterator::{DistIter,DistIteratorLauncher,IndexedDistributedIterator,DistributedIterator}; -use crate::array::iterator::local_iterator::{LocalIter,LocalIteratorLauncher,IndexedLocalIterator,LocalIterator}; +use crate::array::iterator::distributed_iterator::{ + DistIter, DistIteratorLauncher, DistributedIterator, +}; +use crate::array::iterator::local_iterator::{LocalIter, LocalIterator, LocalIteratorLauncher}; use 
crate::array::iterator::one_sided_iterator::OneSidedIter; use crate::array::iterator::{LamellarArrayIterators, Schedule}; use crate::array::*; @@ -71,7 +73,7 @@ impl DistIteratorLauncher for ReadOnlyArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - DistIteratorLauncher::for_each_async(&self.array,iter, op) + DistIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -91,16 +93,21 @@ impl DistIteratorLauncher for ReadOnlyArray { where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self,sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: DistributedIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } @@ -111,9 +118,9 @@ impl DistIteratorLauncher for ReadOnlyArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { DistIteratorLauncher::collect_async(&self.array, iter, d) } @@ -126,11 +133,11 @@ impl DistIteratorLauncher for ReadOnlyArray { ) -> Pin + Send>> where I: DistributedIterator, - I::Item: Future + Send + 'static, + I::Item: Future + Send + 'static, B: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - DistIteratorLauncher::collect_async_with_schedule(&self.array, sched,iter, d) + DistIteratorLauncher::collect_async_with_schedule(&self.array, sched, iter, d) } fn team(&self) -> Pin> { self.array.team_rt().clone() @@ -152,7 +159,7 @@ impl LocalIteratorLauncher for ReadOnlyArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each(&self.array,iter, op) + LocalIteratorLauncher::for_each(&self.array, iter, op) } fn for_each_with_schedule( &self, @@ -164,19 +171,15 @@ impl LocalIteratorLauncher for ReadOnlyArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - LocalIteratorLauncher::for_each_with_schedule(&self.array,sched, iter, op) + LocalIteratorLauncher::for_each_with_schedule(&self.array, sched, iter, op) } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - LocalIteratorLauncher::for_each_async(&self.array,iter, op) + LocalIteratorLauncher::for_each_async(&self.array, iter, op) } fn for_each_async_with_schedule( &self, @@ -201,7 +204,12 @@ impl LocalIteratorLauncher for ReadOnlyArray { LocalIteratorLauncher::reduce(&self.array, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: 
Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, @@ -234,21 +242,26 @@ impl LocalIteratorLauncher for ReadOnlyArray { where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect(&self.array, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { LocalIteratorLauncher::collect_with_schedule(&self.array, sched, iter, d) } -// fn collect_async( + // fn collect_async( // &self, // iter: &I, // d: Distribution, @@ -279,14 +292,18 @@ impl LocalIteratorLauncher for ReadOnlyArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count(&self.array, iter) } - - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { LocalIteratorLauncher::count_with_schedule(&self.array, sched, iter) } @@ -298,8 +315,12 @@ impl LocalIteratorLauncher for ReadOnlyArray { { LocalIteratorLauncher::sum(&self.array, iter) } - - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, diff --git a/src/array/unsafe.rs b/src/array/unsafe.rs index e84ce0a7..5708630c 100644 --- a/src/array/unsafe.rs +++ b/src/array/unsafe.rs @@ -4,7 +4,7 @@ pub(crate) mod operations; mod rdma; use crate::active_messaging::*; -use crate::array::r#unsafe::operations::BUFOPS; +// use crate::array::r#unsafe::operations::BUFOPS; use crate::array::*; use crate::array::{LamellarRead, LamellarWrite}; use crate::darc::{Darc, DarcMode, WeakDarc}; @@ -14,8 +14,6 @@ use crate::memregion::{Dist, MemoryRegion}; use crate::scheduler::SchedulerQueue; use crate::LamellarTaskGroup; use core::marker::PhantomData; -use parking_lot::RwLock; -use std::any::TypeId; use std::ops::Bound; use std::pin::Pin; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -29,7 +27,6 @@ pub(crate) struct UnsafeArrayData { pub(crate) task_group: Arc, pub(crate) my_pe: usize, pub(crate) num_pes: usize, - pub(crate) op_buffers: RwLock>>, req_cnt: Arc, } @@ -146,11 +143,11 @@ impl UnsafeArray { let task_group = LamellarTaskGroup::new(team.clone()); let my_pe = team.team_pe_id().unwrap(); let num_pes = team.num_pes(); - let full_array_size = std::cmp::max(array_size,num_pes); + let full_array_size = std::cmp::max(array_size, num_pes); let elem_per_pe = full_array_size as f64 / num_pes as f64; let per_pe_size = (full_array_size as f64 / num_pes as f64).ceil() as usize; //we do ceil to ensure enough space an each pe - // println!("new unsafe array {:?} {:?} {:?}", elem_per_pe, num_elems_local, per_pe_size); + // println!("new unsafe array {:?} {:?} {:?}", elem_per_pe, num_elems_local, per_pe_size); let rmr = MemoryRegion::new( per_pe_size * std::mem::size_of::(), 
team.lamellae.clone(), @@ -171,15 +168,10 @@ impl UnsafeArray { task_group: Arc::new(task_group), my_pe: my_pe, num_pes: num_pes, - // op_buffers: Mutex::new(HashMap::new()), - op_buffers: RwLock::new(Vec::new()), req_cnt: Arc::new(AtomicUsize::new(0)), }, crate::darc::DarcMode::UnsafeArray, - Some(|data: &mut UnsafeArrayData| { - // // println!("unsafe array data dropping2"); - data.op_buffers.write().clear(); - }), + None, ) .expect("trying to create array on non team member"); let array = UnsafeArray { @@ -189,7 +181,7 @@ impl UnsafeArray { // wait: wait, orig_elem_per_pe: elem_per_pe, elem_size: std::mem::size_of::(), - offset: 0, //relative to size of T + offset: 0, //relative to size of T size: full_array_size, //relative to size of T }, phantom: PhantomData, @@ -201,34 +193,17 @@ impl UnsafeArray { // println!("pe: {:?} {:?}",i,array.inner.num_elems_pe(i)); // } // array.inner.data.print(); - array.create_buffered_ops(); - if full_array_size != array_size{ + if full_array_size != array_size { println!("WARNING: Array size {array_size} is less than number of pes {full_array_size}, each PE will not contain data"); array.sub_array(0..array_size) - } - else{ + } else { array } // println!("after buffered ops"); // array.inner.data.print(); - } - - } impl UnsafeArray { - - // This is called when constructing a new array to setup the operation buffers - fn create_buffered_ops(&self) { - if let Some(func) = BUFOPS.get(&TypeId::of::()) { - let mut op_bufs = self.inner.data.op_buffers.write(); - let bytearray: UnsafeByteArray = self.clone().into(); - for _pe in 0..self.inner.data.num_pes { - op_bufs.push(func(UnsafeByteArray::downgrade(&bytearray))) - } - } - } - #[doc(alias("One-sided", "onesided"))] /// Change the distribution this array handle uses to index into the data of the array. 
/// @@ -392,8 +367,7 @@ impl UnsafeArray { self.wait_all(); // println!("block on outstanding"); // self.inner.data.print(); - self.inner.data.block_on_outstanding(mode, 0); //self.inner.data.op_buffers.read().len()); - self.inner.data.op_buffers.write().clear(); + self.inner.data.block_on_outstanding(mode, 0); // self.inner.data.print(); } @@ -592,18 +566,18 @@ impl UnsafeArray { // } // } -impl TeamFrom<(Vec,Distribution)> for UnsafeArray { - fn team_from(input: (Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(Vec, Distribution)> for UnsafeArray { + fn team_from(input: (Vec, Distribution), team: &Pin>) -> Self { let (vals, distribution) = input; let input = (&vals, distribution); input.team_into(team) } } -impl TeamFrom<(&Vec,Distribution)> for UnsafeArray { - fn team_from(input: (&Vec,Distribution), team: &Pin>) -> Self { +impl TeamFrom<(&Vec, Distribution)> for UnsafeArray { + fn team_from(input: (&Vec, Distribution), team: &Pin>) -> Self { let (local_vals, distribution) = input; - println!("local_vals len: {:?}",local_vals.len()); + println!("local_vals len: {:?}", local_vals.len()); team.barrier(); let local_sizes = UnsafeArray::::new(team.clone(), team.num_pes, Distribution::Block); @@ -622,16 +596,21 @@ impl TeamFrom<(&Vec,Distribution)> for UnsafeArray { .into_iter() .enumerate() .for_each(|(i, local_size)| { - println!("i: {:?} local_size{:?}", i,local_size); + println!("i: {:?} local_size{:?}", i, local_size); size += local_size; if i < my_pe { my_start += local_size; } }); } - println!("my_start {} size {} local_vals {}", my_start, size, local_vals.len()); + println!( + "my_start {} size {} local_vals {}", + my_start, + size, + local_vals.len() + ); let array = UnsafeArray::::new(team.clone(), size, distribution); - if local_vals.len() > 0 { + if local_vals.len() > 0 { unsafe { array.put(my_start, local_vals) }; } array.wait_all(); @@ -657,8 +636,6 @@ impl From> for UnsafeArray { // println!("unsafe from native atomic"); // let array = array.into_data(); array.array.block_on_outstanding(DarcMode::UnsafeArray); - array.array.inner.data.op_buffers.write().clear(); - array.array.create_buffered_ops(); array.array } } @@ -668,8 +645,6 @@ impl From> for UnsafeArray { // println!("unsafe from generic atomic"); // let array = array.into_data(); array.array.block_on_outstanding(DarcMode::UnsafeArray); - array.array.inner.data.op_buffers.write().clear(); - array.array.create_buffered_ops(); array.array } } @@ -678,8 +653,6 @@ impl From> for UnsafeArray { fn from(array: LocalLockArray) -> Self { // println!("unsafe from local lock atomic"); array.array.block_on_outstanding(DarcMode::UnsafeArray); - array.array.inner.data.op_buffers.write().clear(); - array.array.create_buffered_ops(); array.array } } @@ -688,28 +661,14 @@ impl From> for UnsafeArray { fn from(array: GlobalLockArray) -> Self { // println!("unsafe from global lock atomic"); array.array.block_on_outstanding(DarcMode::UnsafeArray); - array.array.inner.data.op_buffers.write().clear(); - array.array.create_buffered_ops(); array.array } } -// impl From> for UnsafeArray { -// fn from(array: LocalOnlyArray) -> Self { -// // println!("unsafe from local only"); -// array.array.block_on_outstanding(DarcMode::UnsafeArray); -// array.array.inner.data.op_buffers.write().clear(); -// array.array.create_buffered_ops(); -// array.array -// } -// } - impl From> for UnsafeArray { fn from(array: ReadOnlyArray) -> Self { // println!("unsafe from read only"); array.array.block_on_outstanding(DarcMode::UnsafeArray); - 
array.array.inner.data.op_buffers.write().clear(); - array.array.create_buffered_ops(); array.array } } @@ -740,7 +699,9 @@ impl From> for UnsafeByteArray { impl From<&UnsafeArray> for UnsafeByteArray { fn from(array: &UnsafeArray) -> Self { - UnsafeByteArray { inner: array.inner.clone() } + UnsafeByteArray { + inner: array.inner.clone(), + } } } @@ -751,11 +712,10 @@ impl From> for LamellarByteArray { } impl From for UnsafeArray { - fn from(array:LamellarByteArray) -> Self { + fn from(array: LamellarByteArray) -> Self { if let LamellarByteArray::UnsafeArray(array) = array { array.into() - } - else { + } else { panic!("Expected LamellarByteArray::UnsafeArray") } } diff --git a/src/array/unsafe/iteration/local.rs b/src/array/unsafe/iteration/local.rs index 90e46741..1ad136ee 100644 --- a/src/array/unsafe/iteration/local.rs +++ b/src/array/unsafe/iteration/local.rs @@ -1,18 +1,17 @@ use crate::active_messaging::SyncSend; use crate::array::iterator::local_iterator::*; use crate::array::r#unsafe::UnsafeArray; -use crate::array::{LamellarArray,Distribution,ArrayOps,TeamFrom}; +use crate::array::{ArrayOps, Distribution, TeamFrom}; -use crate::memregion::Dist; use crate::array::iterator::Schedule; use crate::lamellar_team::LamellarTeamRT; +use crate::memregion::Dist; use core::marker::PhantomData; use futures::Future; use std::pin::Pin; use std::sync::Arc; - impl LocalIteratorLauncher for UnsafeArray { fn local_global_index_from_local(&self, index: usize, chunk_size: usize) -> Option { // println!("global index cs:{:?}",chunk_size); @@ -49,12 +48,12 @@ impl LocalIteratorLauncher for UnsafeArray { I: LocalIterator + 'static, F: Fn(I::Item) + SyncSend + Clone + 'static, { - let for_each = ForEach{ + let for_each = ForEach { iter: iter.clone(), op, }; match sched { - Schedule::Static => self.sched_static(for_each ), + Schedule::Static => self.sched_static(for_each), Schedule::Dynamic => self.sched_dynamic(for_each), Schedule::Chunk(size) => self.sched_chunk(for_each, size), Schedule::Guided => self.sched_guided(for_each), @@ -62,11 +61,7 @@ impl LocalIteratorLauncher for UnsafeArray { } } - fn for_each_async( - &self, - iter: &I, - op: F, - ) -> Pin + Send>> + fn for_each_async(&self, iter: &I, op: F) -> Pin + Send>> where I: LocalIterator + 'static, F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, @@ -86,12 +81,12 @@ impl LocalIteratorLauncher for UnsafeArray { F: Fn(I::Item) -> Fut + SyncSend + Clone + 'static, Fut: Future + Send + 'static, { - let for_each = ForEachAsync{ + let for_each = ForEachAsync { iter: iter.clone(), op: op.clone(), }; match sched { - Schedule::Static => self.sched_static(for_each ), + Schedule::Static => self.sched_static(for_each), Schedule::Dynamic => self.sched_dynamic(for_each), Schedule::Chunk(size) => self.sched_chunk(for_each, size), Schedule::Guided => self.sched_guided(for_each), @@ -108,18 +103,23 @@ impl LocalIteratorLauncher for UnsafeArray { self.reduce_with_schedule(Schedule::Static, iter, op) } - fn reduce_with_schedule(&self, sched: Schedule, iter: &I, op: F) -> Pin> + Send>> + fn reduce_with_schedule( + &self, + sched: Schedule, + iter: &I, + op: F, + ) -> Pin> + Send>> where I: LocalIterator + 'static, I::Item: SyncSend, F: Fn(I::Item, I::Item) -> I::Item + SyncSend + Clone + 'static, { - let reduce = Reduce{ + let reduce = Reduce { iter: iter.clone(), op, }; match sched { - Schedule::Static => self.sched_static(reduce ), + Schedule::Static => self.sched_static(reduce), Schedule::Dynamic => self.sched_dynamic(reduce), Schedule::Chunk(size) => 
self.sched_chunk(reduce, size), Schedule::Guided => self.sched_guided(reduce), @@ -160,26 +160,31 @@ impl LocalIteratorLauncher for UnsafeArray { fn collect(&self, iter: &I, d: Distribution) -> Pin + Send>> where - I: LocalIterator + 'static, + I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - self.collect_with_schedule(Schedule::Static,iter,d) + self.collect_with_schedule(Schedule::Static, iter, d) } - fn collect_with_schedule(&self, sched: Schedule, iter: &I, d: Distribution) -> Pin + Send>> + fn collect_with_schedule( + &self, + sched: Schedule, + iter: &I, + d: Distribution, + ) -> Pin + Send>> where - I: LocalIterator + 'static, + I: LocalIterator + 'static, I::Item: Dist + ArrayOps, - A: for<'a> TeamFrom<(&'a Vec,Distribution)> + SyncSend + Clone + 'static, + A: for<'a> TeamFrom<(&'a Vec, Distribution)> + SyncSend + Clone + 'static, { - let collect = Collect{ + let collect = Collect { iter: iter.clone().monotonic(), distribution: d, _phantom: PhantomData, }; match sched { - Schedule::Static => self.sched_static(collect ), + Schedule::Static => self.sched_static(collect), Schedule::Dynamic => self.sched_dynamic(collect), Schedule::Chunk(size) => self.sched_chunk(collect, size), Schedule::Guided => self.sched_guided(collect), @@ -220,20 +225,22 @@ impl LocalIteratorLauncher for UnsafeArray { fn count(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static + I: LocalIterator + 'static, { - self.count_with_schedule(Schedule::Static,iter) + self.count_with_schedule(Schedule::Static, iter) } - fn count_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + fn count_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static, + I: LocalIterator + 'static, { - let count = Count{ - iter: iter.clone(), - }; + let count = Count { iter: iter.clone() }; match sched { - Schedule::Static => self.sched_static(count ), + Schedule::Static => self.sched_static(count), Schedule::Dynamic => self.sched_dynamic(count), Schedule::Chunk(size) => self.sched_chunk(count, size), Schedule::Guided => self.sched_guided(count), @@ -243,22 +250,24 @@ impl LocalIteratorLauncher for UnsafeArray { fn sum(&self, iter: &I) -> Pin + Send>> where - I: LocalIterator + 'static, + I: LocalIterator + 'static, I::Item: SyncSend + std::iter::Sum, { - self.sum_with_schedule(Schedule::Static,iter) + self.sum_with_schedule(Schedule::Static, iter) } - fn sum_with_schedule(&self, sched: Schedule, iter: &I) -> Pin + Send>> + fn sum_with_schedule( + &self, + sched: Schedule, + iter: &I, + ) -> Pin + Send>> where - I: LocalIterator + 'static, - I::Item: SyncSend + std::iter::Sum, + I: LocalIterator + 'static, + I::Item: SyncSend + std::iter::Sum, { - let sum = Sum{ - iter: iter.clone(), - }; + let sum = Sum { iter: iter.clone() }; match sched { - Schedule::Static => self.sched_static(sum ), + Schedule::Static => self.sched_static(sum), Schedule::Dynamic => self.sched_dynamic(sum), Schedule::Chunk(size) => self.sched_chunk(sum, size), Schedule::Guided => self.sched_guided(sum), diff --git a/src/array/unsafe/operations.rs b/src/array/unsafe/operations.rs index 28dd7946..f5be17ed 100644 --- a/src/array/unsafe/operations.rs +++ b/src/array/unsafe/operations.rs @@ -2,29 +2,20 @@ use crate::active_messaging::*; use crate::array::operations::*; use crate::array::r#unsafe::*; use crate::array::*; -use 
crate::lamellar_request::LamellarRequest; // use crate::memregion::Dist; use std::any::TypeId; use std::collections::HashMap; // use itertools::Itertools; -type BufFn = fn(UnsafeByteArrayWeak) -> Arc; - -type MultiValMultiIdxFn = fn(LamellarByteArray,ArrayOpCmd2>,Vec) -> LamellarArcAm; -type SingleValMultiIdxFn = fn(LamellarByteArray,ArrayOpCmd2>,Vec,Vec) -> LamellarArcAm; -type MultiValSingleIdxFn = fn(LamellarByteArray,ArrayOpCmd2>,Vec,usize) -> LamellarArcAm; - +type MultiValMultiIdxFn = fn(LamellarByteArray, ArrayOpCmd>, Vec, u8) -> LamellarArcAm; +type SingleValMultiIdxFn = + fn(LamellarByteArray, ArrayOpCmd>, Vec, Vec, u8) -> LamellarArcAm; +type MultiValSingleIdxFn = + fn(LamellarByteArray, ArrayOpCmd>, Vec, usize) -> LamellarArcAm; lazy_static! { - pub(crate) static ref BUFOPS: HashMap = { - let mut map = HashMap::new(); - for op in crate::inventory::iter:: { - map.insert(op.id.clone(), op.op); - } - map - }; - + pub(crate) static ref MULTI_VAL_MULTI_IDX_OPS: HashMap<(TypeId,TypeId,BatchReturnType), MultiValMultiIdxFn> = { let mut map = HashMap::new(); for op in crate::inventory::iter:: { @@ -49,51 +40,73 @@ lazy_static! { }; } -#[doc(hidden)] -pub struct UnsafeArrayOpBuf { - pub id: TypeId, - pub op: BufFn, +#[derive(Debug,Copy,Clone)] +enum IndexSize { + U8, + U16, + U32, + U64, + Usize, +} + +impl From for IndexSize { + fn from(size: usize) -> Self { + if size <= u8::MAX as usize { + IndexSize::U8 + } else if size <= u16::MAX as usize { + IndexSize::U16 + } else if size <= u32::MAX as usize { + IndexSize::U32 + } else if size <= u64::MAX as usize { + IndexSize::U64 + } else { + IndexSize::Usize + } + } } +impl IndexSize { + fn len(&self) -> usize { + match self { + IndexSize::U8 => 1, + IndexSize::U16 => 2, + IndexSize::U32 => 4, + IndexSize::U64 => 8, + IndexSize::Usize => 8, + } + } + fn as_bytes(&self, val: &usize) -> &[u8] { + match self { + IndexSize::U8 => unsafe { std::slice::from_raw_parts(val as *const usize as *const u8, 1) }, + IndexSize::U16 => unsafe { std::slice::from_raw_parts(val as *const usize as *const u8, 2) }, + IndexSize::U32 => unsafe { std::slice::from_raw_parts(val as *const usize as *const u8, 4) }, + IndexSize::U64 => unsafe { std::slice::from_raw_parts(val as *const usize as *const u8, 8) }, + IndexSize::Usize => unsafe { std::slice::from_raw_parts(val as *const usize as *const u8, 8) }, + } + } +} #[doc(hidden)] pub struct MultiValMultiIdxOps { - pub id: (TypeId,TypeId,BatchReturnType), + pub id: (TypeId, TypeId, BatchReturnType), pub op: MultiValMultiIdxFn, } - #[doc(hidden)] pub struct SingleValMultiIdxOps { - pub id: (TypeId,TypeId,BatchReturnType), + pub id: (TypeId, TypeId, BatchReturnType), pub op: SingleValMultiIdxFn, } #[doc(hidden)] pub struct MultiValSingleIdxOps { - pub id: (TypeId,TypeId,BatchReturnType), + pub id: (TypeId, TypeId, BatchReturnType), pub op: MultiValSingleIdxFn, } -crate::inventory::collect!(UnsafeArrayOpBuf); - crate::inventory::collect!(MultiValMultiIdxOps); crate::inventory::collect!(SingleValMultiIdxOps); crate::inventory::collect!(MultiValSingleIdxOps); -#[derive(Debug)] -pub(crate) enum BufOpsRequest { - NoFetch(Box), - Fetch(Box>), - Result(Box>), -} - -#[derive(Debug, Clone, Copy)] -pub(crate) enum OpReturnType { - None, - Fetch, - Result, -} - impl UnsafeArray { pub(crate) fn dummy_val(&self) -> T { let slice = self.inner.data.mem_region.as_slice().unwrap(); @@ -106,334 +119,64 @@ impl UnsafeArray { } #[tracing::instrument(skip_all)] - fn initiate_op_task<'a>( - &self, - ret_type: OpReturnType, - op: ArrayOpCmd, - 
input: InputToValue, - submit_cnt: usize, - req_handles: Arc>>>, - ) { - // println!("initiate_op_task"); - let (pe_offsets, req_ids, req_cnt) = input.to_pe_offsets(self); //HashMap> - - let res_offsets_map = OpReqOffsets::new(); - let res_map = OpResults::new(); - let mut complete_cnt = Vec::new(); - // println!("pe_offsets size {:?}",pe_offsets.len()); - - // println!("req_cnt: {:?}",req_cnt); - let req = pe_offsets - .iter() - .map(|(pe, op_data)| { - // println!("pe: {:?} op_len {:?}",pe,op_data.len()); - let pe = *pe; - let mut stall_mark = self - .inner - .data - .req_cnt - .fetch_add(op_data.len(), Ordering::SeqCst); - // println!("added to req_cnt: {} {}",stall_mark+op_data.len(),op_data.len()); - let buf_op = self.inner.data.op_buffers.read()[pe].clone(); - - let (first, complete, res_offsets) = match ret_type { - OpReturnType::Fetch | OpReturnType::Result => buf_op.add_fetch_ops( - pe, - &op as *const ArrayOpCmd as *const u8, - op_data as *const InputToValue as *const u8, - &req_ids[&pe], - res_map.clone(), - self.inner.data.team.clone(), - ), - OpReturnType::None => { - let (first, complete) = buf_op.add_ops( - &op as *const ArrayOpCmd as *const u8, - op_data as *const InputToValue as *const u8, - self.inner.data.team.clone(), - ); - (first, complete, None) - } - }; - if let Some(res_offsets) = res_offsets { - res_offsets_map.insert(pe, res_offsets); - } - if first { - // let res_map = res_map.clone(); - let array = self.clone(); - let _team_cnt = self - .inner - .data - .team - .team_counters - .outstanding_reqs - .fetch_add(1, Ordering::SeqCst); // we need to tell the world we have a request pending - // println!("updated team cnt: {}",_team_cnt +1); - self.inner.data.team.scheduler.submit_task(async move { - // println!("starting"); - let mut wait_cnt = 0; - while wait_cnt < 1000 - && array.inner.data.req_cnt.load(Ordering::Relaxed) - * std::mem::size_of::<(usize, T)>() - < 100000000 - { - while stall_mark != array.inner.data.req_cnt.load(Ordering::Relaxed) { - stall_mark = array.inner.data.req_cnt.load(Ordering::Relaxed); - async_std::task::yield_now().await; - } - wait_cnt += 1; - async_std::task::yield_now().await; - } - - let (ams, len, complete, results) = - buf_op.into_arc_am(pe, array.sub_array_range()); - // println!("pe{:?} ams: {:?} len{:?}",pe,ams.len(),len); - if len > 0 { - let mut res = Vec::new(); - for am in ams { - res.push(array.inner.data.team.exec_arc_am_pe::>( - pe, - am, - Some(array.inner.data.array_counters.clone()), - )); - } - - let mut full_results: Vec = Vec::new(); - for r in res { - // println!("submitted indirectly {:?} ",len); - let results_u8: Vec = r.into_future().await; - // println!("returned_u8 {:?}",results_u8); - - full_results.extend(results_u8); - } - - // println!("{:?} {:?}",full_results.len(),full_results); - std::mem::swap(&mut full_results, &mut results.lock()); - // println!("inserted results {:}",pe); - // res_map.insert(pe,full_results); - // println!("results {:?}",res_map); - // println!("done!"); - let _cnt1 = array - .inner - .data - .team - .team_counters - .outstanding_reqs - .fetch_sub(1, Ordering::SeqCst); //remove our pending req now that it has actually been submitted; - // println!("updated team cnt: {}",_cnt1 -1); - let _cnt2 = array.inner.data.req_cnt.fetch_sub(len, Ordering::SeqCst); - // println!("removed frm req_cnt: {} {}",_cnt2-len,len); - complete.store(true, Ordering::Relaxed); - - // println!("indirect cnts {:?}->{:?} {:?}->{:?} -- {:?}",cnt1,cnt1-1,cnt2,cnt2-len,len); - } else { - // println!("here {:?} 
{:?} ",ams.len(),len); - // complete.store(true, Ordering::Relaxed); - let _team_cnt = array - .inner - .data - .team - .team_counters - .outstanding_reqs - .fetch_sub(1, Ordering::SeqCst); - // println!("updated team cnt: {} but not sure I should be here",_team_cnt -1); - complete.store(true, Ordering::Relaxed); - } - }); - } - complete_cnt.push(complete.clone()); - // match results{ - // Some(results) =>{ - match ret_type { - OpReturnType::Fetch => { - BufOpsRequest::Fetch(Box::new(ArrayOpFetchHandleInner { - indices: res_offsets_map.clone(), - complete: complete_cnt.clone(), - results: res_map.clone(), - req_cnt: req_cnt, - scheduler: self.inner.data.team.scheduler.clone(), - _phantom: PhantomData, - })) - } - OpReturnType::Result => { - BufOpsRequest::Result(Box::new(ArrayOpResultHandleInner { - indices: res_offsets_map.clone(), - complete: complete_cnt.clone(), - results: res_map.clone(), - req_cnt: req_cnt, - scheduler: self.inner.data.team.scheduler.clone(), - _phantom: PhantomData, - })) - } - OpReturnType::None => BufOpsRequest::NoFetch(Box::new(ArrayOpHandleInner { - complete: complete_cnt.clone(), - scheduler: self.inner.data.team.scheduler.clone(), - })), - } - // } - }) - .last() - .unwrap(); - // submit_cnt.fetch_sub(1, Ordering::SeqCst); - // req - req_handles.lock().insert(submit_cnt, req); - } - - #[tracing::instrument(skip_all)] - pub(crate) fn inner_initiate_op<'a>( + pub(crate) fn initiate_batch_op<'a>( &self, val: impl OpInput<'a, T>, index: impl OpInput<'a, usize>, op: ArrayOpCmd, - ret_type: OpReturnType, - ) -> Arc>>> { - let (mut indices, i_len) = index.as_op_input(); //(Vec>, usize); - let (mut vals, v_len) = val.as_op_input(); - let mut i_v_iters = vec![]; - let req_handles = Arc::new(Mutex::new(HashMap::new())); - // println!("i_len {i_len} indices len {} v_len {v_len} vals len {}",indices.len(),vals.len()); - if v_len > 0 && i_len > 0 { - if v_len == 1 && i_len > 1 { - // println!("here 0"); - let val = vals[0].first(); - for i in indices.drain(..) { - i_v_iters.push(InputToValue::ManyToOne(i, val)); - } - } else if v_len > 1 && i_len == 1 { - // println!("here 1"); - let idx = indices[0].first(); - for v in vals.drain(..) { - i_v_iters.push(InputToValue::OneToMany(idx, v)); - } - } else if i_len == v_len { - if i_len == 1 { - // println!("here 2"); - i_v_iters.push(InputToValue::OneToOne(indices[0].first(), vals[0].first())); - } else { - // println!("here 3"); - for (i, v) in indices.iter().zip(vals.iter()) { - i_v_iters.push(InputToValue::ManyToMany(i.clone(), v.clone())); - } - } - } else { - panic!( - "not sure this case can exist!! 
indices len {:?} vals len {:?}", - i_len, v_len - ); - } - - // println!("i_v_iters len {:?}", i_v_iters.len()); - - let num_sub_reqs = i_v_iters.len(); //Arc::new(AtomicUsize::new(i_v_iters.len())); - let mut submit_cnt = num_sub_reqs - 1; + byte_array: LamellarByteArray, + ) -> Pin + Send>> { + let (indices, i_len) = index.as_op_input(); + let (vals, v_len) = val.as_op_input(); - while i_v_iters.len() > 1 { - // let submit_cnt = submit_cnt.clone(); - let req_handles = req_handles.clone(); - let array = self.clone(); - let input = i_v_iters.pop().unwrap(); - self.inner.data.team.scheduler.submit_task(async move { - array.initiate_op_task(ret_type, op, input, submit_cnt, req_handles); - }); - submit_cnt -= 1; - } - self.initiate_op_task( - ret_type, + let max_local_size = (0..self.num_pes()) + .map(|pe| self.inner.num_elems_pe(pe)) + .max() + .unwrap(); + let index_size = IndexSize::from(max_local_size); + + let res: Pin)>> + Send>> = if v_len == 1 + && i_len == 1 + { + //one to one + self.single_val_single_index::<()>( + byte_array, + vals[0].first(), + indices[0].first(), op, - i_v_iters.pop().unwrap(), - submit_cnt, - req_handles.clone(), - ); - // req_handles.lock().insert(0,req); - while req_handles.lock().len() < num_sub_reqs { - std::thread::yield_now(); - } - // println!("submit_cnt {:?} {:?}",req_handles.lock().len(),num_sub_reqs); - req_handles + BatchReturnType::None, + ) + } else if v_len > 1 && i_len == 1 { + //many vals one index + self.multi_val_one_index::<()>( + byte_array, + vals, + indices[0].first(), + op, + BatchReturnType::None, + index_size, + ) + } else if v_len == 1 && i_len > 1 { + //one val many indices + self.one_val_multi_indices::<()>( + byte_array, + vals[0].first(), + indices, + op, + BatchReturnType::None, + index_size, + ) + } else if v_len > 1 && i_len > 1 { + //many vals many indices + self.multi_val_multi_index::<()>(byte_array, vals, indices, op, BatchReturnType::None,index_size,) } else { - // println!("im not sure i can ever be here"); - req_handles.lock().insert( - 0, - match ret_type { - OpReturnType::Fetch => { - BufOpsRequest::Fetch(Box::new(ArrayOpFetchHandleInner { - indices: OpReqOffsets::new(), - complete: Vec::new(), - results: OpResults::new(), - req_cnt: 0, - scheduler: self.inner.data.team.scheduler.clone(), - _phantom: PhantomData, - })) - } - OpReturnType::Result => { - BufOpsRequest::Result(Box::new(ArrayOpResultHandleInner { - indices: OpReqOffsets::new(), - complete: Vec::new(), - results: OpResults::new(), - req_cnt: 0, - scheduler: self.inner.data.team.scheduler.clone(), - _phantom: PhantomData, - })) - } - OpReturnType::None => BufOpsRequest::NoFetch(Box::new(ArrayOpHandleInner { - complete: Vec::new(), - scheduler: self.inner.data.team.scheduler.clone(), - })), - }, - ); - req_handles - } - } - - #[tracing::instrument(skip_all)] - pub(crate) fn initiate_op<'a>( - &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, - op: ArrayOpCmd, - ) -> Pin + Send>> { - let req_handles_mutex = self.inner_initiate_op(val, index, op, OpReturnType::None); - let mut req_handles = req_handles_mutex.lock(); - let mut reqs = vec![]; - for i in 0..req_handles.len() { - match req_handles.remove(&i).unwrap() { - BufOpsRequest::NoFetch(req) => reqs.push(req), - BufOpsRequest::Fetch(_) => { - panic!("trying to return a fetch request for not fetch operations") - } - BufOpsRequest::Result(_) => { - panic!("trying to return a result request for not fetch operations") - } - } - } - Box::new(ArrayOpHandle { reqs }).into_future() - } - - 
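// ---------------------------------------------------------------------------
// The batch entry points above all dispatch on the shapes of the two inputs.
// A compact restatement of that four-way split (the `Batch` names are
// illustrative, not the crate's API):

enum Batch {
    OneToOne,       // single value, single index
    ManyValsOneIdx, // -> multi_val_one_index
    OneValManyIdxs, // -> one_val_multi_indices
    ManyToMany,     // -> multi_val_multi_index
    Empty,          // no values or no indices
}

fn classify(v_len: usize, i_len: usize) -> Batch {
    match (v_len, i_len) {
        (1, 1) => Batch::OneToOne,
        (v, 1) if v > 1 => Batch::ManyValsOneIdx,
        (1, i) if i > 1 => Batch::OneValManyIdxs,
        (v, i) if v > 1 && i > 1 => Batch::ManyToMany,
        _ => Batch::Empty,
    }
}

fn main() {
    assert!(matches!(classify(1, 100), Batch::OneValManyIdxs));
    assert!(matches!(classify(0, 5), Batch::Empty));
}
// ---------------------------------------------------------------------------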
#[tracing::instrument(skip_all)] - pub(crate) fn initiate_batch_op<'a>( - &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, - op: ArrayOpCmd2, - byte_array: LamellarByteArray - ) -> Pin + Send>> { - let (mut indices, i_len) = index.as_op_input(); - let (mut vals, v_len) = val.as_op_input(); - let res: Pin)>> + Send>> = if v_len == 1 && i_len == 1 { //one to one - self.single_val_single_index::<()>(byte_array, vals[0].first(), indices[0].first(), op, BatchReturnType::None) - } - else if v_len > 1 && i_len == 1 { //many vals one index - self.multi_val_one_index::<()>(byte_array, vals, indices[0].first(), op, BatchReturnType::None) - } - else if v_len == 1 && i_len > 1 { //one val many indices - self.one_val_multi_indices::<()>(byte_array, vals[0].first(), indices, op, BatchReturnType::None) - } - else if v_len > 1 && i_len > 1 { //many vals many indices - self.multi_val_multi_index::<()>(byte_array, vals, indices, op, BatchReturnType::None) - } - else{ //no vals no indices + //no vals no indices Box::pin(async { Vec::new() }) }; - Box::pin(async { res.await; () }) + Box::pin(async { + res.await; + () + }) } #[tracing::instrument(skip_all)] @@ -441,37 +184,72 @@ impl UnsafeArray { &self, val: impl OpInput<'a, T>, index: impl OpInput<'a, usize>, - op: ArrayOpCmd2, - byte_array: LamellarByteArray + op: ArrayOpCmd, + byte_array: LamellarByteArray, ) -> Pin> + Send>> { // println!("here in batch fetch op 2"); - let (mut indices, i_len) = index.as_op_input(); - let (mut vals, v_len) = val.as_op_input(); + let (indices, i_len) = index.as_op_input(); + let (vals, v_len) = val.as_op_input(); + let max_local_size = (0..self.num_pes()) + .map(|pe| self.inner.num_elems_pe(pe)) + .max() + .unwrap(); + let index_size = IndexSize::from(max_local_size); // println!("i_len {:?} v_len {:?}",i_len,v_len ); - let res: Pin,Vec)>> + Send>> = - if v_len == 1 && i_len == 1 { //one to one - self.single_val_single_index::>(byte_array, vals[0].first(), indices[0].first(), op, BatchReturnType::Vals) - } - else if v_len > 1 && i_len == 1 { //many vals one index - self.multi_val_one_index::>(byte_array, vals, indices[0].first(), op, BatchReturnType::Vals) - } - else if v_len == 1 && i_len > 1 { //one val many indices - self.one_val_multi_indices::>(byte_array, vals[0].first(), indices, op, BatchReturnType::Vals) - } - else if v_len > 1 && i_len > 1 { //many vals many indices - self.multi_val_multi_index::>(byte_array, vals, indices, op, BatchReturnType::Vals) - } - else{ //no vals no indices - panic!("should not be here"); - Box::pin(async { Vec::new() }) - }; + let res: Pin, Vec)>> + Send>> = + if v_len == 1 && i_len == 1 { + //one to one + self.single_val_single_index::>( + byte_array, + vals[0].first(), + indices[0].first(), + op, + BatchReturnType::Vals, + ) + } else if v_len > 1 && i_len == 1 { + //many vals one index + self.multi_val_one_index::>( + byte_array, + vals, + indices[0].first(), + op, + BatchReturnType::Vals, + index_size, + ) + } else if v_len == 1 && i_len > 1 { + //one val many indices + self.one_val_multi_indices::>( + byte_array, + vals[0].first(), + indices, + op, + BatchReturnType::Vals, + index_size, + ) + } else if v_len > 1 && i_len > 1 { + //many vals many indices + self.multi_val_multi_index::>( + byte_array, + vals, + indices, + op, + BatchReturnType::Vals, + index_size, + ) + } else { + //no vals no indices + panic!("should not be here"); + // Box::pin(async { Vec::new() }) + }; Box::pin(async { - let mut results = Vec::with_capacity(std::cmp::max(i_len,v_len)); - unsafe 
{results.set_len(std::cmp::max(i_len,v_len));} - for (mut vals,mut idxs) in res.await.into_iter(){ + let mut results = Vec::with_capacity(std::cmp::max(i_len, v_len)); + unsafe { + results.set_len(std::cmp::max(i_len, v_len)); + } + for (mut vals, mut idxs) in res.await.into_iter() { // println!("vals {:?} idx {:?}",vals.len(),idxs); - for (v,i) in vals.drain(..).zip(idxs.drain(..)){ - results[i]=v; + for (v, i) in vals.drain(..).zip(idxs.drain(..)) { + results[i] = v; } } results @@ -484,34 +262,69 @@ impl UnsafeArray { &self, val: impl OpInput<'a, T>, index: impl OpInput<'a, usize>, - op: ArrayOpCmd2, - byte_array: LamellarByteArray - ) -> Pin>> + Send>> { - let (mut indices, i_len) = index.as_op_input(); - let (mut vals, v_len) = val.as_op_input(); - - let res: Pin>,Vec)>> + Send>> = - if v_len == 1 && i_len == 1 { //one to one - self.single_val_single_index::>>(byte_array, vals[0].first(), indices[0].first(), op, BatchReturnType::Result) - } - else if v_len > 1 && i_len == 1 { //many vals one index - self.multi_val_one_index::>>(byte_array, vals, indices[0].first(), op, BatchReturnType::Result) - } - else if v_len == 1 && i_len > 1 { //one val many indices - self.one_val_multi_indices::>>(byte_array, vals[0].first(), indices, op, BatchReturnType::Result) - } - else if v_len > 1 && i_len > 1 { //many vals many indices - self.multi_val_multi_index::>>(byte_array, vals, indices, op, BatchReturnType::Result) - } - else{ //no vals no indices - Box::pin(async { Vec::new() }) - }; + op: ArrayOpCmd, + byte_array: LamellarByteArray, + ) -> Pin>> + Send>> { + let (indices, i_len) = index.as_op_input(); + let (vals, v_len) = val.as_op_input(); + let max_local_size = (0..self.num_pes()) + .map(|pe| self.inner.num_elems_pe(pe)) + .max() + .unwrap(); + let index_size = IndexSize::from(max_local_size); + + let res: Pin>, Vec)>> + Send>> = + if v_len == 1 && i_len == 1 { + //one to one + self.single_val_single_index::>>( + byte_array, + vals[0].first(), + indices[0].first(), + op, + BatchReturnType::Result, + ) + } else if v_len > 1 && i_len == 1 { + //many vals one index + self.multi_val_one_index::>>( + byte_array, + vals, + indices[0].first(), + op, + BatchReturnType::Result, + index_size, + ) + } else if v_len == 1 && i_len > 1 { + //one val many indices + self.one_val_multi_indices::>>( + byte_array, + vals[0].first(), + indices, + op, + BatchReturnType::Result, + index_size, + ) + } else if v_len > 1 && i_len > 1 { + //many vals many indices + self.multi_val_multi_index::>>( + byte_array, + vals, + indices, + op, + BatchReturnType::Result, + index_size, + ) + } else { + //no vals no indices + Box::pin(async { Vec::new() }) + }; Box::pin(async { - let mut results = Vec::with_capacity(std::cmp::max(i_len,v_len)); - unsafe {results.set_len(std::cmp::max(i_len,v_len));} - for (mut vals,mut idxs) in res.await.into_iter(){ - for (v,i) in vals.drain(..).zip(idxs.drain(..)){ - results[i]=v; + let mut results = Vec::with_capacity(std::cmp::max(i_len, v_len)); + unsafe { + results.set_len(std::cmp::max(i_len, v_len)); + } + for (mut vals, mut idxs) in res.await.into_iter() { + for (v, i) in vals.drain(..).zip(idxs.drain(..)) { + results[i] = v; } } results @@ -519,163 +332,117 @@ impl UnsafeArray { }) } - #[tracing::instrument(skip_all)] - pub(crate) fn initiate_fetch_op<'a>( - &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, - op: ArrayOpCmd, - ) -> Pin + Send>> { - let req_handles_mutex = self.inner_initiate_op(val, index, op, OpReturnType::Fetch); - let mut req_handles = 
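// ---------------------------------------------------------------------------
// The fetch variants above reassemble per-PE replies into a single result
// vector: every sub-request returns its values together with the positions
// those values occupy in the caller's original request order, and the driver
// scatters them back. The patch preallocates with unsafe `set_len` and then
// overwrites every slot; the sketch below uses `Default` instead to stay
// safe (illustrative, not the crate's code):

fn scatter<T: Copy + Default>(total: usize, replies: Vec<(Vec<T>, Vec<usize>)>) -> Vec<T> {
    let mut results = vec![T::default(); total];
    for (vals, idxs) in replies {
        for (v, i) in vals.into_iter().zip(idxs) {
            results[i] = v; // idxs are original request positions, so order is restored
        }
    }
    results
}

fn main() {
    // Two replies arriving out of order still land in request order.
    let replies = vec![(vec![10, 30], vec![0, 2]), (vec![20], vec![1])];
    assert_eq!(scatter(3, replies), vec![10, 20, 30]);
}
// ---------------------------------------------------------------------------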
req_handles_mutex.lock(); - match req_handles.remove(&0).unwrap() { - BufOpsRequest::NoFetch(_) => { - panic!("trying to return a non fetch request for fetch operations") - } - BufOpsRequest::Fetch(req) => Box::new(ArrayOpFetchHandle { req }).into_future(), - BufOpsRequest::Result(_) => { - panic!("trying to return a result request for fetch operations") - } - } - } - - #[tracing::instrument(skip_all)] - pub(crate) fn initiate_batch_fetch_op<'a>( - &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, - op: ArrayOpCmd, - ) -> Pin> + Send>> { - let req_handles_mutex = self.inner_initiate_op(val, index, op, OpReturnType::Fetch); - let mut req_handles = req_handles_mutex.lock(); - let mut reqs = vec![]; - // println!("req_handles len {:?} {:?}",req_handles.len(),req_handles); - for i in 0..req_handles.len() { - match req_handles.remove(&i).unwrap() { - BufOpsRequest::NoFetch(_) => { - panic!("trying to return a non fetch request for fetch operations") - } - BufOpsRequest::Fetch(req) => reqs.push(req), - BufOpsRequest::Result(_) => { - panic!("trying to return a result request for fetch operations") - } - } - } - Box::new(ArrayOpBatchFetchHandle { reqs }).into_future() - } - #[tracing::instrument(skip_all)] - pub(crate) fn initiate_result_op<'a>( - &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, - op: ArrayOpCmd, - ) -> Pin> + Send>> { - let req_handles_mutex = self.inner_initiate_op(val, index, op, OpReturnType::Result); - let mut req_handles = req_handles_mutex.lock(); - // println!("req_handles len {:?}",req_handles.len()); + - match req_handles.remove(&0).unwrap() { - BufOpsRequest::NoFetch(_) => { - panic!("trying to return a non fetch request for result operations") - } - BufOpsRequest::Fetch(_) => { - panic!("trying to return a fetch request for result operations") - } - BufOpsRequest::Result(req) => Box::new(ArrayOpResultHandle { req }).into_future(), - } - } - #[tracing::instrument(skip_all)] - pub(crate) fn initiate_batch_result_op<'a>( + fn one_val_multi_indices( &self, - val: impl OpInput<'a, T>, - index: impl OpInput<'a, usize>, + byte_array: LamellarByteArray, + val: T, + mut indices: Vec>, op: ArrayOpCmd, - ) -> Pin>> + Send>> { - let req_handles_mutex = self.inner_initiate_op(val, index, op, OpReturnType::Result); - let mut req_handles = req_handles_mutex.lock(); - let mut reqs = vec![]; - // println!("req_handles len {:?}",req_handles.len()); - for i in 0..req_handles.len() { - match req_handles.remove(&i).unwrap() { - BufOpsRequest::NoFetch(_) => { - panic!("trying to return a non fetch request for result operations") - } - BufOpsRequest::Fetch(_) => { - panic!("trying to return a fetch request for result operations") - } - BufOpsRequest::Result(req) => reqs.push(req), - } - } - Box::new(ArrayOpBatchResultHandle { reqs }).into_future() - } - - fn one_val_multi_indices(&self, byte_array: LamellarByteArray, val: T, mut indices: Vec>, op: ArrayOpCmd2, ret: BatchReturnType) -> Pin)>> + Send>>{ + ret: BatchReturnType, + index_size: IndexSize, + ) -> Pin)>> + Send>> { let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { - Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread - Err(_) => 10000, //+ 1 to account for main thread + Ok(n) => n.parse::().unwrap(), + Err(_) => 10000, }; let num_pes = self.inner.data.team.num_pes(); let cnt = Arc::new(AtomicUsize::new(0)); let futures = Arc::new(Mutex::new(Vec::new())); let num_reqs = indices.len(); let mut start_i = 0; - + // println!("single_val_multi_index"); - for (i,index) in 
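// ---------------------------------------------------------------------------
// Batch sizing in these paths comes from the LAMELLAR_OP_BATCH environment
// variable with a 10_000 default; equivalent to the sketch below, except
// that the patch panics on an unparsable value rather than falling back:

fn batch_size() -> usize {
    std::env::var("LAMELLAR_OP_BATCH")
        .ok()
        .and_then(|n| n.parse().ok())
        .unwrap_or(10_000)
}
// ---------------------------------------------------------------------------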
indices.drain(..).enumerate(){ + for (_i, index) in indices.drain(..).enumerate() { let cnt2 = cnt.clone(); let futures2 = futures.clone(); let byte_array2 = byte_array.clone(); let len = index.len(); // println!("num_reqs {:?}",num_reqs); - self.inner.data.team.scheduler.submit_immediate_task2(async move { - let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; - let mut res_buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; - let mut reqs: Vec)> + Send>>> = Vec::new(); - let mut res_index = 0; - for (ii,idx) in index.iter().enumerate(){ - let j = ii + start_i; - let (pe,local_index) = self.pe_and_offset_for_global_index(idx).unwrap(); - buffs[pe].push(local_index); - res_buffs[pe].push(j); - if buffs[pe].len() >= num_per_batch { - let mut new_buffer = Vec::with_capacity(num_per_batch); - std::mem::swap( &mut buffs[pe], &mut new_buffer); - let mut new_res_buffer = Vec::with_capacity(num_per_batch); - std::mem::swap( &mut res_buffs[pe], &mut new_res_buffer); - - let am = SingleValMultiIndex::new_with_vec(byte_array2.clone(), op ,new_buffer,val).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - reqs.push(Box::pin(async move {(req.await,new_res_buffer)})); + self.inner + .data + .team + .scheduler + .submit_immediate_task2(async move { + let mut buffs = + vec![Vec::with_capacity(num_per_batch * index_size.len()); num_pes]; + let mut res_buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; + let mut reqs: Vec)> + Send>>> = + Vec::new(); + // let mut res_index = 0; + for (ii, idx) in index.iter().enumerate() { + let j = ii + start_i; + let (pe, local_index) = self.pe_and_offset_for_global_index(idx).unwrap(); + buffs[pe].extend_from_slice(index_size.as_bytes(&local_index)); + res_buffs[pe].push(j); + if buffs[pe].len() >= num_per_batch { + let mut new_buffer = + Vec::with_capacity(num_per_batch * index_size.len()); + std::mem::swap(&mut buffs[pe], &mut new_buffer); + let mut new_res_buffer = Vec::with_capacity(num_per_batch); + std::mem::swap(&mut res_buffs[pe], &mut new_res_buffer); + + let am = SingleValMultiIndex::new_with_vec( + byte_array2.clone(), + op, + new_buffer, + val, + index_size, + ) + .into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::( + pe, + am, + Some(self.inner.data.array_counters.clone()), + ) + .into_future(); + reqs.push(Box::pin(async move { (req.await, new_res_buffer) })); + } } - } - for (pe,(buff,res_buff)) in buffs.into_iter().zip(res_buffs.into_iter()).enumerate() { - if buff.len() > 0 { - let am = SingleValMultiIndex::new_with_vec(byte_array2.clone(),op,buff,val).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - reqs.push(Box::pin(async move {(req.await,res_buff)})); + for (pe, (buff, res_buff)) in + buffs.into_iter().zip(res_buffs.into_iter()).enumerate() + { + if buff.len() > 0 { + let am = SingleValMultiIndex::new_with_vec( + byte_array2.clone(), + op, + buff, + val, + index_size, + ) + .into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::( + pe, + am, + Some(self.inner.data.array_counters.clone()), + ) + .into_future(); + reqs.push(Box::pin(async move { (req.await, res_buff) })); + } } - } - // println!("reqs len {:?}",reqs.len()); - futures2.lock().extend(reqs); - cnt2.fetch_add(1, Ordering::SeqCst); - }); + // println!("reqs len {:?}",reqs.len()); + futures2.lock().extend(reqs); + 
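// ---------------------------------------------------------------------------
// The loop above is the heart of the one-value/many-indices path: each global
// index is translated to (owning PE, local index), appended to that PE's
// buffer, and a message is flushed whenever a buffer reaches the batch size;
// partial buffers are flushed at the end. The same shape, stripped of the AM
// machinery (`owner` and `send` are stand-ins for
// `pe_and_offset_for_global_index` and the remote-AM launch):

fn batch_indices(
    indices: &[usize],
    num_pes: usize,
    batch: usize,
    owner: impl Fn(usize) -> (usize, usize),
    mut send: impl FnMut(usize, Vec<usize>),
) {
    let mut buffs: Vec<Vec<usize>> = vec![Vec::with_capacity(batch); num_pes];
    for &idx in indices {
        let (pe, local) = owner(idx);
        buffs[pe].push(local);
        if buffs[pe].len() >= batch {
            // Swap in a fresh buffer so the full one can be sent without copying.
            let full = std::mem::replace(&mut buffs[pe], Vec::with_capacity(batch));
            send(pe, full);
        }
    }
    for (pe, buff) in buffs.into_iter().enumerate() {
        if !buff.is_empty() {
            send(pe, buff); // flush partial batches
        }
    }
}

fn main() {
    // Two PEs, block-distributed over 10 elements, batches of 3.
    let indices: Vec<usize> = (0..10).collect();
    batch_indices(&indices, 2, 3, |g| (g / 5, g % 5), |pe, b| {
        println!("send {:?} to PE {}", b, pe);
    });
}
// ---------------------------------------------------------------------------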
cnt2.fetch_add(1, Ordering::SeqCst); + }); start_i += len; } - - while cnt.load(Ordering::SeqCst) < num_reqs{ - self.inner.data.team.scheduler.exec_task(); - } + // println!("futures len {:?}",futures.lock().len()); - Box::pin(async move{ + Box::pin(async move { + while cnt.load(Ordering::SeqCst) < num_reqs { + // self.inner.data.team.scheduler.exec_task(); + async_std::task::yield_now().await; + } // println!("futures len {:?}",futures.lock().len()); futures::future::join_all(futures.lock().drain(..)).await }) @@ -683,64 +450,107 @@ impl UnsafeArray { // in general this type of operation will likely incur terrible cache performance, the obvious optimization is to apply the updates locally then send it over, // this clearly works for ops like add and mul, does it hold for sub (i think so? given that it is always array[i] - val), need to think about other ops as well... - fn multi_val_one_index(&self, byte_array: LamellarByteArray, mut vals: Vec>, index: usize, op: ArrayOpCmd2, ret: BatchReturnType) -> Pin)>> + Send>>{ + fn multi_val_one_index( + &self, + byte_array: LamellarByteArray, + mut vals: Vec>, + index: usize, + op: ArrayOpCmd, + ret: BatchReturnType, + index_size: IndexSize, + ) -> Pin)>> + Send>> { let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread Err(_) => 10000, //+ 1 to account for main thread }; // println!("multi_val_one_index"); - let num_pes = self.inner.data.team.num_pes(); + // let num_pes = self.inner.data.team.num_pes(); let cnt = Arc::new(AtomicUsize::new(0)); let futures = Arc::new(Mutex::new(Vec::new())); - let (pe,local_index) = self.pe_and_offset_for_global_index(index).unwrap(); + let (pe, local_index) = self.pe_and_offset_for_global_index(index).unwrap(); let num_reqs = vals.len(); // println!("num_reqs {:?}",num_reqs); let mut start_i = 0; - for val in vals.drain(..){ + for val in vals.drain(..) 
{ let cnt2 = cnt.clone(); let futures2 = futures.clone(); let byte_array2 = byte_array.clone(); let len = val.len(); - self.inner.data.team.scheduler.submit_immediate_task2(async move { - // let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; - // let val_slice = val.as_slice(); - let mut inner_start_i = start_i; - let mut reqs:Vec)> + Send>>> = Vec::new(); - val.as_vec_chunks(num_per_batch).into_iter().for_each(|val|{ - let val_len = val.len(); - let am = MultiValSingleIndex::new_with_vec(byte_array2.clone(), op ,local_index, val).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - // println!("start_i: {:?} inner_start_i {:?} val_len: {:?}",start_i,inner_start_i,val_len); - let res_buffer = (inner_start_i..inner_start_i+val_len).collect::>(); - reqs.push(Box::pin(async move {(req.await,res_buffer)})); - inner_start_i += val_len; + self.inner + .data + .team + .scheduler + .submit_immediate_task2(async move { + // let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; + // let val_slice = val.as_slice(); + let mut inner_start_i = start_i; + let mut reqs: Vec)> + Send>>> = + Vec::new(); + val.as_vec_chunks(num_per_batch) + .into_iter() + .for_each(|val| { + let val_len = val.len(); + let am = MultiValSingleIndex::new_with_vec( + byte_array2.clone(), + op, + local_index, + val, + ) + .into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::( + pe, + am, + Some(self.inner.data.array_counters.clone()), + ) + .into_future(); + // println!("start_i: {:?} inner_start_i {:?} val_len: {:?}",start_i,inner_start_i,val_len); + let res_buffer = + (inner_start_i..inner_start_i + val_len).collect::>(); + reqs.push(Box::pin(async move { (req.await, res_buffer) })); + inner_start_i += val_len; + }); + // println!("reqs len {:?}",reqs.len()); + futures2.lock().extend(reqs); + cnt2.fetch_add(1, Ordering::SeqCst); }); - // println!("reqs len {:?}",reqs.len()); - futures2.lock().extend(reqs); - cnt2.fetch_add(1, Ordering::SeqCst); - }); start_i += len; } - while cnt.load(Ordering::SeqCst) < num_reqs{ + while cnt.load(Ordering::SeqCst) < num_reqs { self.inner.data.team.scheduler.exec_task(); } // println!("futures len {:?}",futures.lock().len()); - Box::pin(async move{ + Box::pin(async move { // println!("futures len {:?}",futures.lock().len()); futures::future::join_all(futures.lock().drain(..)).await }) } - fn multi_val_multi_index(&self, byte_array: LamellarByteArray, mut vals: Vec>, mut indices: Vec>, op: ArrayOpCmd2, ret: BatchReturnType) -> Pin)>> + Send>>{ + fn multi_val_multi_index( + &self, + byte_array: LamellarByteArray, + mut vals: Vec>, + mut indices: Vec>, + op: ArrayOpCmd, + ret: BatchReturnType, + index_size: IndexSize, + ) -> Pin)>> + Send>> { let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread Err(_) => 10000, //+ 1 to account for main thread }; - let bytes_per_batch = num_per_batch * std::mem::size_of::>(); + let bytes_per_batch = + match index_size { + IndexSize::U8 => num_per_batch * std::mem::size_of::>(), + IndexSize::U16 => num_per_batch * std::mem::size_of::>(), + IndexSize::U32 => num_per_batch * std::mem::size_of::>(), + IndexSize::U64 => num_per_batch * std::mem::size_of::>(), + IndexSize::Usize => num_per_batch * std::mem::size_of::>(), + }; + let num_pes = self.inner.data.team.num_pes(); let cnt = Arc::new(AtomicUsize::new(0)); let futures = 
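// ---------------------------------------------------------------------------
// multi_val_one_index above takes the opposite tack: the index is fixed, so
// only the values are chunked (`as_vec_chunks`), and each chunk records which
// slots of the caller's result vector it will fill (the inner_start_i
// bookkeeping). A stripped-down sketch of that chunking (illustrative names):

use std::ops::Range;

fn chunk_vals<T: Clone>(vals: &[T], chunk: usize) -> Vec<(Vec<T>, Range<usize>)> {
    let mut out = Vec::new();
    let mut start = 0;
    for c in vals.chunks(chunk) {
        out.push((c.to_vec(), start..start + c.len()));
        start += c.len();
    }
    out
}

fn main() {
    let chunks = chunk_vals(&[1, 2, 3, 4, 5], 2);
    assert_eq!(chunks.len(), 3);
    assert_eq!(chunks[2], (vec![5], 4..5));
}
// ---------------------------------------------------------------------------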
Arc::new(Mutex::new(Vec::new())); @@ -748,523 +558,273 @@ impl UnsafeArray { // println!("num_reqs {:?}",num_reqs); let mut start_i = 0; - - for (i,(index,val)) in indices.drain(..).zip(vals.drain(..)).enumerate(){ + + for (_i, (index, val)) in indices.drain(..).zip(vals.drain(..)).enumerate() { let cnt2 = cnt.clone(); let futures2 = futures.clone(); let byte_array2 = byte_array.clone(); let len = index.len(); - self.inner.data.team.scheduler.submit_immediate_task2(async move { - let mut buffs = vec![Vec::with_capacity(bytes_per_batch); num_pes]; - let mut res_buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; - let mut reqs: Vec)> + Send>>> = Vec::new(); - let mut res_index = 0; - for (ii,(idx,val)) in index.iter().zip(val.iter()).enumerate(){ - let j = ii + start_i; - let (pe,local_index) = self.pe_and_offset_for_global_index(idx).unwrap(); - buffs[pe].extend_from_slice(IdxVal { index: local_index, val: val }.as_bytes()); - res_buffs[pe].push(j); - if buffs[pe].len() >= bytes_per_batch { - let mut new_buffer = Vec::with_capacity(bytes_per_batch); - std::mem::swap( &mut buffs[pe], &mut new_buffer); - let mut new_res_buffer = Vec::with_capacity(num_per_batch); - std::mem::swap( &mut res_buffs[pe], &mut new_res_buffer); - - // println!("buff len {}",new_buffer.len()); - let am = MultiValMultiIndex::new_with_vec(byte_array2.clone(), op ,new_buffer).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - reqs.push(Box::pin(async move {(req.await,new_res_buffer)})); + self.inner + .data + .team + .scheduler + .submit_immediate_task2(async move { + let mut buffs = vec![Vec::with_capacity(bytes_per_batch); num_pes]; + let mut res_buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; + let mut reqs: Vec)> + Send>>> = + Vec::new(); + // let mut res_index = 0; + for (ii, (idx, val)) in index.iter().zip(val.iter()).enumerate() { + let j = ii + start_i; + let (pe, local_index) = self.pe_and_offset_for_global_index(idx).unwrap(); + match index_size { + IndexSize::U8 => buffs[pe].extend_from_slice( + IdxVal { + index: local_index as u8, + val: val, + } + .as_bytes(), + ), + IndexSize::U16 => buffs[pe].extend_from_slice( + IdxVal { + index: local_index as u16, + val: val, + } + .as_bytes(), + ), + IndexSize::U32 => buffs[pe].extend_from_slice( + IdxVal { + index: local_index as u32, + val: val, + } + .as_bytes(), + ), + IndexSize::U64 => buffs[pe].extend_from_slice( + IdxVal { + index: local_index as u64, + val: val, + } + .as_bytes(), + ), + IndexSize::Usize => buffs[pe].extend_from_slice( + IdxVal { + index: local_index as usize, + val: val, + } + .as_bytes(), + ), + } + res_buffs[pe].push(j); + if buffs[pe].len() >= bytes_per_batch { + let mut new_buffer = Vec::with_capacity(bytes_per_batch); + std::mem::swap(&mut buffs[pe], &mut new_buffer); + let mut new_res_buffer = Vec::with_capacity(num_per_batch); + std::mem::swap(&mut res_buffs[pe], &mut new_res_buffer); + + // println!("buff len {}",new_buffer.len()); + let am = MultiValMultiIndex::new_with_vec( + byte_array2.clone(), + op, + new_buffer, + index_size, + ) + .into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::( + pe, + am, + Some(self.inner.data.array_counters.clone()), + ) + .into_future(); + reqs.push(Box::pin(async move { (req.await, new_res_buffer) })); + } } - - } - for (pe,(buff,res_buff)) in buffs.into_iter().zip(res_buffs.into_iter()).enumerate() { - if buff.len() > 0 { - // println!("buff len {}",buff.len()); 
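// ---------------------------------------------------------------------------
// The many-to-many path serializes each (index, value) pair as a contiguous
// record whose index field is narrowed to the chosen width, which is what the
// per-width `IdxVal { index: local_index as u16, .. }` arms above accomplish.
// A sketch of the underlying layout trick; the crate's actual `IdxVal` may be
// defined differently, but a packed repr(C) struct yields the fixed
// `index_size.len() + size_of::<T>()` record the receiving PE expects:

#[repr(C, packed)]
struct IdxVal<I, T> {
    index: I,
    val: T,
}

impl<I, T> IdxVal<I, T> {
    fn as_bytes(&self) -> &[u8] {
        // Viewing the struct as bytes is sound here only because it is packed
        // (no padding) and both fields are plain-old-data in this sketch.
        unsafe {
            std::slice::from_raw_parts(
                self as *const Self as *const u8,
                std::mem::size_of::<Self>(),
            )
        }
    }
}

fn main() {
    let rec = IdxVal { index: 1_234u16, val: 3.5f64 };
    assert_eq!(rec.as_bytes().len(), 2 + 8); // 2-byte index + 8-byte value
}
// ---------------------------------------------------------------------------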
- let am = MultiValMultiIndex::new_with_vec(byte_array2.clone(),op,buff).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - reqs.push(Box::pin(async move {(req.await,res_buff)})); + for (pe, (buff, res_buff)) in + buffs.into_iter().zip(res_buffs.into_iter()).enumerate() + { + if buff.len() > 0 { + // println!("buff len {}",buff.len()); + let am = MultiValMultiIndex::new_with_vec( + byte_array2.clone(), + op, + buff, + index_size, + ) + .into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::( + pe, + am, + Some(self.inner.data.array_counters.clone()), + ) + .into_future(); + reqs.push(Box::pin(async move { (req.await, res_buff) })); + } } - } - futures2.lock().extend(reqs); - cnt2.fetch_add(1, Ordering::SeqCst); - }); + futures2.lock().extend(reqs); + cnt2.fetch_add(1, Ordering::SeqCst); + }); start_i += len; } - while cnt.load(Ordering::SeqCst) < num_reqs{ + while cnt.load(Ordering::SeqCst) < num_reqs { self.inner.data.team.scheduler.exec_task(); } - Box::pin(async move{ + Box::pin(async move { // println!("futures len: {:?}",futures.lock().len()); futures::future::join_all(futures.lock().drain(..)).await }) } - fn single_val_single_index(&self, byte_array: LamellarByteArray, val: T, index: usize, op: ArrayOpCmd2, ret: BatchReturnType) -> Pin)>> + Send>>{ - - let (pe,local_index) = self.pe_and_offset_for_global_index(index).unwrap(); + fn single_val_single_index( + &self, + byte_array: LamellarByteArray, + val: T, + index: usize, + op: ArrayOpCmd, + ret: BatchReturnType, + ) -> Pin)>> + Send>> { + let (pe, local_index) = self.pe_and_offset_for_global_index(index).unwrap(); let mut buff = Vec::new(); - buff.extend_from_slice(IdxVal { index: local_index, val: val }.as_bytes()); + buff.extend_from_slice( + IdxVal { + index: local_index, + val: val, + } + .as_bytes(), + ); let res_buff = vec![0]; - let am = MultiValMultiIndex::new_with_vec(byte_array.clone(),op,buff).into_am::(ret); - let req = self.inner.data.team.exec_arc_am_pe::( - pe, - am, - Some(self.inner.data.array_counters.clone()) - ).into_future(); - let mut reqs =vec![Box::pin(async move {(req.await,res_buff)})]; - - Box::pin(async move{ - futures::future::join_all(reqs.drain(..)).await - }) + let am = MultiValMultiIndex::new_with_vec(byte_array.clone(), op, buff, IndexSize::Usize).into_am::(ret); + let req = self + .inner + .data + .team + .exec_arc_am_pe::(pe, am, Some(self.inner.data.array_counters.clone())) + .into_future(); + let mut reqs = vec![Box::pin(async move { (req.await, res_buff) })]; + + Box::pin(async move { futures::future::join_all(reqs.drain(..)).await }) } } -// impl UnsafeArray { -// pub fn new_add3<'a>( -// &self, -// mut index: impl OpInput<'a, usize>, -// mut val: impl OpInput<'a, T>, -// ) -> Pin + Send>> { -// let (mut indices, i_len) = index.as_op_input(); //(Vec>, usize); -// let (mut vals, v_len) = val.as_op_input(); - - -// // let mut reqs = vec![]; -// let cnt = Arc::new(AtomicUsize::new(0)); - -// for i in 0..indices.len(){ -// let cnt2 = cnt.clone(); -// let index = indices[i].iter(); -// let val = vals[0].iter(); -// self.inner.data.team.scheduler.submit_task(async move { -// self.add_multi_single3(index,val); -// cnt2.fetch_add(1, Ordering::SeqCst); -// }); -// } -// while cnt.load(Ordering::SeqCst) < indices.len(){ -// std::thread::yield_now(); -// } -// Box::pin(async move{ -// // futures::future::join_all(reqs).await; -// }) -// } -// pub fn new_add2<'a>( -// &self, -// // mut 
index: impl OpInput<'a, usize>, -// // mut val: impl OpInput<'a, T>, -// mut index: &'a[usize], -// mut val: T, -// ) -> Pin + Send>> { -// let num_tasks = match std::env::var("LAMELLAR_THREADS") { -// Ok(n) => (n.parse::().unwrap() + 1)/2, //+ 1 to account for main thread -// Err(_) => 4, //+ 1 to account for main thread -// }; -// let i_p_t = index.len()/num_tasks; - -// // let mut reqs = vec![]; -// let cnt = Arc::new(AtomicUsize::new(0)); - -// for i in 0..num_tasks{ -// let cnt2 = cnt.clone(); -// self.inner.data.team.scheduler.submit_task(async move { -// self.add_multi_single2(&index[i*i_p_t..(i+1)*i_p_t],val); -// cnt2.fetch_add(1, Ordering::SeqCst); -// }); -// } -// while cnt.load(Ordering::SeqCst) < num_tasks{ -// std::thread::yield_now(); -// } -// Box::pin(async move{ -// // futures::future::join_all(reqs).await; -// }) -// } -// pub fn new_add<'a>( -// &self, -// mut index: impl Iterator + Clone, -// mut val: impl Iterator + Clone, -// ) -> Pin + Send>> { -// match(index.clone().skip(1).next(),val.clone().skip(1).next()){ -// (Some(_),Some(_)) => { -// self.add_multi_multi(index,val) -// } -// (Some(_),None) => { -// self.add_multi_single(index,val) -// } -// (None,Some(_)) => { -// Box::pin(async{}) -// // self.add_single_multi(index,val) -// } -// (None,None) => { -// self.add_multi_multi(index,val) -// } -// } -// } - -// fn add_multi_multi<'a>(&self, index: impl Iterator, val: impl Iterator) -> Pin + Send>>{ -// let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { -// Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread -// Err(_) => 10000, //+ 1 to account for main thread -// }; -// // println!("num_per_batch {:?}",num_per_batch); -// let mut reqs = Arc::new(Mutex::new(Vec::new())); -// let req_count = AtomicUsize::new(0); -// let mut temp = Vec::with_capacity(num_per_batch*5); -// index.zip(val).for_each( |idx_val| { -// temp.push(idx_val); -// if temp.len() == num_per_batch*5 { -// let mut new_temp = Vec::with_capacity(num_per_batch*5); -// std::mem::swap(&mut temp, &mut new_temp); -// req_count.fetch_add(1,Ordering::Relaxed); -// let mut all_reqs = reqs.clone(); -// self.inner.data.team.scheduler.submit_task(async move { -// let mut pe_buffers = HashMap::new(); -// let mut reqs = new_temp.into_iter().filter_map(|(idx, val)| { - -// let (pe,index) = self.pe_and_offset_for_global_index(*idx).expect("index out of bounds"); -// let buffer = pe_buffers.entry(pe).or_insert((MultiMulti::new(self.clone(),ArrayOpCmd2::Add),0)); -// buffer.0.append_idxs_vals(index, val); -// buffer.1 += 1; -// if buffer.1 >= num_per_batch { -// let mut new_buffer = MultiMulti::new(self.clone(),ArrayOpCmd2::Add); -// std::mem::swap( &mut buffer.0, &mut new_buffer); -// buffer.1 = 0; -// // let am: MultiMultiAddRemote = new_buffer.into(); -// Some( -// self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// new_buffer.into_am(), -// Some(self.inner.data.array_counters.clone()) -// ).into_future() -// ) -// } -// else { -// None -// } -// }).collect::>(); -// for (pe,buffer) in pe_buffers.iter_mut() { -// if buffer.1 > 0 { - -// let mut new_buffer = MultiMulti::new(self.clone(),ArrayOpCmd2::Add); -// std::mem::swap( &mut buffer.0, &mut new_buffer); -// reqs.push( - -// self.inner.data.team.exec_arc_am_pe::<()>( -// *pe, -// new_buffer.into_am(), -// Some(self.inner.data.array_counters.clone()) -// ).into_future() -// ); -// } -// } -// // println!("reqs len {:?}",reqs.len()); -// all_reqs.lock().extend(reqs); -// }); -// } -// }); -// if temp.len() > 0 { -// 
req_count.fetch_add(1,Ordering::Relaxed); -// let mut all_reqs = reqs.clone(); -// self.inner.data.team.scheduler.submit_task(async move { -// let mut pe_buffers = HashMap::new(); -// let mut reqs = temp.into_iter().filter_map(|(idx, val)| { - -// let (pe,index) = self.pe_and_offset_for_global_index(*idx).expect("index out of bounds"); -// let buffer = pe_buffers.entry(pe).or_insert((MultiMulti::new(self.clone(),ArrayOpCmd2::Add),0)); -// buffer.0.append_idxs_vals(index, val); -// buffer.1 += 1; -// if buffer.1 >= num_per_batch { -// let mut new_buffer = MultiMulti::new(self.clone(),ArrayOpCmd2::Add); -// std::mem::swap( &mut buffer.0, &mut new_buffer); -// buffer.1 = 0; -// // let am: MultiMultiAddRemote = new_buffer.into(); -// Some( -// self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// new_buffer.into_am(), -// Some(self.inner.data.array_counters.clone()) -// ).into_future() -// ) -// } -// else { -// None -// } -// }).collect::>(); -// for (pe,buffer) in pe_buffers.iter_mut() { -// if buffer.1 > 0 { - -// let mut new_buffer = MultiMulti::new(self.clone(),ArrayOpCmd2::Add); -// std::mem::swap( &mut buffer.0, &mut new_buffer); -// reqs.push( - -// self.inner.data.team.exec_arc_am_pe::<()>( -// *pe, -// new_buffer.into_am(), -// Some(self.inner.data.array_counters.clone()) -// ).into_future() -// ); -// } -// } -// // println!("reqs len {:?}",reqs.len()); -// all_reqs.lock().extend(reqs); -// }); -// temp = Vec::with_capacity(num_per_batch); -// } -// Box::pin(async move{ -// let cnt = req_count.load(Ordering::Relaxed); -// while reqs.lock().len() < cnt { -// futures::future::pending().await -// } -// match Arc::try_unwrap(reqs){ -// Ok(reqs) => { -// futures::future::join_all(reqs.into_inner()).await; -// } -// Err(_) => { -// panic!("trying to return a fetch request for result operations"); -// } -// } -// }) -// } - - - -// fn add_multi_single<'a>(&self, mut index: impl Iterator, mut val: impl Iterator) -> Pin + Send>>{ -// // let big_timer = Instant::now(); -// let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { -// Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread -// Err(_) => 10000, //+ 1 to account for main thread -// }; -// let num_pes = self.inner.data.team.num_pes(); -// let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; -// let val = val.next().unwrap(); -// let mut reqs = Vec::new(); -// for idx in index{ -// let (pe,local_index) = self.pe_and_offset_for_global_index(*idx).unwrap(); -// buffs[pe].push(local_index); -// if buffs[pe].len() >= num_per_batch { -// let mut new_buffer = Vec::with_capacity(num_per_batch); -// std::mem::swap( &mut buffs[pe], &mut new_buffer); -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,new_buffer,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } - -// } -// for (pe,buff) in buffs.into_iter().enumerate() { -// if buff.len() > 0 { -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,buff,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } -// } -// Box::pin(async move{ -// futures::future::join_all(reqs).await; -// }) -// } - -// fn add_multi_single3<'a>(&self, mut index: impl Iterator, mut val: impl Iterator) -> Pin + Send>>{ -// // let big_timer = Instant::now(); -// let num_per_batch = match 
std::env::var("LAMELLAR_OP_BATCH") { -// Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread -// Err(_) => 10000, //+ 1 to account for main thread -// }; -// let num_pes = self.inner.data.team.num_pes(); -// let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; -// let val = val.next().unwrap(); -// let mut reqs = Vec::new(); -// for idx in index{ -// let (pe,local_index) = self.pe_and_offset_for_global_index(idx).unwrap(); -// buffs[pe].push(local_index); -// if buffs[pe].len() >= num_per_batch { -// let mut new_buffer = Vec::with_capacity(num_per_batch); -// std::mem::swap( &mut buffs[pe], &mut new_buffer); -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,new_buffer,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } - -// } -// for (pe,buff) in buffs.into_iter().enumerate() { -// if buff.len() > 0 { -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,buff,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } -// } -// Box::pin(async move{ -// futures::future::join_all(reqs).await; -// }) -// } - -// fn add_multi_single2<'a>(&self, mut index: &'a[usize], mut val: T) -> Pin + Send>>{ -// // let big_timer = Instant::now(); -// let num_per_batch = match std::env::var("LAMELLAR_OP_BATCH") { -// Ok(n) => n.parse::().unwrap(), //+ 1 to account for main thread -// Err(_) => 10000, //+ 1 to account for main thread -// }; -// let num_pes = self.inner.data.team.num_pes(); -// let mut buffs = vec![Vec::with_capacity(num_per_batch); num_pes]; -// let mut reqs = Vec::new(); -// for idx in index{ -// let (pe,local_index) = self.pe_and_offset_for_global_index(*idx).unwrap(); -// buffs[pe].push(local_index); -// if buffs[pe].len() >= num_per_batch { -// let mut new_buffer = Vec::with_capacity(num_per_batch); -// std::mem::swap( &mut buffs[pe], &mut new_buffer); -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,new_buffer,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } - -// } -// for (pe,buff) in buffs.into_iter().enumerate() { -// if buff.len() > 0 { -// let am = SingleValMultiIndex::new_with_vec(self.clone(),ArrayOpCmd2::Add,buff,val).into_am(); -// reqs.push(self.inner.data.team.exec_arc_am_pe::<()>( -// pe, -// am, -// Some(self.inner.data.array_counters.clone()) -// ).into_future()); -// } -// } - -// Box::pin(async move{ -// futures::future::join_all(reqs).await; -// }) -// } -// } - - -// fn multi_multi_add_usize(array: &UnsafeByteArray, idx_vals: &[u8]) { -// let array: UnsafeArray = array.into(); -// let mut local_data = unsafe{array.mut_local_data()}; -// let idx_vals = unsafe {std::slice::from_raw_parts(idx_vals.as_ptr() as *const IdxVal, idx_vals.len()/std::mem::size_of::>())}; -// for elem in idx_vals{ -// local_data[elem.index] += elem.val; -// } -// } - - - -// #[lamellar_impl::AmDataRT] - #[doc(hidden)] -#[derive(Copy,Clone,Debug,Hash,std::cmp::Eq,std::cmp::PartialEq)] -pub enum BatchReturnType{ +#[derive(Copy, Clone, Debug, Hash, std::cmp::Eq, std::cmp::PartialEq)] +pub enum BatchReturnType { None, Vals, Result, } -struct SingleValMultiIndex{ +struct SingleValMultiIndex { array: LamellarByteArray, - idx: Vec, + idx: Vec, val: Vec, - op: 
ArrayOpCmd2>, + op: ArrayOpCmd>, + index_size: IndexSize, } impl SingleValMultiIndex { - // fn new(array: LamellarByteArray, op:ArrayOpCmd2, val: T) -> Self { - // let val_u8 = &val as *const T as *const u8; - // Self { array: array.into(), idx: Vec::new(), - // val: unsafe {std::slice::from_raw_parts(val_u8, std::mem::size_of::())}.to_vec(), - // op: op.into()} //, type_id: TypeId::of::() } - // } - fn new_with_vec(array: LamellarByteArray, op: ArrayOpCmd2, indices: Vec, val: T) -> Self { + fn new_with_vec( + array: LamellarByteArray, + op: ArrayOpCmd, + indices: Vec, + val: T, + index_size: IndexSize, + ) -> Self { let val_u8 = &val as *const T as *const u8; - Self { array: array.into(), idx: indices, - val: unsafe {std::slice::from_raw_parts(val_u8, std::mem::size_of::())}.to_vec(), - op: op.into()} //, type_id: TypeId::of::() } - + Self { + array: array.into(), + idx: indices, + val: unsafe { std::slice::from_raw_parts(val_u8, std::mem::size_of::()) }.to_vec(), + op: op.into(), + index_size: index_size, + } //, type_id: TypeId::of::() } } - fn into_am(self,ret: BatchReturnType) -> LamellarArcAm { + fn into_am(self, ret: BatchReturnType) -> LamellarArcAm { // println!("{:?} {:?} {:?}",self.array.type_id(),TypeId::of::(),ret); - SINGLE_VAL_MULTI_IDX_OPS.get(&(self.array.type_id(),TypeId::of::(),ret)).unwrap()(self.array,self.op,self.val,self.idx) + SINGLE_VAL_MULTI_IDX_OPS + .get(&(self.array.type_id(), TypeId::of::(), ret)) + .unwrap()( + self.array, + self.op, + self.val, + self.idx, + self.index_size.len() as u8, + ) } } -struct MultiValSingleIndex{ +struct MultiValSingleIndex { array: LamellarByteArray, idx: usize, val: Vec, - op: ArrayOpCmd2>, + op: ArrayOpCmd>, } impl MultiValSingleIndex { - // fn new(array: LamellarByteArray, op:ArrayOpCmd2, index: usize) -> Self { - // // let val_u8 = &val as *const T as *const u8; - // Self { array: array.into(), idx: index, - // val: Vec::new(), - // op: op} //, type_id: TypeId::of::() } - // } - - fn new_with_vec(array: LamellarByteArray, op: ArrayOpCmd2, index: usize, val: Vec) -> Self { + fn new_with_vec( + array: LamellarByteArray, + op: ArrayOpCmd, + index: usize, + val: Vec, + ) -> Self { let val_u8 = val.as_ptr() as *const u8; - Self { array: array.into(), idx: index, - val: unsafe {std::slice::from_raw_parts(val_u8, std::mem::size_of::()*val.len())}.to_vec(), - op: op.into() + Self { + array: array.into(), + idx: index, + val: unsafe { + std::slice::from_raw_parts(val_u8, std::mem::size_of::() * val.len()) + } + .to_vec(), + op: op.into(), } //, type_id: TypeId::of::() } } fn into_am(self, ret: BatchReturnType) -> LamellarArcAm { - MULTI_VAL_SINGLE_IDX_OPS.get(&(self.array.type_id(),TypeId::of::(),ret)).unwrap()(self.array,self.op,self.val,self.idx) + MULTI_VAL_SINGLE_IDX_OPS + .get(&(self.array.type_id(), TypeId::of::(), ret)) + .unwrap()(self.array, self.op, self.val, self.idx) } } -struct MultiValMultiIndex{ +struct MultiValMultiIndex { array: LamellarByteArray, idxs_vals: Vec, - op: ArrayOpCmd2> + op: ArrayOpCmd>, + index_size: IndexSize, } impl MultiValMultiIndex { - // fn new(array: LamellarByteArray, op: ArrayOpCmd2) -> Self { - // Self { array: array.into(), idxs_vals: Vec::new(), op: op.into()} //, type_id: TypeId::of::() } - // } - - fn new_with_vec(array: LamellarByteArray, op: ArrayOpCmd2, idxs_vals: Vec) -> Self { - Self { array: array.into(), idxs_vals: idxs_vals, op: op.into()} //, type_id: TypeId::of::() } - } - fn append_idxs_vals(&mut self, idx: usize, val: T) { - // idx_val as slice of u8 - let idx_val = IdxVal { 
index: idx, val: val }; - self.idxs_vals.extend_from_slice(idx_val.as_bytes()); + fn new_with_vec( + array: LamellarByteArray, + op: ArrayOpCmd, + idxs_vals: Vec, + index_size: IndexSize, + ) -> Self { + Self { + array: array.into(), + idxs_vals: idxs_vals, + op: op.into(), + index_size: index_size, + } //, type_id: TypeId::of::() } } fn into_am(self, ret: BatchReturnType) -> LamellarArcAm { - MULTI_VAL_MULTI_IDX_OPS.get(&(self.array.type_id(),TypeId::of::(),ret)).unwrap()(self.array,self.op,self.idxs_vals) + MULTI_VAL_MULTI_IDX_OPS + .get(&(self.array.type_id(), TypeId::of::(), ret)) + .unwrap()(self.array, self.op, self.idxs_vals, self.index_size.len() as u8) } } - - - impl ReadOnlyOps for UnsafeArray {} impl AccessOps for UnsafeArray {} @@ -1278,99 +838,3 @@ impl ShiftOps for UnsafeArray {} impl CompareExchangeOps for UnsafeArray {} impl CompareExchangeEpsilonOps for UnsafeArray {} - -// impl UnsafeArray { -// impl LocalArithmeticOps for UnsafeArray { -// fn local_fetch_add(&self, index: usize, val: T) -> T { -// // println!("local_add LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] += val; -// orig -// } -// } -// fn local_fetch_sub(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] -= val; -// orig -// } -// } -// fn local_fetch_mul(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] *= val; -// // println!("orig: {:?} new {:?} va; {:?}",orig,self.local_as_mut_slice()[index] ,val); -// orig -// } -// } -// fn local_fetch_div(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] /= val; -// // println!("div i: {:?} {:?} {:?} {:?}",index,orig,val,self.local_as_mut_slice()[index]); -// orig -// } -// } -// } -// impl LocalBitWiseOps for UnsafeArray { -// fn local_fetch_bit_and(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] &= val; -// orig -// } -// } -// fn local_fetch_bit_or(&self, index: usize, val: T) -> T { -// // println!("local_sub LocalArithmeticOps for UnsafeArray "); -// unsafe { -// let orig = self.local_as_mut_slice()[index]; -// self.local_as_mut_slice()[index] |= val; -// orig -// } -// } -// } - -// #[macro_export] -// macro_rules! 
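// ---------------------------------------------------------------------------
// All three helper structs above resolve their remote AM through the
// inventory-built maps keyed by (array TypeId, element TypeId, return kind),
// so the batch layer never needs to know the concrete element type. The
// shape of that lookup, with a plain HashMap standing in for the lazy_static
// registries and an illustrative constructor signature:

use std::any::{Any, TypeId};
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum ReturnKind {
    None,
    Vals,
    Result,
}

// Stand-in for the fns stored in MULTI_VAL_MULTI_IDX_OPS and friends, which
// really build a LamellarArcAm from the op descriptor and serialized payload.
type AmCtor = fn(Vec<u8>) -> Box<dyn Any>;

fn lookup(
    registry: &HashMap<(TypeId, TypeId, ReturnKind), AmCtor>,
    array_ty: TypeId,
    elem_ty: TypeId,
    ret: ReturnKind,
) -> AmCtor {
    *registry
        .get(&(array_ty, elem_ty, ret))
        .expect("no op registered for this (array, element, return) combination")
}

fn main() {
    let mut registry: HashMap<(TypeId, TypeId, ReturnKind), AmCtor> = HashMap::new();
    registry.insert(
        (TypeId::of::<Vec<u32>>(), TypeId::of::<u32>(), ReturnKind::None),
        |payload| Box::new(payload.len()), // stand-in constructor
    );
    let ctor = lookup(
        &registry,
        TypeId::of::<Vec<u32>>(),
        TypeId::of::<u32>(),
        ReturnKind::None,
    );
    let _am = ctor(vec![0u8; 16]);
}
// ---------------------------------------------------------------------------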
UnsafeArray_create_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::unsafearray_register!{$a,ArrayOpCmd::Add,[<$name dist_add>],[<$name local_add>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchAdd,[<$name dist_fetch_add>],[<$name local_add>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::Sub,[<$name dist_sub>],[<$name local_sub>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchSub,[<$name dist_fetch_sub>],[<$name local_sub>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::Mul,[<$name dist_mul>],[<$name local_mul>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchMul,[<$name dist_fetch_mul>],[<$name local_mul>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::Div,[<$name dist_div>],[<$name local_div>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchDiv,[<$name dist_fetch_div>],[<$name local_div>]} -// } -// } -// } - -// #[macro_export] -// macro_rules! UnsafeArray_create_bitwise_ops { -// ($a:ty, $name:ident) => { -// paste::paste!{ -// $crate::unsafearray_register!{$a,ArrayOpCmd::And,[<$name dist_bit_and>],[<$name local_bit_and>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchAnd,[<$name dist_fetch_bit_and>],[<$name local_bit_and>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::Or,[<$name dist_bit_or>],[<$name local_bit_or>]} -// $crate::unsafearray_register!{$a,ArrayOpCmd::FetchOr,[<$name dist_fetch_bit_or>],[<$name local_bit_or>]} -// } -// } -// } -// #[macro_export] -// macro_rules! unsafearray_register { -// ($id:ident, $optype:path, $op:ident, $local:ident) => { -// inventory::submit! { -// #![crate =$crate] -// $crate::array::UnsafeArrayOp{ -// id: ($optype,std::any::TypeId::of::<$id>()), -// op: $op, -// } -// } -// }; -// } diff --git a/src/darc.rs b/src/darc.rs index 5f8de25c..b5fee7cc 100644 --- a/src/darc.rs +++ b/src/darc.rs @@ -922,8 +922,8 @@ impl LamellarAM for DroppedWaitAM { To view backtrace set RUST_LIB_BACKTRACE=1\n\ {}", mode_refs, - unsafe {wrapped.inner.as_ref().local_cnt.load(Ordering::SeqCst)}, - unsafe {wrapped.inner.as_ref().dist_cnt.load(Ordering::SeqCst)}, + wrapped.inner.as_ref().local_cnt.load(Ordering::SeqCst), + wrapped.inner.as_ref().dist_cnt.load(Ordering::SeqCst), *crate::DEADLOCK_TIMEOUT, std::backtrace::Backtrace::capture() ); @@ -991,8 +991,8 @@ impl LamellarAM for DroppedWaitAM { To view backtrace set RUST_LIB_BACKTRACE=1\n\ {}", mode_refs, - unsafe {wrapped.inner.as_ref().local_cnt.load(Ordering::SeqCst)}, - unsafe {wrapped.inner.as_ref().dist_cnt.load(Ordering::SeqCst)}, + wrapped.inner.as_ref().local_cnt.load(Ordering::SeqCst), + wrapped.inner.as_ref().dist_cnt.load(Ordering::SeqCst), *crate::DEADLOCK_TIMEOUT, std::backtrace::Backtrace::capture() ); diff --git a/src/lamellae/command_queues.rs b/src/lamellae/command_queues.rs index 818d0e4d..40e9e7ed 100644 --- a/src/lamellae/command_queues.rs +++ b/src/lamellae/command_queues.rs @@ -591,7 +591,10 @@ impl InnerCQ { } } if do_alloc { - println!("need to alloc new pool {:?}",std::backtrace::Backtrace::capture()); + println!( + "need to alloc new pool {:?}", + std::backtrace::Backtrace::capture() + ); self.send_alloc_inner(&mut alloc_buf, min_size); } } @@ -756,7 +759,7 @@ impl InnerCQ { cmd.calc_hash(); for pe in 0..self.num_pes { if pe != self.my_pe { - println!("putting alloc cmd to pe {:?}",pe); + println!("putting alloc cmd to pe {:?}", pe); self.comm.put(pe, cmd.as_bytes(), cmd.as_addr()); } } @@ -765,9 +768,9 @@ impl InnerCQ { while !alloc_buf[pe].check_hash() || alloc_buf[pe].cmd != Cmd::Alloc { std::thread::yield_now(); } - 
println!(" pe {:?} ready to alloc",pe); + println!(" pe {:?} ready to alloc", pe); } - panic!("exiting"); + // panic!("exiting"); self.comm.alloc_pool(min_size); let cmd = &mut alloc_buf[self.my_pe]; @@ -778,7 +781,7 @@ impl InnerCQ { cmd.calc_hash(); for pe in 0..self.num_pes { if pe != self.my_pe { - println!("putting clear cmd to pe {:?}",pe); + println!("putting clear cmd to pe {:?}", pe); self.comm.put(pe, cmd.as_bytes(), cmd.as_addr()); } } @@ -786,7 +789,7 @@ impl InnerCQ { while !alloc_buf[pe].check_hash() || alloc_buf[pe].cmd != Cmd::Clear { std::thread::yield_now(); } - println!(" pe {:?} has alloced",pe); + println!(" pe {:?} has alloced", pe); } println!("created new alloc pool"); } diff --git a/src/lamellar_task_group.rs b/src/lamellar_task_group.rs index 4b626196..4ca13920 100644 --- a/src/lamellar_task_group.rs +++ b/src/lamellar_task_group.rs @@ -630,8 +630,7 @@ impl LamellarTaskGroup { pub(crate) fn exec_arc_am_local_inner( &self, func: LamellarArcLocalAm, - ) -> Box> - { + ) -> Box> { // println!("task group exec am local"); self.team.team_counters.add_send_req(1); self.team.world_counters.add_send_req(1); @@ -1013,9 +1012,6 @@ impl AmGroup { } } - - - #[derive(Clone)] pub enum AmGroupResult<'a, T> { Pe(usize, &'a T), @@ -1031,14 +1027,13 @@ impl<'a, T: std::fmt::Debug> std::fmt::Debug for AmGroupResult<'a, T> { } } - #[derive(Clone)] pub enum TypedAmAllIter<'a, T> { Unit(TypedAmAllUnitIter<'a, T>), Val(TypedAmAllValIter<'a, T>), } -impl <'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllIter<'a, T> { +impl<'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllIter<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { TypedAmAllIter::Unit(iter) => write!(f, "{:?}", iter), @@ -1048,7 +1043,7 @@ impl <'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllIter<'a, T> { } impl<'a, T> Iterator for TypedAmAllIter<'a, T> { - type Item = &'a T; + type Item = &'a T; fn next(&mut self) -> Option { match self { TypedAmAllIter::Unit(iter) => iter.next(), @@ -1064,7 +1059,7 @@ pub struct TypedAmAllUnitIter<'a, T> { num_pes: usize, } -impl <'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllUnitIter<'a, T> { +impl<'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllUnitIter<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "[")?; for i in 0..self.num_pes { @@ -1087,7 +1082,6 @@ impl<'a, T> Iterator for TypedAmAllUnitIter<'a, T> { } } - #[derive(Clone)] pub struct TypedAmAllValIter<'a, T> { all: &'a Vec>, @@ -1096,7 +1090,7 @@ pub struct TypedAmAllValIter<'a, T> { num_pes: usize, } -impl <'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllValIter<'a, T> { +impl<'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllValIter<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "[")?; for i in 0..self.num_pes { @@ -1106,7 +1100,6 @@ impl <'a, T: std::fmt::Debug> std::fmt::Debug for TypedAmAllValIter<'a, T> { } } - impl<'a, T> Iterator for TypedAmAllValIter<'a, T> { type Item = &'a T; fn next(&mut self) -> Option { @@ -1121,20 +1114,18 @@ impl<'a, T> Iterator for TypedAmAllValIter<'a, T> { } #[derive(Clone)] -pub enum TypedAmGroupResult{ +pub enum TypedAmGroupResult { Unit(TypedAmGroupUnitResult), Val(TypedAmGroupValResult), } - - impl TypedAmGroupResult { pub fn unit(reqs: Vec>, cnt: usize, num_pes: usize) -> Self { - TypedAmGroupResult::Unit(TypedAmGroupUnitResult::new( reqs, cnt, num_pes )) + TypedAmGroupResult::Unit(TypedAmGroupUnitResult::new(reqs, cnt, num_pes)) } 
     pub fn val(reqs: Vec<TypedAmGroupBatchResult<T>>, cnt: usize, num_pes: usize) -> Self {
-        TypedAmGroupResult::Val(TypedAmGroupValResult::new( reqs, cnt, num_pes ))
+        TypedAmGroupResult::Val(TypedAmGroupValResult::new(reqs, cnt, num_pes))
     }

     pub fn at(&self, i: usize) -> AmGroupResult<'_, T> {
@@ -1152,41 +1143,39 @@ impl<T> TypedAmGroupResult<T> {
     }

     pub fn iter(&self) -> TypedAmGroupResultIter<'_, T> {
-        TypedAmGroupResultIter{
+        TypedAmGroupResultIter {
             index: 0,
-            results: self
+            results: self,
         }
     }
 }
-
 #[derive(Clone)]
-pub struct TypedAmGroupResultIter<'a,T> {
+pub struct TypedAmGroupResultIter<'a, T> {
     index: usize,
-    results: &'a TypedAmGroupResult<T>
+    results: &'a TypedAmGroupResult<T>,
 }

-impl<'a,T> Iterator for TypedAmGroupResultIter<'a, T> {
+impl<'a, T> Iterator for TypedAmGroupResultIter<'a, T> {
     type Item = AmGroupResult<'a, T>;
     fn next(&mut self) -> Option<Self::Item> {
         if self.index < self.results.len() {
             self.index += 1;
             Some(self.results.at(self.index - 1))
-        }
-        else {
+        } else {
             None
         }
     }
 }

-pub enum BaseAmGroupReq<T>{
-    SinglePeUnit(std::pin::Pin<Box<dyn Future<Output = T> + Send>>),
-    SinglePeVal( std::pin::Pin<Box<dyn Future<Output = Vec<T>> + Send>>),
-    AllPeUnit( std::pin::Pin<Box<dyn Future<Output = Vec<T>> + Send>>),
-    AllPeVal( std::pin::Pin<Box<dyn Future<Output = Vec<Vec<T>>> + Send>>),
-}
+pub enum BaseAmGroupReq<T> {
+    SinglePeUnit(std::pin::Pin<Box<dyn Future<Output = T> + Send>>),
+    SinglePeVal(std::pin::Pin<Box<dyn Future<Output = Vec<T>> + Send>>),
+    AllPeUnit(std::pin::Pin<Box<dyn Future<Output = Vec<T>> + Send>>),
+    AllPeVal(std::pin::Pin<Box<dyn Future<Output = Vec<Vec<T>>> + Send>>),
+}

-impl <T> BaseAmGroupReq<T> {
+impl<T> BaseAmGroupReq<T> {
     async fn into_result(self) -> BaseAmGroupResult<T> {
         match self {
             BaseAmGroupReq::SinglePeUnit(reqs) => BaseAmGroupResult::SinglePeUnit(reqs.await),
@@ -1198,7 +1187,8 @@ impl<T> BaseAmGroupReq<T> {
 }

 #[derive(Clone)]
-pub enum BaseAmGroupResult<T> { // T here should be the inner most return type
+pub enum BaseAmGroupResult<T> {
+    // T here should be the inner most return type
     SinglePeUnit(T),
     SinglePeVal(Vec<T>),
     AllPeUnit(Vec<T>),
@@ -1218,11 +1208,9 @@ pub struct TypedAmGroupBatchResult<T> {
     reqs: BaseAmGroupResult<T>,
 }

-impl<T> TypedAmGroupBatchReq<T>{
+impl<T> TypedAmGroupBatchReq<T> {
     pub fn new(pe: usize, ids: Vec<usize>, reqs: BaseAmGroupReq<T>) -> Self {
-        Self {
-            pe, ids, reqs
-        }
+        Self { pe, ids, reqs }
     }

     pub async fn into_result(self) -> TypedAmGroupBatchResult<T> {
@@ -1231,7 +1219,6 @@ impl<T> TypedAmGroupBatchReq<T> {
             pe: self.pe,
             ids: self.ids,
             reqs: self.reqs.into_result().await,
         }
-
     }
 }
@@ -1243,29 +1230,25 @@ pub struct TypedAmGroupValResult<T> {
 }

 impl<T> TypedAmGroupValResult<T> {
-    pub fn new(
-        reqs: Vec<TypedAmGroupBatchResult<T>>,
-        cnt: usize,
-        num_pes: usize,
-    ) -> Self {
-        TypedAmGroupValResult {
-            reqs,
-            cnt,
-            num_pes,
-        }
+    pub fn new(reqs: Vec<TypedAmGroupBatchResult<T>>, cnt: usize, num_pes: usize) -> Self {
+        TypedAmGroupValResult { reqs, cnt, num_pes }
     }
-    pub fn at(&self, index: usize) -> AmGroupResult<'_,T> {
+    pub fn at(&self, index: usize) -> AmGroupResult<'_, T> {
         assert!(index < self.cnt, "AmGroupResult index out of bounds");
         for req in self.reqs.iter() {
             if let Ok(idx) = req.ids.binary_search(&index) {
                 match &req.reqs {
-                    BaseAmGroupResult::SinglePeVal(res) => return AmGroupResult::Pe(req.pe, &res[idx]),
-                    BaseAmGroupResult::AllPeVal(res) => return AmGroupResult::All( TypedAmAllIter::Val(TypedAmAllValIter{
-                        all: res,
-                        cur_pe: 0,
-                        req: idx,
-                        num_pes: self.num_pes,
-                    })),
+                    BaseAmGroupResult::SinglePeVal(res) => {
+                        return AmGroupResult::Pe(req.pe, &res[idx])
+                    }
+                    BaseAmGroupResult::AllPeVal(res) => {
+                        return AmGroupResult::All(TypedAmAllIter::Val(TypedAmAllValIter {
+                            all: res,
+                            cur_pe: 0,
+                            req: idx,
+                            num_pes: self.num_pes,
+                        }))
+                    }
                     _ => unreachable!(),
                 }
             }
@@ -1284,30 +1267,23 @@ pub struct TypedAmGroupUnitResult<T> {
     num_pes: usize,
 }

-
 impl<T> TypedAmGroupUnitResult<T> {
-    pub fn new(
-        reqs: Vec<TypedAmGroupBatchResult<T>>,
-        cnt: usize,
-        num_pes: usize,
-    ) -> Self {
-        TypedAmGroupUnitResult {
-            reqs,
-            cnt,
-            num_pes
-        }
+    pub fn new(reqs: Vec<TypedAmGroupBatchResult<T>>, cnt: usize, num_pes: usize) -> Self {
+        TypedAmGroupUnitResult { reqs, cnt, num_pes }
     }
-    pub fn at(&self, index: usize) -> AmGroupResult<'_,T> {
+    pub fn at(&self, index: usize) -> AmGroupResult<'_, T> {
         assert!(index < self.cnt, "AmGroupResult index out of bounds");
         for req in self.reqs.iter() {
-            if let Ok(idx) = req.ids.binary_search(&index) {
+            if let Ok(_idx) = req.ids.binary_search(&index) {
                 match &req.reqs {
                     BaseAmGroupResult::SinglePeUnit(res) => return AmGroupResult::Pe(req.pe, &res),
-                    BaseAmGroupResult::AllPeUnit(res) => return AmGroupResult::All( TypedAmAllIter::Unit(TypedAmAllUnitIter{
-                        all: res,
-                        cur_pe: 0,
-                        num_pes: self.num_pes,
-                    })),
+                    BaseAmGroupResult::AllPeUnit(res) => {
+                        return AmGroupResult::All(TypedAmAllIter::Unit(TypedAmAllUnitIter {
+                            all: res,
+                            cur_pe: 0,
+                            num_pes: self.num_pes,
+                        }))
+                    }
                     _ => unreachable!(),
                 }
             }
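
Aside on the at()/iter() lookup in the hunks above: each TypedAmGroupBatchResult carries the list of original request indices (ids) its PE serviced, and at(index) probes each batch with binary_search (which is only valid because ids is kept sorted when the batch is built), then uses the hit position to index that batch's results. The sketch below is a minimal, self-contained model of that technique; Batch, vals, and result_at are hypothetical stand-ins, not the crate's API.

    // Each batch records the (sorted) original request indices it served and
    // one result per index; a global index is resolved by binary-searching
    // each batch's id list until one matches.
    struct Batch {
        pe: usize,
        ids: Vec<usize>, // sorted original request indices
        vals: Vec<i32>,  // vals[k] is the result for ids[k]
    }

    fn result_at(batches: &[Batch], index: usize) -> Option<(usize, i32)> {
        for b in batches {
            if let Ok(pos) = b.ids.binary_search(&index) {
                return Some((b.pe, b.vals[pos]));
            }
        }
        None
    }

    fn main() {
        let batches = [
            Batch { pe: 0, ids: vec![0, 2, 5], vals: vec![10, 12, 15] },
            Batch { pe: 1, ids: vec![1, 3, 4], vals: vec![11, 13, 14] },
        ];
        assert_eq!(result_at(&batches, 3), Some((1, 13)));
        assert_eq!(result_at(&batches, 6), None);
    }

This avoids materializing one flat results vector at the cost of an O(batches * log n) probe per lookup, and it is why TypedAmGroupResultIter can stay a plain index-counting iterator that just calls at() for each index in turn.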
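The BaseAmGroupReq -> BaseAmGroupResult conversion is the usual type-erased-future pattern: each variant stores a Pin<Box<dyn Future + Send>> with a different output shape, and a single async into_result() awaits whichever variant is present. A reduced two-variant sketch of that pattern (Req/Res are hypothetical names; the usage example assumes the futures crate purely for block_on):

    use std::future::Future;
    use std::pin::Pin;

    // One pinned, boxed (type-erased) future per request shape.
    enum Req<T> {
        Single(Pin<Box<dyn Future<Output = T> + Send>>),
        All(Pin<Box<dyn Future<Output = Vec<T>> + Send>>),
    }

    enum Res<T> {
        Single(T),
        All(Vec<T>),
    }

    impl<T> Req<T> {
        // Awaiting consumes the request and produces the matching result shape.
        async fn into_result(self) -> Res<T> {
            match self {
                Req::Single(f) => Res::Single(f.await),
                Req::All(f) => Res::All(f.await),
            }
        }
    }

    fn main() {
        let req: Req<u32> = Req::Single(Box::pin(async { 7 }));
        let res = futures::executor::block_on(req.into_result());
        assert!(matches!(res, Res::Single(7)));
    }

Boxing erases the concrete future types so heterogeneous requests can share one enum (and one Vec), and pinning is what makes a boxed trait-object future awaitable.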