diff --git a/src/contraction/contraction.cxx b/src/contraction/contraction.cxx index 76154f88..0fafc79c 100644 --- a/src/contraction/contraction.cxx +++ b/src/contraction/contraction.cxx @@ -4045,7 +4045,7 @@ namespace CTF_int { // global_comm.rank); // DPRINTF(1,"%E bytes of buffer space will be needed for this contraction\n", // (double)ctrf->mem_rec()); - DPRINTF(2,"%E bytes needed, System memory = %E bytes total, %E bytes used, %E bytes available.\n", + printf("%E bytes needed, System memory = %E bytes total, %E bytes used, %E bytes available.\n", (double)ctrf->mem_rec(), (double)proc_bytes_total(), (double)proc_bytes_used(), diff --git a/src/shared/memcontrol.cxx b/src/shared/memcontrol.cxx index 96f2e570..80c41717 100644 --- a/src/shared/memcontrol.cxx +++ b/src/shared/memcontrol.cxx @@ -77,9 +77,10 @@ void read_off_memory_status(statm_t& result) int64_t tot_mem_used; void inc_tot_mem_used(int64_t a){ tot_mem_used += a; - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + //int rank; + // MPI_Comm_rank(MPI_COMM_WORLD, &rank); // if (rank == 0) + //printf("INCREMENTING MEMUSAGE BY %ld to %ld\n",a,tot_mem_used); // printf("CTF used memory = %1.5E, Total used memory = %1.5E, available memory via malloc_info is = %1.5E\n", (double)tot_mem_used, (double)proc_bytes_used(), (double)proc_bytes_available()); } #ifndef PRODUCTION diff --git a/src/tensor/untyped_tensor.cxx b/src/tensor/untyped_tensor.cxx index 87ac2e88..be33c042 100644 --- a/src/tensor/untyped_tensor.cxx +++ b/src/tensor/untyped_tensor.cxx @@ -35,7 +35,6 @@ namespace CTF_int { if (order != -1){ if (wrld->rank == 0) DPRINTF(2,"Deleted order %d tensor %s\n",order,name); if (is_folded) unfold(); - cdealloc(name); cdealloc(sym); cdealloc(lens); cdealloc(pad_edge_len); @@ -44,8 +43,8 @@ namespace CTF_int { cdealloc(scp_padding); cdealloc(sym_table); delete [] edge_map; + deregister_size(); if (!is_data_aliased){ - if (has_home) inc_tot_mem_used(-size*sr->el_size); if (is_home){ cdealloc(home_buffer); } else 
{ @@ -57,6 +56,7 @@ namespace CTF_int { if (is_sparse) cdealloc(nnz_blk); order = -1; delete sr; + cdealloc(name); } } @@ -88,6 +88,7 @@ namespace CTF_int { bool profile){ this->init(sr, order,edge_len,sym,wrld,0,name,profile,0); set_distribution(idx, prl, blk); + register_size(this->home_size*sr->el_size); this->data = (char*)CTF_int::alloc(this->size*this->sr->el_size); this->sr->set(this->data, this->sr->addid(), this->size); #ifdef HOME_CONTRACT @@ -188,7 +189,7 @@ namespace CTF_int { CTF_int::cdealloc(this->home_buffer); }*/ this->home_size = other->home_size; - if (other->has_home) inc_tot_mem_used(home_size*sr->el_size); + register_size(this->home_size*sr->el_size); this->home_buffer = (char*)CTF_int::alloc(other->home_size*sr->el_size); if (other->is_home){ this->is_home = 1; @@ -292,6 +293,7 @@ namespace CTF_int { this->nnz_tot = 0; this->nnz_blk = NULL; // this->nnz_loc_max = 0; + this->registered_alloc_size = 0; if (name_ != NULL){ this->name = (char*)alloc(strlen(name_)+1); strcpy(this->name, name_); @@ -551,7 +553,8 @@ namespace CTF_int { /* if (wrld->rank == 0) DPRINTF(3,"Initial size of tensor %d is " PRId64 ",",tensor_id,this->size);*/ CTF_int::alloc_ptr(this->home_size*sr->el_size, (void**)&this->home_buffer); - inc_tot_mem_used(size*sr->el_size); + if (wrld->rank == 0) DPRINTF(2,"Creating home of %s\n",name); + register_size(this->size*sr->el_size); this->data = this->home_buffer; } else { CTF_int::alloc_ptr(this->size*sr->el_size, (void**)&this->data); @@ -1429,8 +1432,7 @@ namespace CTF_int { idx_A[i] = i; } tensor tA(sr, order, lens, sym_A, wrld, 1); - tA.is_home = 0; - tA.has_home = 0; + tA.leave_home_with_buffer(); summation st(this, idx_A, sr->mulid(), &tA, idx_A, sr->mulid()); st.execute(); return tA.read_all_pairs(num_pair, false); @@ -2403,5 +2405,30 @@ namespace CTF_int { wrld->cdt.allred(&nnz_loc, &nnz_tot, 1, MPI_INT64_T, MPI_SUM); } + void tensor::leave_home_with_buffer(){ +#ifdef HOME_CONTRACT + if (this->has_home){ + if 
(!this->is_home){ + cdealloc(this->home_buffer); + this->home_buffer = this->data; + } + if (wrld->rank == 0) DPRINTF(2,"Deleting home (leave) of %s\n",name); + deregister_size(); + } + this->is_home = 0; + this->has_home = 0; +#endif + } + + void tensor::register_size(int64_t size){ + deregister_size(); + registered_alloc_size = size; + inc_tot_mem_used(registered_alloc_size); + } + + void tensor::deregister_size(){ + inc_tot_mem_used(-registered_alloc_size); + registered_alloc_size = 0; + } } diff --git a/src/tensor/untyped_tensor.h b/src/tensor/untyped_tensor.h index 711ed75f..95ac5066 100644 --- a/src/tensor/untyped_tensor.h +++ b/src/tensor/untyped_tensor.h @@ -94,6 +94,8 @@ namespace CTF_int { mapping * edge_map; /** \brief current size of local tensor data chunk (mapping-dependent) */ int64_t size; + /** \brief size CTF keeps track of for memory usage */ + int64_t registered_alloc_size; /** \brief whether the data is folded/transposed into a (lower-order) tensor */ bool is_folded; /** \brief ordering of the dimensions according to which the tensori s folded */ @@ -654,6 +656,21 @@ namespace CTF_int { * \param[in] nnz_blk number of nonzeros in each block */ void set_new_nnz_glb(int64_t const * nnz_blk); + + /** + * \brief deregister home buffer + */ + void leave_home_with_buffer(); + + /** + * \brief register buffer allocation for this tensor + */ + void register_size(int64_t size); + + /** + * \brief deregister buffer allocation for this tensor + */ + void deregister_size(); }; }