Made intermediates formed in contraction expressions be smaller when possible, incremented version to 1.34
solomonik committed May 11, 2016
1 parent a5dbbfa commit 93e9b9b
Showing 9 changed files with 131 additions and 53 deletions.
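In user code, the effect is that chained contraction expressions now materialize leaner intermediates. A minimal sketch of the user-visible behavior, assuming the public CTF interface of this release (tensor names and sizes are illustrative, not from this commit):

    // Evaluating a three-operand contraction forces CTF to materialize a
    // pairwise intermediate. Before this commit that intermediate spanned the
    // union of the operands' indices (here {i,j,k}, n^3 elements); now it
    // spans only the indices still needed downstream (here {i,k}, n^2).
    #include <ctf.hpp>
    int main(int argc, char ** argv){
      MPI_Init(&argc, &argv);
      {
        CTF::World dw(MPI_COMM_WORLD);
        int n = 1000;
        int lens[2] = {n, n};
        int sym[2]  = {NS, NS};
        CTF::Tensor<> A(2, lens, sym, dw), B(2, lens, sym, dw),
                      D(2, lens, sym, dw), C(2, lens, sym, dw);
        // A["ij"]*B["jk"] can be held as T["ik"]: j is contracted away and
        // neither the output nor a later factor needs it, so it is dropped.
        C["il"] = A["ij"] * B["jk"] * D["kl"];
      }
      MPI_Finalize();
      return 0;
    }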
2 changes: 1 addition & 1 deletion include/ctf.hpp
@@ -12,7 +12,7 @@
#include <complex>
#include <assert.h>

-#define CTF_VERSION 133
+#define CTF_VERSION 134

#include "../src/interface/tensor.h"
#include "../src/interface/idx_tensor.h"
4 changes: 2 additions & 2 deletions src/interface/fun_term.cxx
@@ -55,7 +55,7 @@ namespace CTF_int {
}


-void Unifun_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Unifun_Term::get_inputs(std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const {
A->get_inputs(inputs_set);
}

@@ -131,7 +131,7 @@ namespace CTF_int {
}


-void Bifun_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Bifun_Term::get_inputs(std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const {
A->get_inputs(inputs_set);
B->get_inputs(inputs_set);
}
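Each of these overloads now collects CTF::Idx_Tensor pointers ordered by tensor_name_less instead of raw CTF_int::tensor pointers ordered by tensor_tid_less. A hedged sketch of the new calling convention (the term variable is hypothetical):

    // Gather a term's inputs into a set keyed by the underlying tensor's
    // name, so every MPI rank iterates the inputs in the same order.
    std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less> inputs;
    term->get_inputs(&inputs);
    for (std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less>::iterator it = inputs.begin();
         it != inputs.end(); ++it) {
      // (*it)->parent is the underlying CTF_int::tensor;
      // (*it)->idx_map records the index characters it appears with.
    }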
4 changes: 2 additions & 2 deletions src/interface/fun_term.h
@@ -32,7 +32,7 @@ namespace CTF_int {

double estimate_time(CTF::Idx_Tensor output) const;

-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

CTF::World * where_am_i() const;
};
@@ -62,7 +62,7 @@ namespace CTF_int {

double estimate_time(CTF::Idx_Tensor output) const;

-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

CTF::World * where_am_i() const;
};
6 changes: 2 additions & 4 deletions src/interface/idx_tensor.cxx
@@ -299,10 +299,8 @@ namespace CTF {
return *this;
}

-void Idx_Tensor::get_inputs(std::set<CTF_int::tensor*, tensor_tid_less >* inputs_set) const {
-if (parent) {
-inputs_set->insert(parent);
-}
+void Idx_Tensor::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
+inputs_set->insert((Idx_Tensor*)this);
}

/*template<typename dtype, bool is_ord>
4 changes: 1 addition & 3 deletions src/interface/idx_tensor.h
@@ -86,7 +86,7 @@ namespace CTF {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set< CTF_int::tensor*, CTF_int::tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const;

/**
* \brief A = B, compute any operations on operand B and set
@@ -319,6 +319,4 @@ namespace CTF {
* @}
*/
}


#endif
44 changes: 22 additions & 22 deletions src/interface/schedule.cxx
@@ -37,11 +37,11 @@ namespace CTF {
World * world;

std::vector<TensorOperation*> ops; // operations to execute
-std::set<tensor*, tensor_tid_less > local_tensors; // all local tensors used
+std::set<Idx_Tensor*, tensor_name_less > local_tensors; // all local tensors used
std::map<tensor*, tensor*> remap; // mapping from global tensor -> local tensor

-std::set<tensor*, tensor_tid_less > global_tensors; // all referenced tensors stored as global tensors
-std::set<tensor*, tensor_tid_less > output_tensors; // tensors to be written back out, stored as global tensors
+std::set<Idx_Tensor*, tensor_name_less > global_tensors; // all referenced tensors stored as global tensors
+std::set<Idx_Tensor*, tensor_name_less > output_tensors; // tensors to be written back out, stored as global tensors
};

ScheduleTimer Schedule::partition_and_execute() {
@@ -156,21 +156,21 @@ namespace CTF {
// Create and communicate tensors to subworlds
schedule_timer.comm_down_time = MPI_Wtime();
for (comm_op_iter=comm_ops.begin(); comm_op_iter!=comm_ops.end(); comm_op_iter++) {
-typename std::set<tensor*, tensor_tid_less >::iterator global_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator global_tensor_iter;
for (global_tensor_iter=comm_op_iter->global_tensors.begin(); global_tensor_iter!=comm_op_iter->global_tensors.end(); global_tensor_iter++) {
-tensor* local_clone;
+Idx_Tensor* local_clone;
if (comm_op_iter->world != NULL) {
-local_clone = new tensor(*(*global_tensor_iter));//, *comm_op_iter->world);
+local_clone = new Idx_Tensor(*(*global_tensor_iter));//, *comm_op_iter->world);
} else {
local_clone = NULL;
}
comm_op_iter->local_tensors.insert(local_clone);
-comm_op_iter->remap[*global_tensor_iter] = local_clone;
-(*global_tensor_iter)->add_to_subworld(local_clone, (*global_tensor_iter)->sr->mulid(), (*global_tensor_iter)->sr->addid());
+comm_op_iter->remap[(*global_tensor_iter)->parent] = local_clone->parent;
+(*global_tensor_iter)->parent->add_to_subworld(local_clone->parent, (*global_tensor_iter)->sr->mulid(), (*global_tensor_iter)->sr->addid());
}
-typename std::set<tensor*, tensor_tid_less >::iterator output_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator output_tensor_iter;
for (output_tensor_iter=comm_op_iter->output_tensors.begin(); output_tensor_iter!=comm_op_iter->output_tensors.end(); output_tensor_iter++) {
-assert(comm_op_iter->remap.find(*output_tensor_iter) != comm_op_iter->remap.end());
+assert(comm_op_iter->remap.find((*output_tensor_iter)->parent) != comm_op_iter->remap.end());
}
}
schedule_timer.comm_down_time = MPI_Wtime() - schedule_timer.comm_down_time;
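With Idx_Tensor wrappers in the sets, all subworld traffic now dereferences ->parent. Condensed, the clone/compute/merge pattern of this function looks as follows (a sketch; global stands in for *global_tensor_iter):

    // Clone each global tensor onto the subworld, run the partition's ops
    // there, then accumulate results back; mulid/addid are the semiring's
    // multiplicative and additive identities (scale by 1, offset from 0).
    Idx_Tensor* local_clone = new Idx_Tensor(*global);
    remap[global->parent] = local_clone->parent;
    global->parent->add_to_subworld(local_clone->parent,
                                    global->sr->mulid(), global->sr->addid());
    // ... execute the scheduled TensorOperations on the subworld ...
    global->parent->add_from_subworld(remap[global->parent],
                                      global->sr->mulid(), global->sr->addid());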
@@ -201,16 +201,16 @@ namespace CTF {
// Communicate results back into global
schedule_timer.comm_up_time = MPI_Wtime();
for (comm_op_iter=comm_ops.begin(); comm_op_iter!=comm_ops.end(); comm_op_iter++) {
-typename std::set<tensor*, tensor_tid_less >::iterator output_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator output_tensor_iter;
for (output_tensor_iter=comm_op_iter->output_tensors.begin(); output_tensor_iter!=comm_op_iter->output_tensors.end(); output_tensor_iter++) {
-(*output_tensor_iter)->add_from_subworld(comm_op_iter->remap[*output_tensor_iter], (*output_tensor_iter)->sr->mulid(), (*output_tensor_iter)->sr->addid());
+(*output_tensor_iter)->parent->add_from_subworld(comm_op_iter->remap[(*output_tensor_iter)->parent], (*output_tensor_iter)->sr->mulid(), (*output_tensor_iter)->sr->addid());
}
}
schedule_timer.comm_up_time = MPI_Wtime() - schedule_timer.comm_up_time;

// Clean up local tensors & world
if ((int64_t)comm_ops.size() > my_color) {
-typename std::set<tensor*, tensor_tid_less >::iterator local_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator local_tensor_iter;
for (local_tensor_iter=comm_ops[my_color].local_tensors.begin(); local_tensor_iter!=comm_ops[my_color].local_tensors.end(); local_tensor_iter++) {
delete *local_tensor_iter;
}
@@ -279,17 +279,17 @@ namespace CTF {
void Schedule::add_operation_typed(TensorOperation* op) {
steps_original.push_back(op);

-std::set<tensor*, tensor_tid_less > op_lhs_set;
+std::set<Idx_Tensor*, tensor_name_less > op_lhs_set;
op->get_outputs(&op_lhs_set);
assert(op_lhs_set.size() == 1); // limited case to make this a bit easier
-tensor* op_lhs = *op_lhs_set.begin();
+tensor* op_lhs = (*op_lhs_set.begin())->parent;

-std::set<tensor*, tensor_tid_less > op_deps;
+std::set<Idx_Tensor*, tensor_name_less > op_deps;
op->get_inputs(&op_deps);

-typename std::set<tensor*, tensor_tid_less >::iterator deps_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator deps_iter;
for (deps_iter = op_deps.begin(); deps_iter != op_deps.end(); deps_iter++) {
-tensor* dep = *deps_iter;
+tensor* dep = (*deps_iter)->parent;
typename std::map<tensor*, TensorOperation*>::iterator dep_loc = latest_write.find(dep);
TensorOperation* dep_op;
if (dep_loc != latest_write.end()) {
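The surrounding loop performs read-after-write dependency tracking: each input of a new operation is resolved against the most recent writer of its parent tensor. A compressed sketch of that logic (the helpers add_raw_edge, single_output_of, and ops_in_program_order are hypothetical):

    // latest_write maps each underlying tensor to the last op that wrote it;
    // every input of a new op gains an edge to that writer.
    std::map<tensor*, TensorOperation*> latest_write;
    for (TensorOperation* op : ops_in_program_order) {
      std::set<Idx_Tensor*, tensor_name_less> deps;
      op->get_inputs(&deps);
      for (Idx_Tensor* d : deps) {
        std::map<tensor*, TensorOperation*>::iterator w = latest_write.find(d->parent);
        if (w != latest_write.end())
          add_raw_edge(op, w->second);   // op must wait for w->second
      }
      latest_write[single_output_of(op)] = op;
    }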
@@ -363,13 +363,13 @@
}
}

-void TensorOperation::get_outputs(std::set<tensor*, tensor_tid_less >* outputs_set) const {
+void TensorOperation::get_outputs(std::set<Idx_Tensor*, tensor_name_less >* outputs_set) const {
assert(lhs->parent);
assert(outputs_set != NULL);
-outputs_set->insert(lhs->parent);
+outputs_set->insert(lhs);
}

-void TensorOperation::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void TensorOperation::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
rhs->get_inputs(inputs_set);

switch (op) {
@@ -379,7 +379,7 @@
case TENSOR_OP_SUBTRACT:
case TENSOR_OP_MULTIPLY:
assert(lhs->parent != NULL);
-inputs_set->insert(lhs->parent);
+inputs_set->insert(lhs);
break;
default:
std::cerr << "TensorOperation::get_inputs(): unexpected op: " << op << std::endl;
4 changes: 2 additions & 2 deletions src/interface/schedule.h
@@ -48,13 +48,13 @@ namespace CTF {
/**
* \brief appends the tensors this writes to to the input set
*/
-void get_outputs(std::set<CTF_int::tensor*, CTF_int::tensor_tid_less >* outputs_set) const;
+void get_outputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* outputs_set) const;

/**
* \brief appends the tensors this depends on (reads from, including the output
* if a previous value is required) to the input set
*/
-void get_inputs(std::set<CTF_int::tensor*, CTF_int::tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const;

/**
* \brief runs this operation, but does NOT handle dependency scheduling
97 changes: 94 additions & 3 deletions src/interface/term.cxx
@@ -38,6 +38,7 @@ namespace CTF_int {
}
}
}


idx_C = (char*)alloc(sizeof(char)*order_C);
sym_C = (int*)alloc(sizeof(int)*order_C);
@@ -92,6 +93,61 @@ namespace CTF_int {
free(len_C);
free(idx_C);
return out;

}

+Idx_Tensor * get_full_intm(Idx_Tensor& A,
+Idx_Tensor& B,
+int num_out_inds,
+char const * out_inds){
+int * len_C, * sym_C;
+char * idx_C;
+int order_C, i, j;
+
+idx_C = (char*)alloc(sizeof(char)*num_out_inds);
+sym_C = (int*)alloc(sizeof(int)*num_out_inds);
+len_C = (int*)alloc(sizeof(int)*num_out_inds);
+order_C = 0;
+for (j=0; j<num_out_inds; j++){
+bool found = false;
+int len = -1;
+int sym_prev = -1;
+for (i=0; i<A.parent->order; i++){
+if (A.idx_map[i] == out_inds[j]){
+found = true;
+len = A.parent->lens[i];
+if (sym_prev != -1) sym_prev = NS;
+else if (i>0 && order_C>0 && A.idx_map[i-1] == idx_C[order_C-1]) sym_prev = A.parent->sym[i-1];
+else sym_prev = NS;
+}
+}
+if (!found){
+for (i=0; i<B.parent->order; i++){
+if (B.idx_map[i] == out_inds[j]){
+found = true;
+len = B.parent->lens[i];
+if (sym_prev != NS && i>0 && order_C>0 && B.idx_map[i-1] == idx_C[order_C-1]) sym_prev = B.parent->sym[i-1];
+else sym_prev = NS;
+
+}
+}
+}
+if (found){
+idx_C[order_C] = out_inds[j];
+len_C[order_C] = len;
+if (sym_prev > 0)
+sym_C[order_C-1] = sym_prev;
+sym_C[order_C] = NS;
+order_C++;
+}
+}
+tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, 1);
+Idx_Tensor * out = new Idx_Tensor(tsr_C, idx_C);
+out->is_intm = 1;
+free(sym_C);
+free(len_C);
+free(idx_C);
+return out;
+}
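Unlike the two-argument get_full_intm above it, this overload spans only the requested out_inds that actually occur in A or B, taking each edge length from whichever operand carries the index. A hypothetical call (A, B, and n are illustrative):

    // Suppose A = A["ijk"] and B = B["klm"], every edge of length n.
    char out_inds[5] = {'i','j','l','m','z'};
    Idx_Tensor * T = get_full_intm(A, B, 5, out_inds);
    // T's indices are "ijlm": i,j are found in A, l,m in B, and 'z' occurs
    // in neither operand, so it is dropped. T holds n^4 elements, whereas
    // get_full_intm(A, B) spans the full union {i,j,k,l,m} -- n^5 elements,
    // including the contracted index k.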


@@ -366,7 +422,7 @@
}


-void Sum_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Sum_Term::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
for (int i=0; i<(int)operands.size(); i++){
operands[i]->get_inputs(inputs_set);
}
@@ -455,7 +511,22 @@ namespace CTF_int {
sr->safemul(op_A.scale, op_B.scale, op_A.scale);
tmp_ops.push_back(op_A.clone());
} else {
-Idx_Tensor * intm = get_full_intm(op_A, op_B);
+std::set<char> uniq_inds;
+for (int k=0; k<output.parent->order; k++){
+uniq_inds.insert(output.idx_map[k]);
+}
+std::set<Idx_Tensor*, tensor_name_less > inputs;
+for (int j=0; j<(int)tmp_ops.size(); j++){
+tmp_ops[j]->get_inputs(&inputs);
+}
+for (std::set<Idx_Tensor*>::iterator j=inputs.begin(); j!=inputs.end(); j++){
+for (int k=0; k<(*j)->parent->order; k++){
+uniq_inds.insert((*j)->idx_map[k]);
+}
+}
+std::vector<char> arr(uniq_inds.begin(), uniq_inds.end());
+
+Idx_Tensor * intm = get_full_intm(op_A, op_B, uniq_inds.size(), &(arr[0]));
sr->safemul(tscale, op_A.scale, tscale);
sr->safemul(tscale, op_B.scale, tscale);
contraction c(op_A.parent, op_A.idx_map,
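This uniq_inds block is what actually shrinks intermediates: the requested index set is the output's indices plus every index a not-yet-consumed operand still mentions, so purely contracted indices fall out. Traced on a hypothetical expression:

    // For C["il"] = A["ij"] * B["jk"] * D["kl"], while contracting
    // op_A = A["ij"] with op_B = B["jk"], tmp_ops still holds D["kl"]:
    //   uniq_inds = {i,l}           (from the output C["il"])
    //             U {k,l}           (from the remaining operand D["kl"])
    //             = {i,k,l}
    // get_full_intm(op_A, op_B, 3, "ikl") drops l (absent from A and B) and
    // builds intm = T["ik"]; the old call get_full_intm(op_A, op_B) would
    // have formed T["ijk"], needlessly carrying the contracted index j.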
@@ -527,6 +598,7 @@ namespace CTF_int {
sr->safemul(op_B.scale, op_A.scale, op_A.scale);
tmp_ops.push_back(op_A.clone());
} else {
printf("HERE2\n");
Idx_Tensor * intm = get_full_intm(op_A, op_B);
sr->safemul(tscale, op_A.scale, tscale);
sr->safemul(tscale, op_B.scale, tscale);
@@ -644,7 +716,7 @@



-void Contract_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Contract_Term::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
for (int i=0; i<(int)operands.size(); i++){
operands[i]->get_inputs(inputs_set);
}
Expand Down Expand Up @@ -678,3 +750,22 @@ namespace CTF_int {

}


+namespace CTF_int {
+bool tensor_name_less::operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B) {
+int d = strcmp(A->parent->name, B->parent->name);
+if (d>0) return d; else return 1;
+/*if (A == NULL && B != NULL) {
+return true;
+} else if (A == NULL || B == NULL) {
+return false;
+}
+assert(0);//FIXME
+//return A->tid < B->tid;
+return -1;*/
+}
+}
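Note that this comparator, as committed, returns d when d > 0 and 1 otherwise, so converted to bool it is true on every path, and it never guards against NULL parents or names. That is not a strict weak ordering, which std::set requires. A corrected sketch (an assumption of intent, not part of this commit):

    // Valid strict weak ordering over underlying tensor names; the NULL
    // guards are an assumption, not in the committed code.
    bool tensor_name_less::operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B) {
      if (A == NULL || A->parent == NULL) return B != NULL && B->parent != NULL;
      if (B == NULL || B->parent == NULL) return false;
      return strcmp(A->parent->name, B->parent->name) < 0;
    }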




19 changes: 5 additions & 14 deletions src/interface/term.h
@@ -22,17 +22,8 @@ namespace CTF_int {
* \brief comparison function for sets of tensor pointers
* This ensures the set iteration order is consistent across nodes
*/
-struct tensor_tid_less {
-bool operator()(tensor* A, tensor* B) {
-if (A == NULL && B != NULL) {
-return true;
-} else if (A == NULL || B == NULL) {
-return false;
-}
-assert(0);//FIXME
-//return A->tid < B->tid;
-return -1;
-}
+struct tensor_name_less {
+bool operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B);
};


@@ -84,7 +75,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-virtual void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const = 0;
+virtual void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const = 0;

/**
* \brief constructs a new term which multiplies by tensor A
@@ -217,7 +208,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

/**
* \brief constructs a new term by addition of two terms
@@ -278,7 +269,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

/**
* \brief evalues the expression to produce an intermediate with
