Made intermediates formed in contraction expressions be smaller when possible, incremented version to 1.34
solomonik committed May 11, 2016
1 parent a5dbbfa commit 93e9b9b
Showing 9 changed files with 131 additions and 53 deletions.
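In user code, the effect is that chained contraction expressions now materialize leaner intermediates. A minimal sketch of the user-visible behavior, assuming the public CTF interface of this release (tensor names and sizes are illustrative, not from this commit):

    // Evaluating a three-operand contraction forces CTF to materialize a
    // pairwise intermediate. Before this commit that intermediate spanned the
    // union of the operands' indices (here {i,j,k}, n^3 elements); now it
    // spans only the indices still needed downstream (here {i,k}, n^2).
    #include <ctf.hpp>
    int main(int argc, char ** argv){
      MPI_Init(&argc, &argv);
      {
        CTF::World dw(MPI_COMM_WORLD);
        int n = 1000;
        int lens[2] = {n, n};
        int sym[2]  = {NS, NS};
        CTF::Tensor<> A(2, lens, sym, dw), B(2, lens, sym, dw),
                      D(2, lens, sym, dw), C(2, lens, sym, dw);
        // A["ij"]*B["jk"] can be held as T["ik"]: j is contracted away and
        // neither the output nor a later factor needs it, so it is dropped.
        C["il"] = A["ij"] * B["jk"] * D["kl"];
      }
      MPI_Finalize();
      return 0;
    }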
2 changes: 1 addition & 1 deletion include/ctf.hpp
@@ -12,7 +12,7 @@
#include <complex>
#include <assert.h>

-#define CTF_VERSION 133
+#define CTF_VERSION 134

#include "../src/interface/tensor.h"
#include "../src/interface/idx_tensor.h"
4 changes: 2 additions & 2 deletions src/interface/fun_term.cxx
@@ -55,7 +55,7 @@ namespace CTF_int {
}


-void Unifun_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Unifun_Term::get_inputs(std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const {
A->get_inputs(inputs_set);
}

@@ -131,7 +131,7 @@ namespace CTF_int {
}


-void Bifun_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Bifun_Term::get_inputs(std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const {
A->get_inputs(inputs_set);
B->get_inputs(inputs_set);
}
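Each of these overloads now collects CTF::Idx_Tensor pointers ordered by tensor_name_less instead of raw CTF_int::tensor pointers ordered by tensor_tid_less. A hedged sketch of the new calling convention (the term variable is hypothetical):

    // Gather a term's inputs into a set keyed by the underlying tensor's
    // name, so every MPI rank iterates the inputs in the same order.
    std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less> inputs;
    term->get_inputs(&inputs);
    for (std::set<CTF::Idx_Tensor*, CTF_int::tensor_name_less>::iterator it = inputs.begin();
         it != inputs.end(); ++it) {
      // (*it)->parent is the underlying CTF_int::tensor;
      // (*it)->idx_map records the index characters it appears with.
    }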
4 changes: 2 additions & 2 deletions src/interface/fun_term.h
@@ -32,7 +32,7 @@ namespace CTF_int {

double estimate_time(CTF::Idx_Tensor output) const;

-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

CTF::World * where_am_i() const;
};
@@ -62,7 +62,7 @@ namespace CTF_int {

double estimate_time(CTF::Idx_Tensor output) const;

-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

CTF::World * where_am_i() const;
};
6 changes: 2 additions & 4 deletions src/interface/idx_tensor.cxx
@@ -299,10 +299,8 @@ namespace CTF {
return *this;
}

-void Idx_Tensor::get_inputs(std::set<CTF_int::tensor*, tensor_tid_less >* inputs_set) const {
-if (parent) {
-inputs_set->insert(parent);
-}
+void Idx_Tensor::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
+inputs_set->insert((Idx_Tensor*)this);
}

/*template<typename dtype, bool is_ord>
4 changes: 1 addition & 3 deletions src/interface/idx_tensor.h
@@ -86,7 +86,7 @@ namespace CTF {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set< CTF_int::tensor*, CTF_int::tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const;

/**
* \brief A = B, compute any operations on operand B and set
@@ -319,6 +319,4 @@ namespace CTF {
* @}
*/
}


#endif
44 changes: 22 additions & 22 deletions src/interface/schedule.cxx
@@ -37,11 +37,11 @@ namespace CTF {
World * world;

std::vector<TensorOperation*> ops; // operations to execute
-std::set<tensor*, tensor_tid_less > local_tensors; // all local tensors used
+std::set<Idx_Tensor*, tensor_name_less > local_tensors; // all local tensors used
std::map<tensor*, tensor*> remap; // mapping from global tensor -> local tensor

-std::set<tensor*, tensor_tid_less > global_tensors; // all referenced tensors stored as global tensors
-std::set<tensor*, tensor_tid_less > output_tensors; // tensors to be written back out, stored as global tensors
+std::set<Idx_Tensor*, tensor_name_less > global_tensors; // all referenced tensors stored as global tensors
+std::set<Idx_Tensor*, tensor_name_less > output_tensors; // tensors to be written back out, stored as global tensors
};

ScheduleTimer Schedule::partition_and_execute() {
@@ -156,21 +156,21 @@ namespace CTF {
// Create and communicate tensors to subworlds
schedule_timer.comm_down_time = MPI_Wtime();
for (comm_op_iter=comm_ops.begin(); comm_op_iter!=comm_ops.end(); comm_op_iter++) {
-typename std::set<tensor*, tensor_tid_less >::iterator global_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator global_tensor_iter;
for (global_tensor_iter=comm_op_iter->global_tensors.begin(); global_tensor_iter!=comm_op_iter->global_tensors.end(); global_tensor_iter++) {
-tensor* local_clone;
+Idx_Tensor* local_clone;
if (comm_op_iter->world != NULL) {
-local_clone = new tensor(*(*global_tensor_iter));//, *comm_op_iter->world);
+local_clone = new Idx_Tensor(*(*global_tensor_iter));//, *comm_op_iter->world);
} else {
local_clone = NULL;
}
comm_op_iter->local_tensors.insert(local_clone);
-comm_op_iter->remap[*global_tensor_iter] = local_clone;
-(*global_tensor_iter)->add_to_subworld(local_clone, (*global_tensor_iter)->sr->mulid(), (*global_tensor_iter)->sr->addid());
+comm_op_iter->remap[(*global_tensor_iter)->parent] = local_clone->parent;
+(*global_tensor_iter)->parent->add_to_subworld(local_clone->parent, (*global_tensor_iter)->sr->mulid(), (*global_tensor_iter)->sr->addid());
}
-typename std::set<tensor*, tensor_tid_less >::iterator output_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator output_tensor_iter;
for (output_tensor_iter=comm_op_iter->output_tensors.begin(); output_tensor_iter!=comm_op_iter->output_tensors.end(); output_tensor_iter++) {
-assert(comm_op_iter->remap.find(*output_tensor_iter) != comm_op_iter->remap.end());
+assert(comm_op_iter->remap.find((*output_tensor_iter)->parent) != comm_op_iter->remap.end());
}
}
schedule_timer.comm_down_time = MPI_Wtime() - schedule_timer.comm_down_time;
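With Idx_Tensor wrappers in the sets, all subworld traffic now dereferences ->parent. Condensed, the clone/compute/merge pattern of this function looks as follows (a sketch; global stands in for *global_tensor_iter):

    // Clone each global tensor onto the subworld, run the partition's ops
    // there, then accumulate results back; mulid/addid are the semiring's
    // multiplicative and additive identities (scale by 1, offset from 0).
    Idx_Tensor* local_clone = new Idx_Tensor(*global);
    remap[global->parent] = local_clone->parent;
    global->parent->add_to_subworld(local_clone->parent,
                                    global->sr->mulid(), global->sr->addid());
    // ... execute the scheduled TensorOperations on the subworld ...
    global->parent->add_from_subworld(remap[global->parent],
                                      global->sr->mulid(), global->sr->addid());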
@@ -201,16 +201,16 @@ namespace CTF {
// Communicate results back into global
schedule_timer.comm_up_time = MPI_Wtime();
for (comm_op_iter=comm_ops.begin(); comm_op_iter!=comm_ops.end(); comm_op_iter++) {
-typename std::set<tensor*, tensor_tid_less >::iterator output_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator output_tensor_iter;
for (output_tensor_iter=comm_op_iter->output_tensors.begin(); output_tensor_iter!=comm_op_iter->output_tensors.end(); output_tensor_iter++) {
-(*output_tensor_iter)->add_from_subworld(comm_op_iter->remap[*output_tensor_iter], (*output_tensor_iter)->sr->mulid(), (*output_tensor_iter)->sr->addid());
+(*output_tensor_iter)->parent->add_from_subworld(comm_op_iter->remap[(*output_tensor_iter)->parent], (*output_tensor_iter)->sr->mulid(), (*output_tensor_iter)->sr->addid());
}
}
schedule_timer.comm_up_time = MPI_Wtime() - schedule_timer.comm_up_time;

// Clean up local tensors & world
if ((int64_t)comm_ops.size() > my_color) {
-typename std::set<tensor*, tensor_tid_less >::iterator local_tensor_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator local_tensor_iter;
for (local_tensor_iter=comm_ops[my_color].local_tensors.begin(); local_tensor_iter!=comm_ops[my_color].local_tensors.end(); local_tensor_iter++) {
delete *local_tensor_iter;
}
@@ -279,17 +279,17 @@ namespace CTF {
void Schedule::add_operation_typed(TensorOperation* op) {
steps_original.push_back(op);

-std::set<tensor*, tensor_tid_less > op_lhs_set;
+std::set<Idx_Tensor*, tensor_name_less > op_lhs_set;
op->get_outputs(&op_lhs_set);
assert(op_lhs_set.size() == 1); // limited case to make this a bit easier
-tensor* op_lhs = *op_lhs_set.begin();
+tensor* op_lhs = (*op_lhs_set.begin())->parent;

-std::set<tensor*, tensor_tid_less > op_deps;
+std::set<Idx_Tensor*, tensor_name_less > op_deps;
op->get_inputs(&op_deps);

-typename std::set<tensor*, tensor_tid_less >::iterator deps_iter;
+typename std::set<Idx_Tensor*, tensor_name_less >::iterator deps_iter;
for (deps_iter = op_deps.begin(); deps_iter != op_deps.end(); deps_iter++) {
-tensor* dep = *deps_iter;
+tensor* dep = (*deps_iter)->parent;
typename std::map<tensor*, TensorOperation*>::iterator dep_loc = latest_write.find(dep);
TensorOperation* dep_op;
if (dep_loc != latest_write.end()) {
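The surrounding loop performs read-after-write dependency tracking: each input of a new operation is resolved against the most recent writer of its parent tensor. A compressed sketch of that logic (the helpers add_raw_edge, single_output_of, and ops_in_program_order are hypothetical):

    // latest_write maps each underlying tensor to the last op that wrote it;
    // every input of a new op gains an edge to that writer.
    std::map<tensor*, TensorOperation*> latest_write;
    for (TensorOperation* op : ops_in_program_order) {
      std::set<Idx_Tensor*, tensor_name_less> deps;
      op->get_inputs(&deps);
      for (Idx_Tensor* d : deps) {
        std::map<tensor*, TensorOperation*>::iterator w = latest_write.find(d->parent);
        if (w != latest_write.end())
          add_raw_edge(op, w->second);   // op must wait for w->second
      }
      latest_write[single_output_of(op)] = op;
    }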
@@ -363,13 +363,13 @@
}
}

-void TensorOperation::get_outputs(std::set<tensor*, tensor_tid_less >* outputs_set) const {
+void TensorOperation::get_outputs(std::set<Idx_Tensor*, tensor_name_less >* outputs_set) const {
assert(lhs->parent);
assert(outputs_set != NULL);
-outputs_set->insert(lhs->parent);
+outputs_set->insert(lhs);
}

-void TensorOperation::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void TensorOperation::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
rhs->get_inputs(inputs_set);

switch (op) {
@@ -379,7 +379,7 @@
case TENSOR_OP_SUBTRACT:
case TENSOR_OP_MULTIPLY:
assert(lhs->parent != NULL);
-inputs_set->insert(lhs->parent);
+inputs_set->insert(lhs);
break;
default:
std::cerr << "TensorOperation::get_inputs(): unexpected op: " << op << std::endl;
4 changes: 2 additions & 2 deletions src/interface/schedule.h
@@ -48,13 +48,13 @@ namespace CTF {
/**
* \brief appends the tensors this writes to to the input set
*/
-void get_outputs(std::set<CTF_int::tensor*, CTF_int::tensor_tid_less >* outputs_set) const;
+void get_outputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* outputs_set) const;

/**
* \brief appends the tensors this depends on (reads from, including the output
* if a previous value is required) to the input set
*/
-void get_inputs(std::set<CTF_int::tensor*, CTF_int::tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<Idx_Tensor*, CTF_int::tensor_name_less >* inputs_set) const;

/**
* \brief runs this operation, but does NOT handle dependency scheduling
97 changes: 94 additions & 3 deletions src/interface/term.cxx
@@ -38,6 +38,7 @@ namespace CTF_int {
}
}
}


idx_C = (char*)alloc(sizeof(char)*order_C);
sym_C = (int*)alloc(sizeof(int)*order_C);
@@ -92,6 +93,61 @@ namespace CTF_int {
free(len_C);
free(idx_C);
return out;

}

+Idx_Tensor * get_full_intm(Idx_Tensor& A,
+Idx_Tensor& B,
+int num_out_inds,
+char const * out_inds){
+int * len_C, * sym_C;
+char * idx_C;
+int order_C, i, j;
+
+idx_C = (char*)alloc(sizeof(char)*num_out_inds);
+sym_C = (int*)alloc(sizeof(int)*num_out_inds);
+len_C = (int*)alloc(sizeof(int)*num_out_inds);
+order_C = 0;
+for (j=0; j<num_out_inds; j++){
+bool found = false;
+int len = -1;
+int sym_prev = -1;
+for (i=0; i<A.parent->order; i++){
+if (A.idx_map[i] == out_inds[j]){
+found = true;
+len = A.parent->lens[i];
+if (sym_prev != -1) sym_prev = NS;
+else if (i>0 && order_C>0 && A.idx_map[i-1] == idx_C[order_C-1]) sym_prev = A.parent->sym[i-1];
+else sym_prev = NS;
+}
+}
+if (!found){
+for (i=0; i<B.parent->order; i++){
+if (B.idx_map[i] == out_inds[j]){
+found = true;
+len = B.parent->lens[i];
+if (sym_prev != NS && i>0 && order_C>0 && B.idx_map[i-1] == idx_C[order_C-1]) sym_prev = B.parent->sym[i-1];
+else sym_prev = NS;
+
+}
+}
+}
+if (found){
+idx_C[order_C] = out_inds[j];
+len_C[order_C] = len;
+if (sym_prev > 0)
+sym_C[order_C-1] = sym_prev;
+sym_C[order_C] = NS;
+order_C++;
+}
+}
+tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, 1);
+Idx_Tensor * out = new Idx_Tensor(tsr_C, idx_C);
+out->is_intm = 1;
+free(sym_C);
+free(len_C);
+free(idx_C);
+return out;
+}
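Unlike the two-argument get_full_intm above it, this overload spans only the requested out_inds that actually occur in A or B, taking each edge length from whichever operand carries the index. A hypothetical call (A, B, and n are illustrative):

    // Suppose A = A["ijk"] and B = B["klm"], every edge of length n.
    char out_inds[5] = {'i','j','l','m','z'};
    Idx_Tensor * T = get_full_intm(A, B, 5, out_inds);
    // T's indices are "ijlm": i,j are found in A, l,m in B, and 'z' occurs
    // in neither operand, so it is dropped. T holds n^4 elements, whereas
    // get_full_intm(A, B) spans the full union {i,j,k,l,m} -- n^5 elements,
    // including the contracted index k.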


@@ -366,7 +422,7 @@
}


-void Sum_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Sum_Term::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
for (int i=0; i<(int)operands.size(); i++){
operands[i]->get_inputs(inputs_set);
}
@@ -455,7 +511,22 @@ namespace CTF_int {
sr->safemul(op_A.scale, op_B.scale, op_A.scale);
tmp_ops.push_back(op_A.clone());
} else {
-Idx_Tensor * intm = get_full_intm(op_A, op_B);
+std::set<char> uniq_inds;
+for (int k=0; k<output.parent->order; k++){
+uniq_inds.insert(output.idx_map[k]);
+}
+std::set<Idx_Tensor*, tensor_name_less > inputs;
+for (int j=0; j<(int)tmp_ops.size(); j++){
+tmp_ops[j]->get_inputs(&inputs);
+}
+for (std::set<Idx_Tensor*>::iterator j=inputs.begin(); j!=inputs.end(); j++){
+for (int k=0; k<(*j)->parent->order; k++){
+uniq_inds.insert((*j)->idx_map[k]);
+}
+}
+std::vector<char> arr(uniq_inds.begin(), uniq_inds.end());
+
+Idx_Tensor * intm = get_full_intm(op_A, op_B, uniq_inds.size(), &(arr[0]));
sr->safemul(tscale, op_A.scale, tscale);
sr->safemul(tscale, op_B.scale, tscale);
contraction c(op_A.parent, op_A.idx_map,
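This uniq_inds block is what actually shrinks intermediates: the requested index set is the output's indices plus every index a not-yet-consumed operand still mentions, so purely contracted indices fall out. Traced on a hypothetical expression:

    // For C["il"] = A["ij"] * B["jk"] * D["kl"], while contracting
    // op_A = A["ij"] with op_B = B["jk"], tmp_ops still holds D["kl"]:
    //   uniq_inds = {i,l}           (from the output C["il"])
    //             U {k,l}           (from the remaining operand D["kl"])
    //             = {i,k,l}
    // get_full_intm(op_A, op_B, 3, "ikl") drops l (absent from A and B) and
    // builds intm = T["ik"]; the old call get_full_intm(op_A, op_B) would
    // have formed T["ijk"], needlessly carrying the contracted index j.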
@@ -527,6 +598,7 @@ namespace CTF_int {
sr->safemul(op_B.scale, op_A.scale, op_A.scale);
tmp_ops.push_back(op_A.clone());
} else {
printf("HERE2\n");
Idx_Tensor * intm = get_full_intm(op_A, op_B);
sr->safemul(tscale, op_A.scale, tscale);
sr->safemul(tscale, op_B.scale, tscale);
@@ -644,7 +716,7 @@



-void Contract_Term::get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const {
+void Contract_Term::get_inputs(std::set<Idx_Tensor*, tensor_name_less >* inputs_set) const {
for (int i=0; i<(int)operands.size(); i++){
operands[i]->get_inputs(inputs_set);
}
Expand Down Expand Up @@ -678,3 +750,22 @@ namespace CTF_int {

}


+namespace CTF_int {
+bool tensor_name_less::operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B) {
+int d = strcmp(A->parent->name, B->parent->name);
+if (d>0) return d; else return 1;
+/*if (A == NULL && B != NULL) {
+return true;
+} else if (A == NULL || B == NULL) {
+return false;
+}
+assert(0);//FIXME
+//return A->tid < B->tid;
+return -1;*/
+}
+}
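Note that this comparator, as committed, returns d when d > 0 and 1 otherwise, so converted to bool it is true on every path, and it never guards against NULL parents or names. That is not a strict weak ordering, which std::set requires. A corrected sketch (an assumption of intent, not part of this commit):

    // Valid strict weak ordering over underlying tensor names; the NULL
    // guards are an assumption, not in the committed code.
    bool tensor_name_less::operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B) {
      if (A == NULL || A->parent == NULL) return B != NULL && B->parent != NULL;
      if (B == NULL || B->parent == NULL) return false;
      return strcmp(A->parent->name, B->parent->name) < 0;
    }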




19 changes: 5 additions & 14 deletions src/interface/term.h
@@ -22,17 +22,8 @@ namespace CTF_int {
* \brief comparison function for sets of tensor pointers
* This ensures the set iteration order is consistent across nodes
*/
-struct tensor_tid_less {
-bool operator()(tensor* A, tensor* B) {
-if (A == NULL && B != NULL) {
-return true;
-} else if (A == NULL || B == NULL) {
-return false;
-}
-assert(0);//FIXME
-//return A->tid < B->tid;
-return -1;
-}
+struct tensor_name_less {
+bool operator()(CTF::Idx_Tensor* A, CTF::Idx_Tensor* B);
};


@@ -84,7 +75,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-virtual void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const = 0;
+virtual void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const = 0;

/**
* \brief constructs a new term which multiplies by tensor A
@@ -217,7 +208,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

/**
* \brief constructs a new term by addition of two terms
@@ -278,7 +269,7 @@ namespace CTF_int {
/**
* \brief appends the tensors this depends on to the input set
*/
-void get_inputs(std::set<tensor*, tensor_tid_less >* inputs_set) const;
+void get_inputs(std::set<CTF::Idx_Tensor*, tensor_name_less >* inputs_set) const;

/**
* \brief evalues the expression to produce an intermediate with
