Commit 30b3fae

Using cudaMemcpyAsync now, removed unused code. Working for non-fused setting

yingchen21 committed Aug 7, 2024
1 parent f1444e9 commit 30b3fae
Showing 7 changed files with 23 additions and 273 deletions.
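
Note: the cudaMemcpyAsync switch named in the title lands in .cu files that are not part of this excerpt. As a minimal sketch of the stream-ordered device-to-host copy the title refers to (buffer names and sizes below are placeholders, not FlexFlow code):

#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main() {
  size_t const n = 1 << 20;
  std::vector<float> host(n, 0.0f);

  float *device_ptr = nullptr;
  cudaMalloc(&device_ptr, n * sizeof(float));
  cudaMemset(device_ptr, 0, n * sizeof(float));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Unlike cudaMemcpy, this returns immediately; the copy is queued on
  // `stream` behind any work already enqueued there.
  cudaMemcpyAsync(host.data(), device_ptr, n * sizeof(float),
                  cudaMemcpyDeviceToHost, stream);

  // The host buffer is only safe to read once the stream has drained.
  cudaStreamSynchronize(stream);
  printf("first element: %f\n", host[0]);

  cudaStreamDestroy(stream);
  cudaFree(device_ptr);
  return 0;
}

One caveat: cudaMemcpyAsync generally only overlaps with other GPU work when the host buffer is pinned (allocated with cudaMallocHost); with pageable memory like the std::vector above, the call is still stream-ordered but may behave synchronously with respect to the host.
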
6 changes: 0 additions & 6 deletions include/flexflow/operator.h
@@ -301,20 +301,17 @@ class Op {

   // save all inputs
   for (int i = 0; i < input_tensors.size(); i++) {
-    std::cout<<"input tensor "<<i<<": "<<input_tensors[i].domain.lo()<<" "<<input_tensors[i].domain.hi()<<std::endl;
     std::string filename = dst_filepath.string() + ".input_";
     if (fwd_pass) {
       filename += std::to_string(i);
     } else {
       filename += "gradient_" + std::to_string(i);
     }
     if (input_tensors[i].data_type == DT_FLOAT) {
-      std::cout<<"saving tensor as float"<<std::endl;
       save_tensor(input_tensors[i].get_float_ptr(),
                   input_tensors[i].domain.get_volume(),
                   filename.c_str());
     } else if (input_tensors[i].data_type == DT_HALF) {
-      std::cout<<"saving tensor as half"<<std::endl;
       save_tensor(input_tensors[i].get_half_ptr(),
                   input_tensors[i].domain.get_volume(),
                   filename.c_str());
@@ -365,20 +362,17 @@ class Op {

   // save all outputs
   for (int i = 0; i < output_tensors.size(); i++) {
-    std::cout<<"output tensor "<<i<<": "<<output_tensors[i].domain.lo()<<" "<<output_tensors[i].domain.hi()<<std::endl;
     std::string filename = dst_filepath.string() + ".output_";
     if (fwd_pass) {
       filename += std::to_string(i);
     } else {
       filename += "gradient_" + std::to_string(i);
     }
     if (output_tensors[i].data_type == DT_FLOAT) {
-      std::cout<<"saving tensor as float"<<std::endl;
       save_tensor(output_tensors[i].get_float_ptr(),
                   output_tensors[i].domain.get_volume(),
                   filename.c_str());
     } else if (output_tensors[i].data_type == DT_HALF) {
-      std::cout<<"saving tensor as half"<<std::endl;
       save_tensor(output_tensors[i].get_half_ptr(),
                   output_tensors[i].domain.get_volume(),
                   filename.c_str());
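
For context, save_tensor (called above) is defined elsewhere in the repository. A hypothetical stand-in is sketched below, assuming it stages the device buffer to the host with cudaMemcpyAsync, per the commit title, before writing it out; the name save_tensor_sketch, the stream argument, and the text output format are assumptions, not FlexFlow's actual implementation:

#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for save_tensor: stage `volume` elements from device
// memory to the host on `stream`, then write them to `filename` as text.
template <typename T>
void save_tensor_sketch(T const *device_ptr, size_t volume,
                        char const *filename, cudaStream_t stream) {
  std::vector<T> host(volume);
  cudaMemcpyAsync(host.data(), device_ptr, volume * sizeof(T),
                  cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream); // host buffer is valid only after this
  FILE *f = fopen(filename, "w");
  if (f == nullptr) {
    return;
  }
  for (size_t i = 0; i < volume; i++) {
    fprintf(f, "%f\n", static_cast<float>(host[i]));
  }
  fclose(f);
}
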
5 changes: 1 addition & 4 deletions include/flexflow/ops/inc_multihead_self_attention.h
@@ -125,10 +125,7 @@ class IncMultiHeadSelfAttention : public Op {
                                       BatchConfig const *bc,
                                       int shard_id,
                                       GenericTensorAccessorR const &input,
-                                      // GenericTensorAccessorR const &weight,
-                                      GenericTensorAccessorW const &output
-                                      // GenericTensorAccessorR const &bias);
-                                      );
+                                      GenericTensorAccessorW const &output);
   static void peft_bwd_kernel_wrapper(IncMultiHeadSelfAttentionMeta *m,
                                       BatchConfig const *bc,
                                       int shard_id,
@@ -94,9 +94,7 @@ template <typename DT>
 void compute_qkv_kernel(IncMultiHeadSelfAttentionMeta const *m,
                         BatchConfig const *bc,
                         int shard_id,
-                        // DT const *weight_ptr,
                         DT *output_ptr,
-                        // DT const *bias_ptr,
                         ffStream_t stream);

 template <typename DT>
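
compute_qkv_kernel above is declared as a template over the element type DT (float or half, matching the DT_FLOAT/DT_HALF branches earlier) and takes a caller-provided stream; ffStream_t is presumably FlexFlow's stream alias for cudaStream_t on CUDA builds. A self-contained sketch of that declaration pattern follows, with hypothetical names (fill_kernel, compute_stub) rather than FlexFlow code:

#include <cuda_runtime.h>
#include <cuda_fp16.h>

// Hypothetical kernel: fill `out` with `value` converted to DT.
template <typename DT>
__global__ void fill_kernel(DT *out, size_t n, float value) {
  size_t i = blockIdx.x * (size_t)blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = static_cast<DT>(value);
  }
}

// Hypothetical wrapper mirroring the shape of compute_qkv_kernel:
// templated over the element type, launched on the caller's stream.
template <typename DT>
void compute_stub(DT *output_ptr, size_t n, cudaStream_t stream) {
  size_t const block = 256;
  fill_kernel<DT>
      <<<(n + block - 1) / block, block, 0, stream>>>(output_ptr, n, 1.0f);
}

// Explicit instantiations let the template definition live in a .cu file
// while only the declaration (as in this header) is visible to callers.
template void compute_stub<float>(float *, size_t, cudaStream_t);
template void compute_stub<__half>(__half *, size_t, cudaStream_t);
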