Fix simulator hash issue #361

Closed
wants to merge 16 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -8,6 +8,7 @@ python/flexflow/core/legion_cffi_header.py
 *.pb.h
 *.o
 *.a
+/debug/

 # Byte-compiled / optimized / DLL files
 __pycache__/
6 changes: 3 additions & 3 deletions gdb/pretty_print.py
@@ -56,21 +56,21 @@ def __init__(self, val):
     def to_string(self):
         toks = []
         ndim = self.val['num_dims']
+        datatype = self.val['data_type']
         for i in range(ndim):
             dim = self.val['dims'][i]
             size = dim['size']
             degree = dim['degree']
             parallel_idx = dim['parallel_idx']
             toks.append(f'{i}=[s={size} d={degree} pi={parallel_idx}]')
-        return f'TensorShape<{" ".join(toks)}>'
+        return f'TensorShape<dt={datatype} {" ".join(toks)}>'

 class ParallelTensorBasePrinter:
     def __init__(self, val):
         self.val = val

     def to_string(self):
         toks = []
-        toks.append(f'guid={self.val["parallel_tensor_guid"]}')
         ndim = self.val['num_dims']
         for i in range(ndim):
             dim = self.val['dims'][i]
1 change: 0 additions & 1 deletion include/flexflow/ffconst.h
@@ -224,7 +224,6 @@ enum {
   OP_GUID_LAST_VALID = 2999999,
   TENSOR_GUID_FIRST_VALID = 3000000,
   TENSOR_GUID_LAST_VALID = 3999999,
-  PARALLEL_TENSOR_GUID_FIRST_VALID = 4000000,
   NODE_GUID_FIRST_VALID = 5000000,
 };
 #endif // _FLEXFLOW_CONST_H_
134 changes: 22 additions & 112 deletions include/flexflow/model.h
@@ -17,6 +17,7 @@
 #include "accessor.h"
 #include "config.h"
 #include "device.h"
+#include "flexflow/ffconst_utils.h"
 #include "flexflow/node.h"
 #include "flexflow/operator_params.h"
 #include "flexflow/utils/hash_utils.h"
@@ -266,50 +267,6 @@ class Replicate;
 class FusedParallelOp;
 class ParallelOpInfo;

-// TODO: Move to an appropriate place
-/*
-  This is used to create a type that recursively replaces value type
-  ParallelTensor by ParallelTensorShape in T. E.g., ToShape<std::tuple<int,
-  ParallelTensor>>::type gives std::tuple<int, ParallelTensorShape>
-*/
-template <typename T>
-struct ToShape {
-  using type = T;
-};
-
-template <>
-struct ToShape<ParallelTensor> {
-  using type = ParallelTensorShape;
-};
-
-template <typename... Args, template <typename...> class Container>
-struct ToShape<Container<Args...>> {
-  using type = Container<typename ToShape<Args>::type...>;
-};
-
-// TODO: Move to an appropriate place
-template <typename Input>
-typename ToShape<Input>::type get_input_shape(Input const &input) = delete;
-
-template <>
-std::tuple<> get_input_shape(std::tuple<> const &);
-
-template <>
-std::tuple<ParallelTensorShape, ParallelTensorShape, ParallelTensorShape>
-    get_input_shape(
-        std::tuple<ParallelTensor, ParallelTensor, ParallelTensor> const &);
-
-template <>
-ParallelTensorShape get_input_shape(ParallelTensor const &input);
-
-template <>
-std::pair<ParallelTensorShape, ParallelTensorShape>
-    get_input_shape(std::pair<ParallelTensor, ParallelTensor> const &inputs);
-
-template <>
-std::vector<ParallelTensorShape>
-    get_input_shape(std::vector<ParallelTensor> const &inputs);
-
 class FFModel {
 public:
   FFModel(FFConfig &config);
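Reviewer note: the deleted ToShape helper was a type-level map that rewrote ParallelTensor to ParallelTensorShape inside arbitrary container types. A self-contained sketch of the behavior being removed (the ParallelTensor types below are placeholder stand-ins so the sketch compiles outside FlexFlow, not the real classes):

#include <tuple>
#include <type_traits>
#include <vector>

// Placeholder stand-ins for the real FlexFlow types.
struct ParallelTensorShape {};
struct ParallelTensorBase {};
using ParallelTensor = ParallelTensorBase *;

// The deleted metafunction: leave T alone by default...
template <typename T>
struct ToShape {
  using type = T;
};

// ...map the tensor handle to its shape type...
template <>
struct ToShape<ParallelTensor> {
  using type = ParallelTensorShape;
};

// ...and recurse through any template container.
template <typename... Args, template <typename...> class Container>
struct ToShape<Container<Args...>> {
  using type = Container<typename ToShape<Args>::type...>;
};

static_assert(std::is_same<ToShape<std::tuple<int, ParallelTensor>>::type,
                           std::tuple<int, ParallelTensorShape>>::value,
              "tensors are rewritten to shapes inside tuples");
static_assert(std::is_same<ToShape<std::vector<ParallelTensor>>::type,
                           std::vector<ParallelTensorShape>>::value,
              "and inside vectors");

With every operator now keyed on std::vector<ParallelTensorShape>, the per-Input-type machinery is unnecessary, which is why the whole block can go.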
@@ -650,40 +607,40 @@ class FFModel {
   // Internal PCG::Node creation APIs
   // ========================================
   template <typename T>
-  PCG::Node get_or_create_node(const typename T::Input &input,
+  PCG::Node get_or_create_node(std::vector<ParallelTensor> const &input,
                                typename T::Params const &params) {
     using Params = typename T::Params;

-    auto input_shapes = get_input_shape<typename T::Input>(input);
+    std::vector<ParallelTensorShape> input_shapes;
+    for (ParallelTensor const &t : input) {
+      input_shapes.push_back(t->get_shape());
+    }

     if (!params.is_valid(input_shapes)) {
       return PCG::Node::INVALID_NODE;
     }

     T *op = nullptr;

-    std::pair<typename ToShape<typename T::Input>::type, Params> key{
-        input_shapes, params};
-    auto &cache = get<std::unordered_map<
-        std::pair<typename ToShape<typename T::Input>::type, Params>,
-        T *>>(this->cached_ops);
-    auto const &it = cache.find(key);
-    if (it != cache.end()) {
-      op = it->second;
+    std::pair<std::vector<ParallelTensorShape>, Params> key{input_shapes,
+                                                            params};
+    auto const &it = cached_ops.find(key);
+    if (it != cached_ops.end()) {
+      op = (T *)it->second;
     } else {
       op = new T(*this, params, input);
-      cache[key] = op;
+      cached_ops[key] = op;
     }

-    assert(op->get_params() == params);
+    if (!(op->get_params() == params)) {
+      std::ostringstream oss;
+      oss << "Param reconstruction invalid for operator type "
+          << get_operator_type_name(op->op_type);
+      throw std::runtime_error(oss.str());
+    }
     return this->new_node(op);
   }

   PCG::Node get_or_create_noop_node(const ParallelTensor input);
   PCG::Node get_or_create_input_node(ParallelTensorShape const &);
   PCG::Node get_or_create_fused_parallel_node(
       const ParallelTensor input,
       std::vector<ParallelOpInfo> const &parallel_ops);
   PCG::Node get_or_create_parallel_op_node(const ParallelTensor input,
                                            ParallelOpInfo const &);
   // ========================================
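A minimal usage sketch of the unified API (hypothetical call site; Cast stands in for any cached operator, and the CastParams field name is an assumption):

// Assuming `model` is an FFModel and `x` is a ParallelTensor already in
// the graph. Two calls with identical input shapes and identical params
// now hit the same cache slot and return the same node.
CastParams params;
params.dtype = DT_FLOAT; // assumed field name

PCG::Node a = model.get_or_create_node<Cast>({x}, params);
PCG::Node b = model.get_or_create_node<Cast>({x}, params);
assert(a == b); // cache hit: both wrap the same Cast instance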
@@ -800,7 +757,7 @@ class FFModel {

 public:
   size_t op_global_guid, layer_global_guid;
-  size_t tensor_global_guid, parallel_tensor_global_guid, node_global_guid;
+  size_t tensor_global_guid, node_global_guid;
   FFConfig config;
   FFIterationConfig iter_config;
   Optimizer *optimizer;
@@ -819,56 +776,9 @@ class FFModel {
   FFHandler handlers[MAX_NUM_WORKERS];
   Legion::Future current_metrics;
   // Cached operators: key: operator hash, value: operator pointer
-  std::tuple<
-      std::unordered_map<
-          std::pair<std::pair<ParallelTensorShape, ParallelTensorShape>,
-                    BatchMatmulParams>,
-          BatchMatmul *>,
-      std::unordered_map<std::pair<ParallelTensorShape, CastParams>, Cast *>,
-      std::unordered_map<
-          std::pair<std::vector<ParallelTensorShape>, ConcatParams>,
-          Concat *>,
-      std::unordered_map<std::pair<ParallelTensorShape, Conv2DParams>,
-                         Conv2D *>,
-      std::unordered_map<std::pair<ParallelTensorShape, DropoutParams>,
-                         Dropout *>,
-      std::unordered_map<
-          std::pair<std::pair<ParallelTensorShape, ParallelTensorShape>,
-                    ElementBinaryParams>,
-          ElementBinary *>,
-      std::unordered_map<std::pair<ParallelTensorShape, ElementUnaryParams>,
-                         ElementUnary *>,
-      std::unordered_map<std::pair<ParallelTensorShape, EmbeddingParams>,
-                         Embedding *>,
-      std::unordered_map<std::pair<ParallelTensorShape, FlatParams>, Flat *>,
-      std::unordered_map<std::pair<ParallelTensorShape, LayerNormParams>,
-                         LayerNorm *>,
-      std::unordered_map<std::pair<ParallelTensorShape, LinearParams>,
-                         Linear *>,
-      std::unordered_map<std::pair<ParallelTensorShape, Pool2DParams>,
-                         Pool2D *>,
-      std::unordered_map<std::pair<std::tuple<ParallelTensorShape,
-                                              ParallelTensorShape,
-                                              ParallelTensorShape>,
-                                   MultiHeadAttentionParams>,
-                         MultiHeadAttention *>,
-      std::unordered_map<std::pair<ParallelTensorShape, ReshapeParams>,
-                         Reshape *>,
-      std::unordered_map<std::pair<ParallelTensorShape, SplitParams>, Split *>,
-      std::unordered_map<std::pair<ParallelTensorShape, SoftmaxParams>,
-                         Softmax *>,
-      std::unordered_map<std::pair<ParallelTensorShape, TransposeParams>,
-                         Transpose *>,
-      std::unordered_map<std::pair<ParallelTensorShape, RepartitionParams>,
-                         Repartition *>,
-      std::unordered_map<std::pair<ParallelTensorShape, ReplicateParams>,
-                         Replicate *>,
-      std::unordered_map<std::pair<ParallelTensorShape, ReductionParams>,
-                         Reduction *>,
-      std::unordered_map<std::pair<ParallelTensorShape, CombineParams>,
-                         Combine *>,
-      std::unordered_map<std::pair<ParallelTensorShape, FusedParallelOpParams>,
-                         FusedParallelOp *>>
+  std::unordered_map<
+      std::pair<std::vector<ParallelTensorShape>, OperatorParameters>,
+      Op *>
       cached_ops;
   std::unordered_map<size_t, NoOp *> cached_noop_ops;
   std::unordered_map<size_t, NoOp *> cached_input_ops;
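Collapsing the per-operator maps into one std::unordered_map means the composite key needs a usable std::hash; presumably the hash_utils.h include already in model.h is what supplies hashing for pairs and vectors, with the OperatorParameters variant needing a visit-based hash on top. A generic sketch of the usual hash_combine pattern, offered only to illustrate the requirement (not the repo's actual implementation):

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

// Boost-style hash_combine: fold each element's hash into a running seed.
inline void hash_combine(std::size_t &seed, std::size_t v) {
  seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

namespace std {
template <typename A, typename B>
struct hash<std::pair<A, B>> {
  std::size_t operator()(std::pair<A, B> const &p) const {
    std::size_t seed = 0;
    hash_combine(seed, std::hash<A>{}(p.first));
    hash_combine(seed, std::hash<B>{}(p.second));
    return seed;
  }
};

template <typename T>
struct hash<std::vector<T>> {
  std::size_t operator()(std::vector<T> const &v) const {
    std::size_t seed = v.size(); // distinguish vectors by length first
    for (T const &t : v) {
      hash_combine(seed, std::hash<T>{}(t));
    }
    return seed;
  }
};
} // namespace std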
10 changes: 10 additions & 0 deletions include/flexflow/operator.h
@@ -178,6 +178,14 @@ class Op {
      int numWeights,
      int numOutputs,
      ParallelTensor const *tensors);
+  Op(FFModel &model,
+     OperatorType type,
+     DataType dtype,
+     char const *name,
+     int numWeights,
+     int numOutputs,
+     std::vector<ParallelTensor> const &inputs);
+
   // graph substitution related methods
   virtual bool get_int_parameter(PMParameter, int *) const;
   virtual bool get_tensor_parameter(TNParameter, DIMParameter, int *) const;
@@ -227,6 +235,8 @@ class Op {

   virtual tl::optional<RecordFormatter> as_dot() const;

+  std::vector<ParallelTensor> get_inputs() const;
+
   int get_dimension() const;
 #ifdef FF_USE_NCCL
   static ncclUniqueId get_nccl_unique_id_task(
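Reviewer note on get_inputs(): together with get_params(), it lets callers rebuild an operator's cache key after the fact. A hypothetical sketch (assuming the concrete op type is known statically, so its Params value converts implicitly into the OperatorParameters variant):

// Rebuild the cache key for an existing operator, mirroring what
// FFModel::get_or_create_node computes from its arguments.
template <typename T>
std::pair<std::vector<ParallelTensorShape>, OperatorParameters>
    cache_key_of(T const *op) {
  std::vector<ParallelTensorShape> shapes;
  for (ParallelTensor const &t : op->get_inputs()) {
    shapes.push_back(t->get_shape());
  }
  return {shapes, op->get_params()}; // concrete Params -> variant
}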
46 changes: 24 additions & 22 deletions include/flexflow/operator_params.h
@@ -1,28 +1,29 @@
 #ifndef _OPERATOR_PARAMS_H
 #define _OPERATOR_PARAMS_H

-#include "flexflow/ops/attention_params.h"
-#include "flexflow/ops/batch_matmul_params.h"
-#include "flexflow/ops/cast_params.h"
-#include "flexflow/ops/concat_params.h"
-#include "flexflow/ops/conv_2d_params.h"
-#include "flexflow/ops/dropout_params.h"
-#include "flexflow/ops/element_binary_params.h"
-#include "flexflow/ops/element_unary_params.h"
-#include "flexflow/ops/embedding_params.h"
-#include "flexflow/ops/flat_params.h"
-#include "flexflow/ops/layer_norm_params.h"
-#include "flexflow/ops/linear_params.h"
-#include "flexflow/ops/pool_2d_params.h"
-#include "flexflow/ops/reshape_params.h"
-#include "flexflow/ops/softmax_params.h"
-#include "flexflow/ops/split_params.h"
-#include "flexflow/ops/transpose_params.h"
-#include "flexflow/parallel_ops/combine_params.h"
-#include "flexflow/parallel_ops/fused_parallel_op_params.h"
-#include "flexflow/parallel_ops/partition_params.h"
-#include "flexflow/parallel_ops/reduction_params.h"
-#include "flexflow/parallel_ops/replicate_params.h"
+#include "flexflow/ops/params/attention_params.h"
+#include "flexflow/ops/params/batch_matmul_params.h"
+#include "flexflow/ops/params/cast_params.h"
+#include "flexflow/ops/params/concat_params.h"
+#include "flexflow/ops/params/conv_2d_params.h"
+#include "flexflow/ops/params/dropout_params.h"
+#include "flexflow/ops/params/element_binary_params.h"
+#include "flexflow/ops/params/element_unary_params.h"
+#include "flexflow/ops/params/embedding_params.h"
+#include "flexflow/ops/params/flat_params.h"
+#include "flexflow/ops/params/layer_norm_params.h"
+#include "flexflow/ops/params/linear_params.h"
+#include "flexflow/ops/params/noop_params.h"
+#include "flexflow/ops/params/pool_2d_params.h"
+#include "flexflow/ops/params/reshape_params.h"
+#include "flexflow/ops/params/softmax_params.h"
+#include "flexflow/ops/params/split_params.h"
+#include "flexflow/ops/params/transpose_params.h"
+#include "flexflow/parallel_ops/params/combine_params.h"
+#include "flexflow/parallel_ops/params/fused_parallel_op_params.h"
+#include "flexflow/parallel_ops/params/partition_params.h"
+#include "flexflow/parallel_ops/params/reduction_params.h"
+#include "flexflow/parallel_ops/params/replicate_params.h"
 #include "mpark/variant.hpp"

 namespace mp = mpark;
@@ -41,6 +42,7 @@ using OperatorParameters = mp::variant<BatchMatmulParams,
                                        LayerNormParams,
                                        LinearParams,
                                        MultiHeadAttentionParams,
+                                       NoOpParams,
                                        Pool2DParams,
                                        ReshapeParams,
                                        SplitParams,
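Since OperatorParameters is an mpark::variant, generic code dispatches on it with mp::visit. A minimal sketch (the visitor and function names are illustrative, not part of this PR):

#include "flexflow/operator_params.h"

#include <string>

namespace FlexFlow {

// Report which alternative an OperatorParameters currently holds.
struct TypeNameVisitor {
  std::string operator()(CastParams const &) const { return "Cast"; }
  std::string operator()(NoOpParams const &) const { return "NoOp"; }
  // ...one overload per alternative of interest; the template catches the rest.
  template <typename T>
  std::string operator()(T const &) const { return "other"; }
};

std::string params_type_name(OperatorParameters const &params) {
  return mp::visit(TypeNameVisitor{}, params);
}

} // namespace FlexFlow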
5 changes: 2 additions & 3 deletions include/flexflow/ops/attention.h
@@ -7,7 +7,7 @@
 #include "flexflow/node.h"
 #include "flexflow/op_meta.h"
 #include "flexflow/operator.h"
-#include "flexflow/ops/attention_params.h"
+#include "flexflow/ops/params/attention_params.h"

 namespace FlexFlow {

@@ -16,7 +16,6 @@ class MultiHeadAttentionMeta;
 class MultiHeadAttention : public Op {
 public:
   using Params = MultiHeadAttentionParams;
-  using Input = std::tuple<ParallelTensor, ParallelTensor, ParallelTensor>;

   MultiHeadAttention(FFModel &model,
                      LayerID const &layer_guid,
@@ -56,7 +55,7 @@ class MultiHeadAttention : public Op {
                      bool allocate_weights);
   MultiHeadAttention(FFModel &model,
                      Params const &params,
-                     Input const &inputs,
+                     std::vector<ParallelTensor> const &inputs,
                      bool allocate_weights = false,
                      char const *name = nullptr);
   static Op *
6 changes: 2 additions & 4 deletions include/flexflow/ops/batch_matmul.h
@@ -14,10 +14,9 @@ class BatchMatmulMeta : public OpMeta {
 class BatchMatmul : public Op {
 public:
   using Params = BatchMatmulParams;
-  using Input = std::pair<ParallelTensor, ParallelTensor>;
   BatchMatmul(FFModel &model,
               BatchMatmulParams const &params,
-              Input const &inputs,
+              std::vector<ParallelTensor> const &inputs,
               char const *name = nullptr);

   BatchMatmul(FFModel &model,
@@ -38,8 +37,7 @@ class BatchMatmul : public Op {
   void serialize(Legion::Serializer &) const override;
   static PCG::Node deserialize(FFModel &ff,
                                Legion::Deserializer &d,
-                               ParallelTensor inputs[],
-                               int num_inputs);
+                               std::vector<ParallelTensor> const &inputs);
   Op *materialize(FFModel &ff,
                   ParallelTensor inputs[],
                   int num_inputs) const override;
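The deserialize hook now receives its inputs as a vector rather than a raw array plus a count, so the length travels with the data. A hypothetical before/after call-site sketch (collect_inputs is a made-up helper, not part of this PR):

// Before:
//   ParallelTensor inputs[MAX_NUM_INPUTS];
//   int num_inputs = ...;
//   PCG::Node node = BatchMatmul::deserialize(ff, dez, inputs, num_inputs);

// After:
std::vector<ParallelTensor> inputs = collect_inputs(dez); // hypothetical
PCG::Node node = BatchMatmul::deserialize(ff, dez, inputs);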
8 changes: 3 additions & 5 deletions include/flexflow/ops/cast.h
@@ -19,7 +19,7 @@
 #include "flexflow/node.h"
 #include "flexflow/op_meta.h"
 #include "flexflow/operator.h"
-#include "flexflow/ops/cast_params.h"
+#include "flexflow/ops/params/cast_params.h"

 namespace FlexFlow {

@@ -32,14 +32,13 @@ class CastMeta : public OpMeta {
 class Cast : public Op {
 public:
   using Params = CastParams;
-  using Input = ParallelTensor;
   Cast(FFModel &model,
        ParallelTensor const &input,
        DataType dtype,
        char const *name);
   Cast(FFModel &model,
        Params const &params,
-       Input const &input,
+       std::vector<ParallelTensor> const &input,
        char const *name = nullptr);
   void init(FFModel const &);
   void forward(FFModel const &);
@@ -110,8 +109,7 @@ class Cast : public Op {
   void serialize(Legion::Serializer &s) const override;
   static PCG::Node deserialize(FFModel &ff,
                                Legion::Deserializer &d,
-                               ParallelTensor inputs[],
-                               int num_inputs);
+                               std::vector<ParallelTensor> const &);
   Op *materialize(FFModel &ff,
                   ParallelTensor inputs[],
                   int num_inputs) const override;