Skip to content

Commit

Permalink
add new models
Browse files Browse the repository at this point in the history
  • Loading branch information
Bob-Chen222 committed Jun 29, 2024
1 parent 34d3698 commit ac1f1b2
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 43 deletions.
11 changes: 11 additions & 0 deletions lib/model/include/model/ResNet.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include "pcg/computation_graph_builder.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"

namespace FlexFlow {

// Hyperparameters for the ResNet model graphs below.
// NOTE(review): transformer.h declares another `Config` in this same
// namespace; a TU that includes both headers will not compile. Consider
// renaming to `ResNetConfig`.
struct Config {
  Config(void); // NOTE(review): no definition is visible for this ctor — confirm one exists
  int batchSize; // number of samples per batch
};

// Builds the sequential (single-device) ResNet computation graph.
// NOTE(review): the definition in ResNet.cc is named `create_compute_graph`
// and takes a `Config &` — this declaration and that definition do not
// match; confirm the intended signature.
ComputationGraph create_computation_graph();

// Builds the parallel (multi-device) ResNet computation graph.
ParallelComputationGraph create_parallel_computation_graph();

} // namespace FlexFlow
7 changes: 7 additions & 0 deletions lib/model/include/model/candel_uno.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once

#include "pcg/computation_graph_builder.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"

namespace FlexFlow {

// Builds the sequential (single-device) Candle-Uno computation graph.
// NOTE(review): ResNet.h declares functions with these exact names and
// signatures in the same namespace — defining both will violate the ODR
// at link time. Consider model-specific names or per-model namespaces.
ComputationGraph create_computation_graph();

// Builds the parallel (multi-device) Candle-Uno computation graph.
ParallelComputationGraph create_parallel_computation_graph();

} // namespace FlexFlow
14 changes: 6 additions & 8 deletions lib/model/include/model/transformer.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
#pragma once

#include "pcg/computation_graph.h"
#include "pcg/computation_graph_builder.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"

namespace FlexFlow {

// Transformer hyperparameters; defaults are assigned in Config::Config().
// NOTE(review): ResNet.h declares another `Config` in this namespace —
// including both headers in one TU will not compile.
struct Config {
  Config(void);
  int hidden_size, embedding_size, num_heads, num_layers, sequence_length,
      batchSize;
};

// Builds the sequential transformer computation graph from `config`.
ComputationGraph create_computation_graph(Config &config);

// Builds the parallel transformer computation graph from `config`.
ParallelComputationGraph create_parallel_computation_graph(Config &config);

} // namespace FlexFlow
109 changes: 109 additions & 0 deletions lib/model/src/ResNet.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include "model/ResNet.h"


namespace FlexFlow {

// One ResNet bottleneck residual block: 1x1 reduce, 3x3 (optionally
// strided), 1x1 expand to 4*out_channels, then residual add + ReLU.
tensor_guid_t BottleneckBlock(ComputationGraphBuilder &builder,
                              tensor_guid_t input,
                              int out_channels,
                              int stride) {
  tensor_guid_t out = builder.conv2d(input, out_channels, 1, 1, 1, 1, 0, 0,
                                     std::nullopt, 1, true, std::nullopt,
                                     std::nullopt, std::nullopt, "conv1");
  out = builder.conv2d(out, out_channels, 3, 3, stride, stride, 1, 1,
                       std::nullopt, 1, true, std::nullopt, std::nullopt,
                       std::nullopt, "conv2");
  out = builder.conv2d(out, 4 * out_channels, 1, 1, 1, 1, 0, 0, std::nullopt,
                       1, true, std::nullopt, std::nullopt, std::nullopt,
                       "conv3");

  // The projection ("if") shortcut is intentionally omitted for now —
  // NOTE(review): when stride != 1 or channel counts differ, `input` and
  // `out` have different shapes and this add looks shape-mismatched; confirm.
  out = builder.add(input, out, "add");
  return builder.relu(out, "relu");
}

// Parallel-graph twin of the sequential BottleneckBlock above: identical
// 1x1 / 3x3 / 1x1 conv stack followed by residual add + ReLU.
parallel_tensor_guid_t BottleneckBlock(ParallelComputationGraphBuilder &builder,
                                       parallel_tensor_guid_t input,
                                       int out_channels,
                                       int stride) {
  parallel_tensor_guid_t out =
      builder.conv2d(input, out_channels, 1, 1, 1, 1, 0, 0, std::nullopt, 1,
                     true, std::nullopt, std::nullopt, std::nullopt, "conv1");
  out = builder.conv2d(out, out_channels, 3, 3, stride, stride, 1, 1,
                       std::nullopt, 1, true, std::nullopt, std::nullopt,
                       std::nullopt, "conv2");
  out = builder.conv2d(out, 4 * out_channels, 1, 1, 1, 1, 0, 0, std::nullopt,
                       1, true, std::nullopt, std::nullopt, std::nullopt,
                       "conv3");

  // The projection ("if") shortcut is intentionally omitted for now —
  // NOTE(review): with stride != 1 or differing channels the residual add
  // looks shape-mismatched; confirm.
  out = builder.add(input, out, "add");
  return builder.relu(out, "relu");
}



// Builds the sequential ResNet-50 computation graph.
// NOTE(review): renamed from `create_compute_graph`, which matched no
// declaration — model/ResNet.h declares `create_computation_graph` (and
// declares it with no parameters; confirm the intended signature there).
ComputationGraph create_computation_graph(Config &config) {
  ComputationGraphBuilder builder;

  // Input tensor, NCHW: batchSize x 3 x 229 x 229. The static_cast avoids
  // an ill-formed narrowing (int -> size_t) conversion in list-init.
  // NOTE(review): 229 is unusual — canonical ResNet input is 224x224; confirm.
  std::vector<size_t> dims = {
      static_cast<size_t>(config.batchSize), 3, 229, 229};
  DimOrdered<ff_dim_t, size_t> ff_ordered(dims);
  TensorDims t_dims(ff_ordered);
  tensor_guid_t t = builder.create_tensor(TensorShape(t_dims, DataType::FLOAT),
                                          CreateGrad::YES);

  // Stem: 7x7 stride-2 conv followed by 3x3 stride-2 max pool.
  t = builder.conv2d(t, 64, 7, 7, 2, 2, 3, 3, std::nullopt, 1, true,
                     std::nullopt, std::nullopt, std::nullopt, "conv1");
  t = builder.pool2d(t, 3, 3, 2, 2, 1, 1, PoolOp::MAX, std::nullopt, "pool1");

  // ResNet-50 stage layout: 3/4/6/3 bottleneck blocks; the first block of
  // each later stage downsamples with stride 2.
  for (int i = 0; i < 3; i++) {
    t = BottleneckBlock(builder, t, 64, 1);
  }
  for (int i = 0; i < 4; i++) {
    t = BottleneckBlock(builder, t, 128, i == 0 ? 2 : 1);
  }
  for (int i = 0; i < 6; i++) {
    t = BottleneckBlock(builder, t, 256, i == 0 ? 2 : 1);
  }
  for (int i = 0; i < 3; i++) {
    t = BottleneckBlock(builder, t, 512, i == 0 ? 2 : 1);
  }

  // Head: 7x7 average pool, flatten, 10-way classifier, softmax.
  t = builder.pool2d(t, 7, 7, 1, 1, 0, 0, PoolOp::AVG, std::nullopt, "pool2");
  t = builder.flat(t, "flat");
  t = builder.dense(t, 10, std::nullopt, true, DataType::FLOAT, std::nullopt,
                    std::nullopt, "dense");
  t = builder.softmax(t, -1, "softmax");

  return builder.computation_graph;
}


// Builds the parallel ResNet-50 computation graph, mirroring the
// sequential version above layer-for-layer.
ParallelComputationGraph create_parallel_computation_graph() {
  ParallelComputationGraphBuilder builder;

  // Hard-coded parallel layout: three shard dims of (size 2, degree 4),
  // sum degree 2, discard-copy degree 2.
  // NOTE(review): only 3 shard dims, while the sequential input is 4-D
  // (N, C, H, W) — confirm the intended rank.
  ShardParallelDim dim(2, 4);
  std::vector<ShardParallelDim> dims = {dim, dim, dim};
  DimOrdered<ff_dim_t, ShardParallelDim> ff_ordered(dims);
  SumDegree sd(2);
  DiscardCopyDegree dcd(2);
  ReplicaParallelDimSet dims2(sd, dcd);
  ParallelTensorDims t_dims(ff_ordered, dims2);
  parallel_tensor_guid_t t = builder.create_input_tensor(
      ParallelTensorShape(t_dims, DataType::FLOAT), true, "input_tensor");

  // Stem. Pooling carries no fused activation, matching the sequential
  // graph (the previous Activation::RELU here looked like a copy-paste slip).
  t = builder.conv2d(t, 64, 7, 7, 2, 2, 3, 3, std::nullopt, 1, true,
                     std::nullopt, std::nullopt, std::nullopt, "conv1");
  t = builder.pool2d(t, 3, 3, 2, 2, 1, 1, PoolOp::MAX, std::nullopt, "pool1");

  // ResNet-50 stages: 3/4/6/3 bottleneck blocks, stride-2 entry into
  // stages 2-4.
  for (int i = 0; i < 3; i++) {
    t = BottleneckBlock(builder, t, 64, 1);
  }
  for (int i = 0; i < 4; i++) {
    t = BottleneckBlock(builder, t, 128, i == 0 ? 2 : 1);
  }
  for (int i = 0; i < 6; i++) {
    t = BottleneckBlock(builder, t, 256, i == 0 ? 2 : 1);
  }
  for (int i = 0; i < 3; i++) {
    t = BottleneckBlock(builder, t, 512, i == 0 ? 2 : 1);
  }

  // Head: average pool (no activation, as in the sequential graph),
  // flatten, classifier, softmax.
  t = builder.pool2d(t, 7, 7, 1, 1, 0, 0, PoolOp::AVG, std::nullopt, "pool2");
  t = builder.flat(t, "flat");
  t = builder.dense(t, 10, std::nullopt, true, DataType::FLOAT, std::nullopt,
                    std::nullopt, "dense");
  t = builder.softmax(t, -1, "softmax");

  return builder.pcg;
}





} // namespace FlexFlow
1 change: 1 addition & 0 deletions lib/model/src/candel_uno.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "model/candel_uno.h"
140 changes: 105 additions & 35 deletions lib/model/src/transformer.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "model/transformer.h"


namespace FlexFlow {

Config::Config(void) {
Expand All @@ -10,41 +9,112 @@ Config::Config(void) {
num_layers = 12;
sequence_length = 512;
batchSize = 8;
ff_ordered;
}

// Builds the sequential transformer computation graph from `config`.
// (The diff artifact previously left two definitions of this function in
// the text — the superseded one bound a TensorDims& directly to
// config.ff_ordered, an invalid reference bind — only the current
// implementation is kept here.)
ComputationGraph create_computation_graph(Config &config) {
  ComputationGraphBuilder builder;

  // Input activations: batchSize x sequence_length x hidden_size. The
  // static_casts avoid ill-formed narrowing (int -> size_t) in list-init.
  std::vector<size_t> dims = {static_cast<size_t>(config.batchSize),
                              static_cast<size_t>(config.sequence_length),
                              static_cast<size_t>(config.hidden_size)};
  DimOrdered<ff_dim_t, size_t> ff_ordered(dims);
  TensorDims t_dims(ff_ordered);
  tensor_guid_t t = builder.create_tensor(TensorShape(t_dims, DataType::FLOAT),
                                          CreateGrad::YES);

  // num_layers encoder blocks: self-attention (head dim =
  // hidden_size / num_heads) followed by a two-layer feed-forward
  // network (dense1 with ReLU, dense2 linear).
  for (int i = 0; i < config.num_layers; i++) {
    tensor_guid_t attention =
        builder.multihead_attention(t,
                                    t,
                                    t,
                                    config.hidden_size,
                                    config.num_heads,
                                    config.hidden_size / config.num_heads,
                                    config.hidden_size / config.num_heads,
                                    0.0f,
                                    true,
                                    false,
                                    false,
                                    {},
                                    "multihead_attention");
    tensor_guid_t dense1 = builder.dense(attention,
                                         config.hidden_size,
                                         Activation::RELU,
                                         false,
                                         DataType::FLOAT,
                                         std::nullopt,
                                         std::nullopt,
                                         "dense1");
    tensor_guid_t dense2 = builder.dense(dense1,
                                         config.hidden_size,
                                         std::nullopt,
                                         false,
                                         DataType::FLOAT,
                                         std::nullopt,
                                         std::nullopt,
                                         "dense2");
    t = dense2;
  }

  // Scalar output head. The returned guid was unused; presumably the
  // layer is recorded inside the builder — confirm.
  builder.dense(t,
                1,
                std::nullopt,
                false,
                DataType::FLOAT,
                std::nullopt,
                std::nullopt,
                "output_dense");
  return builder.computation_graph;
}
// Builds the parallel transformer computation graph from `config`.
// (The diff artifact previously left two definitions of this function in
// the text — the superseded one declared a `tensor_guid_t` for a
// parallel-builder result — only the current implementation is kept.)
ParallelComputationGraph create_parallel_computation_graph(Config &config) {
  ParallelComputationGraphBuilder builder;

  // Input tensor with a hard-coded parallel layout: three shard dims of
  // (size 2, degree 4), sum degree 2, discard-copy degree 2.
  // NOTE(review): config.batchSize / sequence_length / hidden_size are
  // not reflected in these dims — confirm that is intentional.
  ShardParallelDim dim(2, 4);
  std::vector<ShardParallelDim> dims = {dim, dim, dim};
  DimOrdered<ff_dim_t, ShardParallelDim> ff_ordered(dims);
  SumDegree sd(2);
  DiscardCopyDegree dcd(2);
  ReplicaParallelDimSet dims2(sd, dcd);
  ParallelTensorDims t_dims(ff_ordered, dims2);
  parallel_tensor_guid_t t = builder.create_input_tensor(
      ParallelTensorShape(t_dims, DataType::FLOAT), true, "input_tensor");

  // num_layers encoder blocks: self-attention plus a single fused dense.
  // NOTE(review): the sequential graph uses two dense layers per block —
  // confirm the intended parity.
  for (int i = 0; i < config.num_layers; i++) {
    parallel_tensor_guid_t attention =
        builder.multihead_attention(t,
                                    t,
                                    t,
                                    config.hidden_size,
                                    config.num_heads,
                                    config.hidden_size / config.num_heads,
                                    config.hidden_size / config.num_heads,
                                    0.0f,
                                    true,
                                    false,
                                    false,
                                    {},
                                    {},
                                    {},
                                    "multihead_attention");
    t = builder.dense(attention,
                      config.hidden_size,
                      Activation::RELU,
                      false,
                      DataType::FLOAT,
                      std::nullopt,
                      std::nullopt,
                      "fused_dense");
  }

  // Scalar output head. The returned guid was unused; presumably the
  // layer is recorded inside the builder — confirm.
  builder.dense(t,
                1,
                std::nullopt,
                false,
                DataType::FLOAT,
                std::nullopt,
                std::nullopt,
                "output_dense");
  return builder.pcg;
}

} // namespace FlexFlow

0 comments on commit ac1f1b2

Please sign in to comment.