diff --git a/bin/export-model-arch/src/export_model_arch.cc b/bin/export-model-arch/src/export_model_arch.cc
index 64419acce4..18ad3c908f 100644
--- a/bin/export-model-arch/src/export_model_arch.cc
+++ b/bin/export-model-arch/src/export_model_arch.cc
@@ -3,6 +3,7 @@
 #include "export_model_arch/json_sp_model_export.dtg.h"
 #include "models/bert/bert.h"
 #include "models/candle_uno/candle_uno.h"
+#include "models/dlrm/dlrm.h"
 #include "models/inception_v3/inception_v3.h"
 #include "models/split_test/split_test.h"
 #include "models/transformer/transformer.h"
@@ -68,6 +69,8 @@ tl::expected
     return get_candle_uno_computation_graph(get_default_candle_uno_config());
   } else if (model_name == "bert") {
     return get_bert_computation_graph(get_default_bert_config());
+  } else if (model_name == "dlrm") {
+    return get_dlrm_computation_graph(get_default_dlrm_config());
   } else if (model_name == "split_test") {
     int batch_size = 8;
     return get_split_test_computation_graph(batch_size);
@@ -143,6 +146,7 @@ int main(int argc, char **argv) {
       "inception_v3",
       "candle_uno",
       "bert",
+      "dlrm",
       "split_test",
       "single_operator"};
   CLIArgumentKey key_model_name = cli_add_positional_argument(
diff --git a/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc b/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc
index 2b59669aad..f307e430e3 100644
--- a/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc
+++ b/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc
@@ -1,6 +1,7 @@
 #include "compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.h"
 #include "models/bert/bert.h"
 #include "models/candle_uno/candle_uno.h"
+#include "models/dlrm/dlrm.h"
 #include "models/inception_v3/inception_v3.h"
 #include "models/split_test/split_test.h"
 #include "models/transformer/transformer.h"
@@ -324,6 +325,16 @@ TEST_SUITE(FF_TEST_SUITE) {
 
       CHECK(sp_decomposition.has_value());
     }
+
+    SUBCASE("dlrm") {
+      ComputationGraph cg =
+          get_dlrm_computation_graph(get_default_dlrm_config());
+
+      std::optional<SeriesParallelDecomposition> sp_decomposition =
+          get_computation_graph_series_parallel_decomposition(cg);
+
+      CHECK(sp_decomposition.has_value());
+    }
   }
 }
 
@@ -393,5 +404,13 @@ TEST_SUITE(FF_TEST_SUITE) {
     std::string result =
         render_preprocessed_computation_graph_for_sp_decomposition(cg);
   }
+
+  SUBCASE("dlrm") {
+    ComputationGraph cg =
+        get_dlrm_computation_graph(get_default_dlrm_config());
+
+    std::string result =
+        render_preprocessed_computation_graph_for_sp_decomposition(cg);
+  }
 }
 }
diff --git a/lib/models/include/models/dlrm/dlrm.h b/lib/models/include/models/dlrm/dlrm.h
new file mode 100644
index 0000000000..9f941176ee
--- /dev/null
+++ b/lib/models/include/models/dlrm/dlrm.h
@@ -0,0 +1,51 @@
+/**
+ * @file dlrm.h
+ *
+ * @brief DLRM model
+ *
+ * @details The DLRM implementation refers to the examples at
+ * https://github.com/flexflow/FlexFlow/blob/inference/examples/cpp/DLRM/dlrm.cc
+ * and
+ * https://github.com/pytorch/torchrec/blob/main/torchrec/models/dlrm.py#L440.
+ */
+
+#ifndef _FLEXFLOW_LIB_MODELS_INCLUDE_MODELS_DLRM_H
+#define _FLEXFLOW_LIB_MODELS_INCLUDE_MODELS_DLRM_H
+
+#include "models/dlrm/dlrm_config.dtg.h"
+#include "pcg/computation_graph_builder.h"
+
+namespace FlexFlow {
+
+// Helper functions to construct the DLRM model
+
+DLRMConfig get_default_dlrm_config();
+
+tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb,
+                              DLRMConfig const &config,
+                              tensor_guid_t const &input,
+                              std::vector<size_t> const &mlp_layers);
+
+tensor_guid_t create_dlrm_sparse_embedding_network(ComputationGraphBuilder &cgb,
+                                                   DLRMConfig const &config,
+                                                   tensor_guid_t const &input,
+                                                   int input_dim,
+                                                   int output_dim);
+
+tensor_guid_t create_dlrm_interact_features(
+    ComputationGraphBuilder &cgb,
+    DLRMConfig const &config,
+    tensor_guid_t const &bottom_mlp_output,
+    std::vector<tensor_guid_t> const &emb_outputs);
+
+/**
+ * @brief Get the DLRM computation graph.
+ *
+ * @param config The config of the DLRM model.
+ * @return ComputationGraph The computation graph of a DLRM model.
+ */
+ComputationGraph get_dlrm_computation_graph(DLRMConfig const &config);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/models/include/models/dlrm/dlrm_config.struct.toml b/lib/models/include/models/dlrm/dlrm_config.struct.toml
new file mode 100644
index 0000000000..08fb6837ed
--- /dev/null
+++ b/lib/models/include/models/dlrm/dlrm_config.struct.toml
@@ -0,0 +1,53 @@
+namespace = "FlexFlow"
+name = "DLRMConfig"
+
+features = [
+  "eq",
+  "ord",
+  "hash",
+  "json",
+  "rapidcheck",
+  "fmt",
+]
+
+includes = [
+  "<vector>",
+  "<string>",
+]
+
+src_includes = [
+  "utils/fmt/vector.h",
+  "utils/hash/vector.h",
+]
+
+[[fields]]
+name = "embedding_dim"
+type = "int"
+
+[[fields]]
+name = "embedding_bag_size"
+type = "size_t"
+
+[[fields]]
+name = "embedding_size"
+type = "std::vector<int>"
+
+[[fields]]
+name = "dense_arch_layer_sizes"
+type = "std::vector<size_t>"
+
+[[fields]]
+name = "over_arch_layer_sizes"
+type = "std::vector<size_t>"
+
+[[fields]]
+name = "arch_interaction_op"
+type = "std::string"
+
+[[fields]]
+name = "batch_size"
+type = "size_t"
+
+[[fields]]
+name = "seed"
+type = "int"
diff --git a/lib/models/src/models/dlrm/dlrm.cc b/lib/models/src/models/dlrm/dlrm.cc
new file mode 100644
index 0000000000..e98407fe5a
--- /dev/null
+++ b/lib/models/src/models/dlrm/dlrm.cc
@@ -0,0 +1,173 @@
+#include "models/dlrm/dlrm.h"
+#include "pcg/computation_graph.h"
+#include "utils/containers/concat_vectors.h"
+#include "utils/containers/transform.h"
+#include "utils/containers/zip.h"
+
+namespace FlexFlow {
+
+/**
+ * @brief Get the default DLRM config.
+ *
+ * @details The configs here refer to the example at
+ * https://github.com/flexflow/FlexFlow/blob/inference/examples/cpp/DLRM/dlrm.cc.
+ */
+DLRMConfig get_default_dlrm_config() {
+  return DLRMConfig{
+      /*embedding_dim=*/64,
+      /*embedding_bag_size=*/1,
+      /*embedding_size=*/
+      std::vector<int>{
+          1000000,
+          1000000,
+          1000000,
+          1000000,
+      },
+      /*dense_arch_layer_sizes=*/
+      std::vector<size_t>{
+          4,
+          64,
+          64,
+      },
+      /*over_arch_layer_sizes=*/
+      std::vector<size_t>{
+          64,
+          64,
+          2,
+      },
+      /*arch_interaction_op=*/"cat",
+      /*batch_size=*/64,
+      /*seed=*/std::rand(),
+  };
+}
+
+tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb,
+                              DLRMConfig const &config,
+                              tensor_guid_t const &input,
+                              std::vector<size_t> const &mlp_layers) {
+  tensor_guid_t t = input;
+  for (size_t i = 0; i < mlp_layers.size() - 1; i++) {
+    float std_dev = sqrt(2.0f / (mlp_layers[i + 1] + mlp_layers[i]));
+    InitializerAttrs projection_initializer =
+        InitializerAttrs{NormInitializerAttrs{
+            /*seed=*/config.seed,
+            /*mean=*/0,
+            /*stddev=*/std_dev,
+        }};
+
+    std_dev = sqrt(2.0f / mlp_layers[i + 1]);
+    InitializerAttrs bias_initializer = InitializerAttrs{NormInitializerAttrs{
+        /*seed=*/config.seed,
+        /*mean=*/0,
+        /*stddev=*/std_dev,
+    }};
+
+    t = cgb.dense(/*input=*/t,
+                  /*outDim=*/mlp_layers[i + 1],
+                  /*activation=*/Activation::RELU,
+                  /*use_bias=*/true,
+                  /*data_type=*/DataType::FLOAT,
+                  /*projection_initializer=*/projection_initializer,
+                  /*bias_initializer=*/bias_initializer);
+  }
+  return t;
+}
+
+tensor_guid_t create_dlrm_sparse_embedding_network(ComputationGraphBuilder &cgb,
+                                                   DLRMConfig const &config,
+                                                   tensor_guid_t const &input,
+                                                   int input_dim,
+                                                   int output_dim) {
+  float range = sqrt(1.0f / input_dim);
+  InitializerAttrs embed_initializer = InitializerAttrs{UniformInitializerAttrs{
+      /*seed=*/config.seed,
+      /*min_val=*/-range,
+      /*max_val=*/range,
+  }};
+
+  tensor_guid_t t = cgb.embedding(input,
+                                  /*num_entries=*/input_dim,
+                                  /*outDim=*/output_dim,
+                                  /*aggr=*/AggregateOp::SUM,
+                                  /*dtype=*/DataType::HALF,
+                                  /*kernel_initializer=*/embed_initializer);
+  return cgb.cast(t, DataType::FLOAT);
+}
+
+tensor_guid_t create_dlrm_interact_features(
+    ComputationGraphBuilder &cgb,
+    DLRMConfig const &config,
+    tensor_guid_t const &bottom_mlp_output,
+    std::vector<tensor_guid_t> const &emb_outputs) {
+  if (config.arch_interaction_op != "cat") {
+    throw mk_runtime_error(fmt::format(
+        "Currently only arch_interaction_op=cat is supported, but found "
+        "arch_interaction_op={}. If you need support for additional "
+        "arch_interaction_op value, please create an issue.",
+        config.arch_interaction_op));
+  }
+
+  return cgb.concat(
+      /*tensors=*/concat_vectors({bottom_mlp_output}, emb_outputs),
+      /*axis=*/1);
+}
+
+ComputationGraph get_dlrm_computation_graph(DLRMConfig const &config) {
+  ComputationGraphBuilder cgb;
+
+  auto create_input_tensor = [&](FFOrdered<size_t> const &dims,
+                                 DataType const &data_type) -> tensor_guid_t {
+    TensorShape input_shape = TensorShape{
+        TensorDims{dims},
+        data_type,
+    };
+    return cgb.create_input(input_shape, CreateGrad::YES);
+  };
+
+  // Create input tensors. Note: each embedding table needs its own input
+  // tensor. Using the vector fill-constructor here would call
+  // create_input_tensor once and wire every table to the same indices.
+  std::vector<tensor_guid_t> sparse_inputs;
+  for (size_t i = 0; i < config.embedding_size.size(); i++) {
+    sparse_inputs.push_back(create_input_tensor(
+        {config.batch_size, config.embedding_bag_size}, DataType::INT64));
+  }
+
+  tensor_guid_t dense_input = create_input_tensor(
+      {config.batch_size, config.dense_arch_layer_sizes.front()},
+      DataType::FLOAT);
+
+  // Construct the model
+  tensor_guid_t bottom_mlp_output = create_dlrm_mlp(
+      /*cgb=*/cgb,
+      /*config=*/config,
+      /*input=*/dense_input,
+      /*mlp_layers=*/config.dense_arch_layer_sizes);
+
+  std::vector<tensor_guid_t> emb_outputs;
+  for (size_t i = 0; i < config.embedding_size.size(); i++) {
+    int input_dim = config.embedding_size.at(i);
+    emb_outputs.emplace_back(create_dlrm_sparse_embedding_network(
+        /*cgb=*/cgb,
+        /*config=*/config,
+        /*input=*/sparse_inputs.at(i),
+        /*input_dim=*/input_dim,
+        /*output_dim=*/config.embedding_dim));
+  }
+
+  tensor_guid_t interacted_features = create_dlrm_interact_features(
+      /*cgb=*/cgb,
+      /*config=*/config,
+      /*bottom_mlp_output=*/bottom_mlp_output,
+      /*emb_outputs=*/emb_outputs);
+
+  tensor_guid_t output = create_dlrm_mlp(
+      /*cgb=*/cgb,
+      /*config=*/config,
+      /*input=*/interacted_features,
+      /*mlp_layers=*/config.over_arch_layer_sizes);
+
+  return cgb.computation_graph;
+}
+
+} // namespace FlexFlow
diff --git a/lib/models/test/src/models/dlrm/dlrm.cc b/lib/models/test/src/models/dlrm/dlrm.cc
new file mode 100644
index 0000000000..97c528254f
--- /dev/null
+++ b/lib/models/test/src/models/dlrm/dlrm.cc
@@ -0,0 +1,19 @@
+#include "models/dlrm/dlrm.h"
+#include "pcg/computation_graph.h"
+#include <doctest/doctest.h>
+
+using namespace ::FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("get_dlrm_computation_graph") {
+    DLRMConfig config = get_default_dlrm_config();
+
+    ComputationGraph result = get_dlrm_computation_graph(config);
+
+    SUBCASE("num layers") {
+      int result_num_layers = get_layers(result).size();
+      int correct_num_layers = 30;
+      CHECK(result_num_layers == correct_num_layers);
+    }
+  }
+}
diff --git a/lib/pcg/src/pcg/computation_graph_builder.cc b/lib/pcg/src/pcg/computation_graph_builder.cc
index dff647f5a1..691926e01e 100644
--- a/lib/pcg/src/pcg/computation_graph_builder.cc
+++ b/lib/pcg/src/pcg/computation_graph_builder.cc
@@ -6,6 +6,7 @@
 #include "op-attrs/ops/attention.h"
 #include "op-attrs/ops/batch_norm.h"
 #include "op-attrs/ops/broadcast.h"
+#include "op-attrs/ops/cast.h"
 #include "op-attrs/ops/concat.h"
 #include "op-attrs/ops/conv_2d.h"
 #include "op-attrs/ops/dropout.h"
@@ -41,6 +42,14 @@ static TensorAttrs make_output_attrs(TensorShape const &shape) {
   return TensorAttrs{shape, std::nullopt, std::nullopt, CreateGrad::YES};
 }
 
+static std::string get_default_name(OperatorType op_type) {
+  return get_operator_type_name(op_type);
+}
+
+static std::string get_default_name(ComputationGraphOpAttrs const &attrs) {
+  return get_default_name(get_op_type(attrs));
+}
+
 ComputationGraphBuilder::ComputationGraphBuilder()
     : computation_graph(make_empty_computation_graph()) {}
 
@@ -160,19 +169,23 @@ tensor_guid_t ComputationGraphBuilder::broadcast(tensor_guid_t const &input,
       this->add_layer(layer, {input}, {}, {make_output_attrs(output_shape)}));
 }
 
-tensor_guid_t
-    ComputationGraphBuilder::cast(tensor_guid_t const &input,
-                                  DataType dtype,
-                                  std::optional<std::string> const &name) {
-  NOT_IMPLEMENTED()
-}
+tensor_guid_t ComputationGraphBuilder::cast(
+    tensor_guid_t const &input,
+    DataType dtype,
+    std::optional<std::string> const &maybe_name) {
 
-static std::string get_default_name(OperatorType op_type) {
-  return get_operator_type_name(op_type);
-}
+  CastAttrs attrs = CastAttrs{dtype};
 
-static std::string get_default_name(ComputationGraphOpAttrs const &attrs) {
-  return get_default_name(get_op_type(attrs));
+  std::string name =
+      maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs}));
+
+  LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name};
+
+  TensorShape output_shape =
+      throw_if_unexpected(get_output_shape(attrs, this->get_shape(input)));
+
+  return get_only(
+      this->add_layer(layer, {input}, {}, {make_output_attrs(output_shape)}));
 }
 
 tensor_guid_t ComputationGraphBuilder::element_unary(
@@ -447,7 +460,7 @@ tensor_guid_t ComputationGraphBuilder::dropout(
 }
 
 tensor_guid_t ComputationGraphBuilder::embedding(
-    tensor_guid_t const &x,
+    tensor_guid_t const &input,
     int num_entries,
     int outDim,
     AggregateOp aggr,
@@ -459,8 +472,6 @@ tensor_guid_t ComputationGraphBuilder::embedding(
       maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs}));
 
   LayerAttrs layer = LayerAttrs{ComputationGraphOpAttrs{attrs}, name};
-  tensor_guid_t input =
-      this->as_type(x, DataType::FLOAT, name + "input_pre_cast");
 
   TensorShape input_shape = this->get_shape(input);
 