From 7a75858c2b76b7ece659ce37a5ba2b2920dc6e56 Mon Sep 17 00:00:00 2001 From: Paul Balanca Date: Fri, 29 Sep 2023 14:49:41 +0100 Subject: [PATCH] Improve TessellateIPU debug context naming (compute sets & tensors) (#39) This PR improves on a couple of debug naming aspects in TessellateIPU: * Generating readable name for `tile_map` operations (discarding metadata); * Proper naming of outputs; This improves massively the readability of a Popvision profile from a program using TessellateIPU. --- tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp | 5 +- tessellate_ipu/lib/tile_map_ops.cpp | 78 ++++++++++++++----- tessellate_ipu/lib/tile_map_ops.hpp | 12 +-- 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp b/tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp index 31a31c4..96e1ca2 100644 --- a/tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp +++ b/tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp @@ -1,6 +1,8 @@ // Copyright (c) 2022 Graphcore Ltd. All rights reserved. #include +#include + #include "ipu_custom_primitive.hpp" #include "tile_array_ops.hpp" #include "tile_map_ops.hpp" @@ -238,11 +240,10 @@ class TileMapEquationCall : public jax::ipu::PrimitiveInterface { poplar::Graph& graph, const std::vector& inputs, std::vector& outputs, const std::string& attributes, const std::string& debug_prefix) { - const auto debug_context = poplar::DebugContext(debug_prefix); const auto tile_equation = ipu::from_json_str(attributes); return lowerTileMapCallToPoplar(graph, inputs, outputs, tile_equation, - debug_context); + debug_prefix); } }; diff --git a/tessellate_ipu/lib/tile_map_ops.cpp b/tessellate_ipu/lib/tile_map_ops.cpp index 920d0c7..baf9f98 100644 --- a/tessellate_ipu/lib/tile_map_ops.cpp +++ b/tessellate_ipu/lib/tile_map_ops.cpp @@ -4,8 +4,43 @@ #include namespace ipu { +namespace { + +/** + * @brief Make a (readable) tile_map call debug prefix. + * Improves PopVision user experience when using TessellateIPU! + */ +std::string makeTileMapCallDebugPrefix(const std::string& raw_debug_prefix, + const std::string& primitive_name) { + const auto format_debug_prefix = [&raw_debug_prefix, + &primitive_name](std::size_t idx) { + // const std::string debug_prefix = raw_debug_prefix.substr(0, idx) + + // "tile_map"; + const std::string debug_prefix = + fmt::format("{}{}[{}]", raw_debug_prefix.substr(0, idx), "tile_map", + primitive_name); + return debug_prefix; + }; + std::string::size_type idx; + // A bit of ugly string pattern matching to remove the metadata, but keep + // namespace. + idx = raw_debug_prefix.rfind("tile_map_equation_call_single_out["); + if (idx != std::string::npos) { + return format_debug_prefix(idx); + } + idx = raw_debug_prefix.rfind("tile_map_equation_call_multi_out["); + if (idx != std::string::npos) { + return format_debug_prefix(idx); + } + // Failing => just keep the same prefix. + return raw_debug_prefix; +} + +} // namespace + std::vector TileMapEquation::allocateInputTensors( - poplar::Graph& graph, const std::vector& inputs) const { + poplar::Graph& graph, const std::vector& inputs, + const poplar::DebugContext& debug_context) const { FMT_ASSERT(inputs.size() <= inputs_info.size(), "Inconsistent input vector size."); @@ -18,9 +53,9 @@ std::vector TileMapEquation::allocateInputTensors( const std::string raw_values = input_info.constant_data.decode(); const auto raw_values_ref = poplar::ArrayRef(raw_values.data(), raw_values.size()); - auto t = createReplicatedConstantTensor(graph, input_info.aval.dtype, - input_info.aval.shape, - raw_values_ref, this->tiles); + auto t = createReplicatedConstantTensor( + graph, input_info.aval.dtype, input_info.aval.shape, raw_values_ref, + this->tiles, {debug_context, input_info.name}); inputs_all.push_back(t); } else { // Keep existing input tensor. @@ -32,7 +67,8 @@ std::vector TileMapEquation::allocateInputTensors( } std::vector TileMapEquation::allocateOutputTensors( - poplar::Graph& graph, const std::vector& inputs) const { + poplar::Graph& graph, const std::vector& inputs, + const poplar::DebugContext& debug_context) const { FMT_ASSERT(inputs.size() == inputs_info.size(), "Inconsistent input vector size."); @@ -48,9 +84,9 @@ std::vector TileMapEquation::allocateOutputTensors( outputs.push_back(inputs.at(idx)); } else if (outinfo.iotype == VertexIOType::Out) { // Allocate an output tensor with proper shape. - outputs.push_back(createShardedVariable(graph, - toPoplar(outinfo.aval.dtype), - outinfo.aval.shape, this->tiles)); + outputs.push_back(createShardedVariable( + graph, toPoplar(outinfo.aval.dtype), outinfo.aval.shape, this->tiles, + {debug_context, outinfo.name})); } else { throw std::runtime_error("Unknown IO type for vertex output tensor."); } @@ -59,26 +95,26 @@ std::vector TileMapEquation::allocateOutputTensors( } std::optional TileMapEquation::allocateTmpSpaceTensor( - poplar::Graph& graph) const { + poplar::Graph& graph, const poplar::DebugContext& debug_context) const { if (!useTmpSpace()) { return std::nullopt; } return createShardedVariable(graph, toPoplar(tmp_space_aval.dtype), - {tmp_space_aval.size()}, this->tiles); + {tmp_space_aval.size()}, this->tiles, + {debug_context, "tmp_space"}); } void TileMapEquation::add(poplar::Graph& graph, poplar::program::Sequence& prog, const std::vector& inputs, const std::vector& outputs, - const poplar::DebugContext& debug_prefix) const { + const poplar::DebugContext& debug_context) const { FMT_ASSERT(inputs.size() == inputs_info.size(), "Inconsistent inputs vector size."); FMT_ASSERT(outputs.size() == outputs_info.size(), "Inconsistent outputs vector size."); - poplar::DebugContext debug_context(debug_prefix, this->pname); // Tensor used for vertex temp. scratch space. - auto tmp_space_tensor_opt = allocateTmpSpaceTensor(graph); + auto tmp_space_tensor_opt = allocateTmpSpaceTensor(graph, debug_context); poplar::ComputeSet cs = graph.addComputeSet(debug_context); for (size_t tidx = 0; tidx < tiles.size(); ++tidx) { @@ -122,9 +158,10 @@ void TileMapEquation::add(poplar::Graph& graph, poplar::program::Sequence& prog, std::vector TileMapEquation::add( poplar::Graph& graph, poplar::program::Sequence& prog, const std::vector& inputs, - const poplar::DebugContext& debug_prefix) const { + const poplar::DebugContext& debug_context) const { // All input tensors: i.e. add constant tensors. - const auto inputs_all = this->allocateInputTensors(graph, inputs); + const auto inputs_all = + this->allocateInputTensors(graph, inputs, debug_context); // No vertex => assume identity function. // Forwarding inputs, with just potential change of shape and dtype. @@ -148,8 +185,9 @@ std::vector TileMapEquation::add( return outputs_all; } // Usual path => map a vertex. - const auto outputs = this->allocateOutputTensors(graph, inputs); - this->add(graph, prog, inputs_all, outputs, debug_prefix); + const auto outputs = + this->allocateOutputTensors(graph, inputs, debug_context); + this->add(graph, prog, inputs_all, outputs, debug_context); return outputs; } @@ -173,8 +211,12 @@ std::size_t TileMapEquation::numInOuts() const { poplar::program::Program lowerTileMapCallToPoplar( poplar::Graph& graph, const std::vector& inputs, std::vector& outputs, const TileMapEquation& tile_map_eqn, - const poplar::DebugContext& debug_context) { + const std::string& raw_debug_prefix) { auto prog = poplar::program::Sequence(); + // Base debug context used in the `tile_map` operation. + const auto debug_prefix = + makeTileMapCallDebugPrefix(raw_debug_prefix, tile_map_eqn.pname); + const auto debug_context = poplar::DebugContext(debug_prefix); // IPU tiles synchronization before compute set. if (tile_map_eqn.sync) { const auto sync_type = poplar::SyncType::INTERNAL; diff --git a/tessellate_ipu/lib/tile_map_ops.hpp b/tessellate_ipu/lib/tile_map_ops.hpp index c7a72ec..b89898a 100644 --- a/tessellate_ipu/lib/tile_map_ops.hpp +++ b/tessellate_ipu/lib/tile_map_ops.hpp @@ -268,7 +268,8 @@ struct TileMapEquation { * @return Collection of input tensors. */ std::vector allocateInputTensors( - poplar::Graph& graph, const std::vector& inputs) const; + poplar::Graph& graph, const std::vector& inputs, + const poplar::DebugContext& debug_context) const; /** * @brief Allocate output (or use existing input) tensors. @@ -277,13 +278,14 @@ struct TileMapEquation { * @return Collection of output tensors. */ std::vector allocateOutputTensors( - poplar::Graph& graph, const std::vector& inputs) const; + poplar::Graph& graph, const std::vector& inputs, + const poplar::DebugContext& debug_context) const; /** * @brief Allocate the temporary-scratch space tensor (if used). */ std::optional allocateTmpSpaceTensor( - poplar::Graph& graph) const; + poplar::Graph& graph, const poplar::DebugContext& debug_context) const; /** * @brief Add vertex/equation to Poplar graph & compute set. @@ -334,12 +336,12 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(TileMapEquation, pname, vname, tiles, * @param inputs List of inputs. * @param outputs List of outputs, to update. * @param tile_map_eqn TileMapEquation info. - * @param debug_context Poplar debug context. + * @param debug_prefix Poplar (raw) debug prefix. * @return Poplar program. */ poplar::program::Program lowerTileMapCallToPoplar( poplar::Graph& graph, const std::vector& inputs, std::vector& outputs, const TileMapEquation& tile_map_eqn, - const poplar::DebugContext& debug_context); + const std::string& debug_prefix); } // namespace ipu