Merge branch 'main' of github.com:triton-inference-server/common into…

… jacky-python-based-pytorch
triton-inference-server · Dec 6, 2023 · de95edb · de95edb
2 parents da04332 + c8ce7c7
commit de95edb
Show file tree

Hide file tree

Showing 4 changed files with 112 additions and 17 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -60,7 +60,7 @@ target_compile_features(common-compile-settings INTERFACE cxx_std_11)
 target_compile_options(common-compile-settings INTERFACE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
     -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
-  $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
+  $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
 )
 
 #

diff --git a/include/triton/common/nvtx.h b/include/triton/common/nvtx.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -31,29 +31,88 @@
 
 namespace triton { namespace common {
 
+namespace detail {
+
+class NvtxTritonDomain {  // Holds the NVTX domain handle named "Triton".
+ public:
+  static nvtxDomainHandle_t& GetDomain()  // Returns the shared handle.
+  {
+    static NvtxTritonDomain inst;  // Constructed on the first call only.
+    return inst.triton_nvtx_domain_;
+  }
+
+ private:
+  NvtxTritonDomain() { triton_nvtx_domain_ = nvtxDomainCreateA("Triton"); }
+
+  ~NvtxTritonDomain() { nvtxDomainDestroy(triton_nvtx_domain_); }
+
+  nvtxDomainHandle_t triton_nvtx_domain_;  // Set in ctor; freed in dtor.
+};
+
+}  // namespace detail
+
 // Records an NVTX range whose duration is that of a C++ scope.
 class NvtxRange {
  public:
-  explicit NvtxRange(const char* label) { nvtxRangePushA(label); }
+  explicit NvtxRange(const char* label, uint32_t rgb = kNvGreen)
+  {  // Opens a range in the Triton NVTX domain with message `label`.
+    auto attr = GetAttributes(label, rgb);  // `rgb` sets the range color.
+    nvtxDomainRangePushEx(detail::NvtxTritonDomain::GetDomain(), &attr);
+  }
+
+  explicit NvtxRange(const std::string& label, uint32_t rgb = kNvGreen)
+      : NvtxRange(label.c_str(), rgb)  // Delegates to the C-string overload.
+  {
+  }
 
-  explicit NvtxRange(const std::string& label) : NvtxRange(label.c_str()) {}
+  ~NvtxRange() { nvtxDomainRangePop(detail::NvtxTritonDomain::GetDomain()); }
 
-  ~NvtxRange() { nvtxRangePop(); }
+  static constexpr uint32_t kNvGreen = 0x76b900;
+  static constexpr uint32_t kRed = 0xc1121f;
+  static constexpr uint32_t kGreen = 0x588157;
+  static constexpr uint32_t kBlue = 0x023047;
+  static constexpr uint32_t kYellow = 0xffb703;
+  static constexpr uint32_t kOrange = 0xfb8500;
+
+ private:
+  // Builds the event attributes for a range: `label` becomes the ASCII
+  // message and `rgb` (0xRRGGBB) becomes a fully opaque ARGB color.
+  nvtxEventAttributes_t GetAttributes(const char* label, uint32_t rgb)
+  {
+    // Value-initialize so that fields not assigned below (e.g. category and
+    // payload) are zero instead of indeterminate when NVTX reads the struct.
+    nvtxEventAttributes_t attr{};
+    attr.version = NVTX_VERSION;
+    attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+    attr.colorType = NVTX_COLOR_ARGB;
+    attr.color = rgb | 0xff000000;  // Force the alpha channel to 0xff.
+    attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
+    attr.message.ascii = label;
+    return attr;
+  }
 };
 
 }}  // namespace triton::common
 
 #endif  // TRITON_ENABLE_NVTX
 
 //
-// Macros to access NVTX functionality
+// Macros to access NVTX functionality.
+// For `NVTX_RANGE` macro please refer to the usage below.
 //
 #ifdef TRITON_ENABLE_NVTX
 #define NVTX_INITIALIZE nvtxInitialize(nullptr)
-#define NVTX_RANGE(V, L) triton::common::NvtxRange V(L)
+#define NVTX_RANGE1(V, L) triton::common::NvtxRange V(L)
+#define NVTX_RANGE2(V, L, RGB) triton::common::NvtxRange V(L, RGB)
 #define NVTX_MARKER(L) nvtxMarkA(L)
 #else
 #define NVTX_INITIALIZE
-#define NVTX_RANGE(V, L)
+#define NVTX_RANGE1(V, L)
+#define NVTX_RANGE2(V, L, RGB)
 #define NVTX_MARKER(L)
 #endif  // TRITON_ENABLE_NVTX
+
+// "Overload" for `NVTX_RANGE` macro: dispatches on the argument count.
+// Usage:
+// NVTX_RANGE(nvtx1, "My message")  -> Records NVTX range with kNvGreen color.
+// NVTX_RANGE(nvtx1, "My message", NvtxRange::kRed)  -> Records NVTX range with
+//                                                      kRed color.
+#define GET_NVTX_MACRO(_1, _2, _3, NAME, ...) NAME
+#define NVTX_RANGE(...) \
+  GET_NVTX_MACRO(__VA_ARGS__, NVTX_RANGE2, NVTX_RANGE1)(__VA_ARGS__)
diff --git a/protobuf/CMakeLists.txt b/protobuf/CMakeLists.txt
@@ -67,7 +67,7 @@ if(${TRITON_COMMON_ENABLE_PROTOBUF})
     proto-library PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
       -Wall -Wextra -Wno-unused-parameter -Werror>
-    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
+    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
   )
 
   set_target_properties(
@@ -140,7 +140,7 @@ if(${TRITON_COMMON_ENABLE_GRPC})
     grpc-service-library PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
       -Wall -Wextra -Wno-unused-parameter -Werror>
-    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
+    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
   )
 
   set_target_properties(
@@ -197,7 +197,7 @@ if(${TRITON_COMMON_ENABLE_GRPC})
     grpc-health-library PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
       -Wall -Wextra -Wno-unused-parameter -Werror>
-    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
+    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
   )
 
   set_target_properties(

diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto
@@ -1382,6 +1382,40 @@ message ModelSequenceBatching
     //@@     The optional field to specify the initial state for the model.
     //@@
     repeated InitialState initial_state = 5;
+
+    //@@  .. cpp:var:: bool use_same_buffer_for_input_output
+    //@@
+    //@@     The optional field to use a single buffer for both input and output
+    //@@     state. Without this option, Triton allocates separate buffers
+    //@@     for input and output state,
+    //@@     which can be problematic if the state size is
+    //@@     large. This option reduces the memory usage by allocating a single
+    //@@     buffer. Enabling this option is recommended whenever
+    //@@     the input state is processed before the output state is written.
+    //@@     When enabled, the state
+    //@@     will always be updated regardless of whether
+    //@@     TRITONBACKEND_StateUpdate is called
+    //@@     (however TRITONBACKEND_StateUpdate should still be called for
+    //@@     completeness).
+    //@@
+    //@@     The default value is false.
+    //@@
+    bool use_same_buffer_for_input_output = 6;
+
+    //@@  .. cpp:var:: bool use_growable_memory
+    //@@
+    //@@     The optional field to enable an implicit state buffer to grow
+    //@@     without reallocating or copying existing memory.
+    //@@     Additional memory will be appended to the end of the buffer and
+    //@@     existing data will be preserved.
+    //@@     This option is only available for CUDA memory and requires enabling
+    //@@     use_same_buffer_for_input_output. When using this option,
+    //@@     StateBuffer call will always return CUDA memory even if CPU memory
+    //@@     is requested.
+    //@@
+    //@@     The default value is false.
+    //@@
+    bool use_growable_memory = 7;
   }
 
   //@@  .. cpp:var:: message StrategyDirect
@@ -1534,13 +1568,15 @@ message ModelSequenceBatching
   //@@
   repeated State state = 5;
 
-  //@@  .. cpp:var:: bool generative_sequence
+  //@@  .. cpp:var:: bool iterative_sequence
   //@@
-  //@@     The sequence batcher is expecting the sequence to be generative. A
-  //@@     generative sequence is initiated by single request, the sequence
-  //@@     batcher expects the same request to be "rescheduled" by the model if
-  //@@     the sequence is continuing.
-  bool generative_sequence = 6;
+  //@@     Requests for iterative sequences are processed over a number
+  //@@     of iterations. An iterative sequence is initiated by a single
+  //@@     request and is "rescheduled" by the model until completion.
+  //@@     Inflight requests will be batched together
+  //@@     and can complete independently. Note that this feature
+  //@@     requires backend support. The default value is false.
+  bool iterative_sequence = 6;
 }
 
 //@@