Skip to content

Commit

Permalink
Merge branch 'main' of github.com:triton-inference-server/common into…
Browse files Browse the repository at this point in the history
… jacky-python-based-pytorch
  • Loading branch information
kthui committed Dec 6, 2023
2 parents da04332 + c8ce7c7 commit de95edb
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 17 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ target_compile_features(common-compile-settings INTERFACE cxx_std_11)
target_compile_options(common-compile-settings INTERFACE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
)

#
Expand Down
73 changes: 66 additions & 7 deletions include/triton/common/nvtx.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -31,29 +31,88 @@

namespace triton { namespace common {

namespace detail {

class NvtxTritonDomain {
public:
static nvtxDomainHandle_t& GetDomain()
{
static NvtxTritonDomain inst;
return inst.triton_nvtx_domain_;
}

private:
NvtxTritonDomain() { triton_nvtx_domain_ = nvtxDomainCreateA("Triton"); }

~NvtxTritonDomain() { nvtxDomainDestroy(triton_nvtx_domain_); }

nvtxDomainHandle_t triton_nvtx_domain_;
};

} // namespace detail

// Marks an NVTX range spanning a C++ scope: pushed on construction, popped on
// destruction.
class NvtxRange {
public:
explicit NvtxRange(const char* label) { nvtxRangePushA(label); }
explicit NvtxRange(const char* label, uint32_t rgb = kNvGreen)
{
auto attr = GetAttributes(label, rgb);
nvtxDomainRangePushEx(detail::NvtxTritonDomain::GetDomain(), &attr);
}

// Convenience overload for std::string labels; delegates to the const char*
// constructor using label.c_str().
explicit NvtxRange(const std::string& label, uint32_t rgb = kNvGreen)
    : NvtxRange(label.c_str(), rgb) {}

explicit NvtxRange(const std::string& label) : NvtxRange(label.c_str()) {}
// Closes the range opened by the constructor by popping it from the Triton
// NVTX domain.
~NvtxRange()
{
  nvtxDomainHandle_t& domain = detail::NvtxTritonDomain::GetDomain();
  nvtxDomainRangePop(domain);
}

~NvtxRange() { nvtxRangePop(); }
// Predefined range colors, given as 24-bit RGB values (0xRRGGBB).
// GetAttributes() ORs in 0xff000000 before passing the value to NVTX as ARGB.
// kNvGreen is the default color used by the constructors.
static constexpr uint32_t kNvGreen = 0x76b900;
static constexpr uint32_t kRed = 0xc1121f;
static constexpr uint32_t kGreen = 0x588157;
static constexpr uint32_t kBlue = 0x023047;
static constexpr uint32_t kYellow = 0xffb703;
static constexpr uint32_t kOrange = 0xfb8500;

private:
// Builds the NVTX event attributes for a range.
//
// label: ASCII message attached to the range. The pointer is stored as-is in
//        the returned structure, so it must remain valid while the structure
//        is in use.
// rgb:   color of the range; 0xff000000 is ORed in and the result is passed
//        to NVTX as an ARGB color.
//
// Returns the populated attribute structure by value.
nvtxEventAttributes_t GetAttributes(const char* label, uint32_t rgb)
{
  // NVTX requires the structure to be zeroed before version/size are set.
  // Without this, the fields not assigned below (category, payload type,
  // payload, reserved) would be handed to NVTX with indeterminate values.
  nvtxEventAttributes_t attr = {};
  attr.version = NVTX_VERSION;
  attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  attr.colorType = NVTX_COLOR_ARGB;
  attr.color = rgb | 0xff000000;
  attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
  attr.message.ascii = label;
  return attr;
}
};

}} // namespace triton::common

#endif // TRITON_ENABLE_NVTX

//
// Macros to access NVTX functionality
// Macros to access NVTX functionality.
// For `NVTX_RANGE` macro please refer to the usage below.
//
#ifdef TRITON_ENABLE_NVTX
#define NVTX_INITIALIZE nvtxInitialize(nullptr)
#define NVTX_RANGE(V, L) triton::common::NvtxRange V(L)
#define NVTX_RANGE1(V, L) triton::common::NvtxRange V(L)
#define NVTX_RANGE2(V, L, RGB) triton::common::NvtxRange V(L, RGB)
#define NVTX_MARKER(L) nvtxMarkA(L)
#else
#define NVTX_INITIALIZE
#define NVTX_RANGE(V, L)
#define NVTX_RANGE1(V, L)
#define NVTX_RANGE2(V, L, RGB)
#define NVTX_MARKER(L)
#endif // TRITON_ENABLE_NVTX

// "Overload" for `NVTX_RANGE` macro.
// Usage:
// NVTX_RANGE(nvtx1, "My message") -> Records NVTX range with kNvGreen color.
// NVTX_RANGE(nvtx1, "My message", NvtxRange::kRed) -> Records NVTX range with
// kRed color.
// Dispatches NVTX_RANGE to NVTX_RANGE1 (two arguments) or NVTX_RANGE2 (three
// arguments) by argument count: the caller's arguments shift the candidate
// names so that the matching one lands in the NAME slot of GET_NVTX_MACRO.
// The trailing `0` is a placeholder that keeps the `...` of GET_NVTX_MACRO
// non-empty in the two-argument case; an empty variadic argument list is not
// permitted before C++20 and is diagnosed under -pedantic.
#define GET_NVTX_MACRO(_1, _2, _3, NAME, ...) NAME
#define NVTX_RANGE(...) \
  GET_NVTX_MACRO(__VA_ARGS__, NVTX_RANGE2, NVTX_RANGE1, 0)(__VA_ARGS__)
6 changes: 3 additions & 3 deletions protobuf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ if(${TRITON_COMMON_ENABLE_PROTOBUF})
proto-library PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
)

set_target_properties(
Expand Down Expand Up @@ -140,7 +140,7 @@ if(${TRITON_COMMON_ENABLE_GRPC})
grpc-service-library PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
)

set_target_properties(
Expand Down Expand Up @@ -197,7 +197,7 @@ if(${TRITON_COMMON_ENABLE_GRPC})
grpc-health-library PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
)

set_target_properties(
Expand Down
48 changes: 42 additions & 6 deletions protobuf/model_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,40 @@ message ModelSequenceBatching
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;

//@@ .. cpp:var:: bool use_same_buffer_for_input_output
//@@
//@@ The optional field to use a single buffer for both input and output
//@@ state. Without this option, Triton allocates separate buffers
//@@ for input and output state
//@@ which can be problematic if the state size is
//@@ large. This option reduces the memory usage by allocating a single
//@@ buffer. Enabling this option is recommended whenever
//@@ the input state is processed before the output state is written.
//@@ When enabled the state
//@@ will always be updated independent of whether
//@@ TRITONBACKEND_StateUpdate is called
//@@ (however TRITONBACKEND_StateUpdate should still be called for
//@@ completeness).
//@@
//@@ The default value is false.
//@@
bool use_same_buffer_for_input_output = 6;

//@@ .. cpp:var:: bool use_growable_memory
//@@
//@@ The optional field to enable an implicit state buffer to grow
//@@ without reallocating or copying existing memory.
//@@ Additional memory will be appended to the end of the buffer and
//@@ existing data will be preserved.
//@@ This option is only available for CUDA memory and requires enabling
//@@ use_same_buffer_for_input_output. When using this option,
//@@ StateBuffer call will always return CUDA memory even if CPU memory
//@@ is requested.
//@@
//@@ The default value is false.
//@@
bool use_growable_memory = 7;
}

//@@ .. cpp:var:: message StrategyDirect
Expand Down Expand Up @@ -1534,13 +1568,15 @@ message ModelSequenceBatching
//@@
repeated State state = 5;

//@@ .. cpp:var:: bool generative_sequence
//@@ .. cpp:var:: bool iterative_sequence
//@@
//@@ The sequence batcher is expecting the sequence to be generative. A
//@@ generative sequence is initiated by single request, the sequence
//@@ batcher expects the same request to be "rescheduled" by the model if
//@@ the sequence is continuing.
bool generative_sequence = 6;
//@@ Requests for iterative sequences are processed over a number
//@@ of iterations. An iterative sequence is initiated by a single
//@@ request and is "rescheduled" by the model until completion.
//@@ Inflight requests will be batched together
//@@ and can complete independently. Note this feature
//@@ requires backend support. Default value is false.
bool iterative_sequence = 6;
}

//@@
Expand Down

0 comments on commit de95edb

Please sign in to comment.