Skip to content

Commit

Permalink
Limit size of constant nodes creates by DML EP following deduplicatio… (
Browse files Browse the repository at this point in the history
#18915)

### Description
This limits the size of constant data nodes which the DML EP creates in
the DML graph following de-duplication of 1D quantization tensors. In
the process it reduces a check for the maximum size of the constant
node.

This is merged from: #18494

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
  • Loading branch information
jeffbloo authored Jan 2, 2024
1 parent bcd453b commit 2c60bd8
Showing 1 changed file with 13 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -347,19 +347,23 @@ namespace Dml::GraphDescBuilder
// This is a highly inefficient approach to generating constant nodes. It duplicates constant data
// across the graph input as well as every consumer's unique constant node. However it is currently
// only used for small inputs.

// TODO: Rework this to create DML constant nodes with the minimum data size actually used by consuming
// nodes. This would allow this size to be reduced while handling the case that 1D scale and zero point
// values that have been de-duplicated with conversion to scalars in kernels.
uint32_t c_maxConstNodeDataSize = 1024 * 1024;
uint32_t c_maxConstNodeDataSize = 8;

ComPtr<OnnxTensorWrapper> constantInput = constantCpuGraphInputGetter(arg->Name());

if (constantInput && constantInput->GetTensorByteSize() < c_maxConstNodeDataSize)
auto& operatorGraphInputNode = graphNodeCreateInfo.nodesAsOperatorDesc[operatorGraphInputEdge.ToNodeIndex];
std::vector<DmlBufferTensorDesc*> toNodeInputTensorDescs = operatorGraphInputNode->GetInputTensors();
DmlBufferTensorDesc* tensorDesc = toNodeInputTensorDescs[operatorGraphInputEdge.ToNodeInputIndex];

if (constantInput && tensorDesc->totalTensorSizeInBytes < c_maxConstNodeDataSize)
{
// The tensor description's size should be no larger than the constant input unless it was rounded to
// the required alignment.
assert(((constantInput->GetTensorByteSize() + 3) & ~3) >= tensorDesc->totalTensorSizeInBytes);
size_t minimumConstantSize = std::min(constantInput->GetTensorByteSize(), tensorDesc->totalTensorSizeInBytes);
auto data = static_cast<const uint8_t*>(constantInput->GetData());
std::vector<uint8_t> tensorData(data, data + constantInput->GetTensorByteSize());
std::vector<uint8_t> tensorData(data, data + minimumConstantSize);

NodeInfo nodeInfo = {};
nodeInfo.nodeDef = std::move(tensorData);
graphNodes.push_back(std::move(nodeInfo));
Expand All @@ -379,9 +383,6 @@ namespace Dml::GraphDescBuilder
edge.ToNodeInputIndex = operatorGraphInputEdge.ToNodeInputIndex;
graphInputEdges.push_back(edge);

auto& graphInputNode = graphNodeCreateInfo.nodesAsOperatorDesc[operatorGraphInputEdge.ToNodeIndex];
std::vector<DmlBufferTensorDesc*> toNodeInputTensorDescs = graphInputNode->GetInputTensors();
DmlBufferTensorDesc* tensorDesc = toNodeInputTensorDescs[operatorGraphInputEdge.ToNodeInputIndex];
tensorDesc->flags |= DML_TENSOR_FLAG_OWNED_BY_DML;
}
}
Expand Down Expand Up @@ -445,7 +446,7 @@ namespace Dml::GraphDescBuilder
// TODO: Change as new header is ingested
if (dmlDesc.Type == (DML_OPERATOR_TYPE) DML_OPERATOR_QUANTIZED_LINEAR_AVERAGE_POOLING)
dmlDesc.Type = (DML_OPERATOR_TYPE) 169;

// TODO: Change as new header is ingested
if (dmlDesc.Type == (DML_OPERATOR_TYPE) DML_OPERATOR_MATRIX_MULTIPLY_INTEGER_TO_FLOAT)
dmlDesc.Type = (DML_OPERATOR_TYPE) 170;
Expand Down

0 comments on commit 2c60bd8

Please sign in to comment.