Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update winml to use #cores - #soc cores by Default as the number of intraopthreads #18384

Merged
merged 10 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/winml.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ onnxruntime_add_static_library(winml_lib_api
${winml_lib_api_dir}/impl/TensorKindFrom.h
${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h
${winml_lib_api_dir}/NumericData.cpp
${winml_lib_api_dir}/HardwareCoreEnumerator.cpp
${winml_lib_api_dir}/HardwareCoreEnumerator.h
${winml_lib_api_dir}/ImageFeatureDescriptor.cpp
${winml_lib_api_dir}/ImageFeatureDescriptor.h
${winml_lib_api_dir}/ImageFeatureValue.cpp
Expand Down
80 changes: 80 additions & 0 deletions winml/lib/Api/HardwareCoreEnumerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) Microsoft Corporation. All rights reserved.

Check warning on line 1 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.cpp#L1

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
winml/lib/Api/HardwareCoreEnumerator.cpp:1:  At least two spaces is best between code and comments  [whitespace/comments] [2]
Fixed Show fixed Hide fixed
// Licensed under the MIT License.

#include "lib/Api/pch/pch.h"

#include "HardwareCoreEnumerator.h"

namespace WINMLP {

// Owns the raw bytes filled in by GetLogicalProcessorInformationEx together with the
// number of bytes that were written.
struct LogicalProcessorInformation {
  // Backing storage; GetNumberOPhysicalAndEngineeringCores reinterprets it as a sequence of
  // SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX records.
  std::unique_ptr<char[]> Buffer;
  // Number of bytes stored in Buffer. Zero-initialized so that a default-constructed
  // instance never carries an indeterminate length.
  size_t Length = 0;
};

// Tally produced while walking the logical processor information records.
struct CoreCounter {
  // Number of RelationProcessorCore records encountered.
  int64_t PhysicalCores = 0;
  // Number of logical processors that belong to an L2 cache mask but to no L3 cache mask.
  int64_t SocDieCores = 0;
};

// Queries GetLogicalProcessorInformationEx for the given relationship and returns the raw
// record bytes together with their length.
//
// The API is called twice: once with a null buffer to learn the required size, then again
// with a buffer of that size. If either call does not behave as expected, an empty result
// (null Buffer, Length == 0) is returned instead of a partially filled buffer, so callers
// that iterate up to Length simply see no records.
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
  DWORD length = 0;

  // Size probe: expected to fail with ERROR_INSUFFICIENT_BUFFER and report the needed size.
  if (GetLogicalProcessorInformationEx(relationship, nullptr, &length) != FALSE ||
      GetLastError() != ERROR_INSUFFICIENT_BUFFER || length == 0) {
    return {};
  }

  auto processorInformationBytes = std::make_unique<char[]>(length);

  // Checked at runtime rather than with assert(): asserts are compiled out of release
  // builds, and a failed call would otherwise hand back a zero-filled buffer with a
  // non-zero length.
  if (GetLogicalProcessorInformationEx(
          relationship,
          reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(processorInformationBytes.get()),
          &length) == FALSE) {
    return {};
  }

  return {std::move(processorInformationBytes), length};
}

// Walks every record returned for RelationAll and produces:
//   - PhysicalCores: the number of RelationProcessorCore records, and
//   - SocDieCores:   the number of logical processors that appear in some L2 cache mask
//                    but in no L3 cache mask.
static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
  const auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

  CoreCounter cores;
  // Affinity masks are pointer-sized (KAFFINITY); accumulating them in a DWORD would drop
  // every logical processor above index 31 on 64-bit builds.
  // NOTE(review): masks from all records are OR-ed together without looking at the
  // processor group they belong to — confirm this is acceptable on multi-group systems.
  KAFFINITY level2Mask = 0;
  KAFFINITY level3Mask = 0;
  size_t read = 0;

  while ((read + FIELD_OFFSET(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Processor)) <
         logicalProcessorInformation.Length) {
    const auto* currentProcessorInfo = reinterpret_cast<const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*>(
        logicalProcessorInformation.Buffer.get() + read);

    // Stop on a record that would run past the buffer, or on a zero-sized record, which
    // would otherwise keep the loop from ever advancing.
    if (currentProcessorInfo->Size == 0 ||
        (read + currentProcessorInfo->Size) > logicalProcessorInformation.Length) {
      break;
    }

    switch (currentProcessorInfo->Relationship) {
      case RelationProcessorCore:
        cores.PhysicalCores++;
        break;
      case RelationCache:
        if (currentProcessorInfo->Cache.Level == 2) {
          level2Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        } else if (currentProcessorInfo->Cache.Level == 3) {
          level3Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        }
        break;
      default:
        break;
    }

    read += currentProcessorInfo->Size;
  }

  // Count the set bits of (L2 & ~L3) at full mask width. Each iteration clears the lowest
  // set bit, so the loop runs once per logical processor in the mask.
  for (KAFFINITY socOnlyMask = level2Mask & ~level3Mask; socOnlyMask != 0; socOnlyMask &= socOnlyMask - 1) {
    cores.SocDieCores++;
  }

  return cores;
}

// Returns the default intra-op thread count: the number of physical cores with the SoC
// cores excluded.
uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
  // # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
  // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
  const auto cores = GetNumberOPhysicalAndEngineeringCores();

  // SocDieCores is derived from cache masks (L2 but not L3). If no L3 cache is reported,
  // that count can reach or exceed the physical core count, and the subtraction below
  // would yield zero or wrap around to a huge unsigned value. Use every physical core in
  // that case.
  if (cores.SocDieCores >= cores.PhysicalCores) {
    return static_cast<uint32_t>(cores.PhysicalCores);
  }

  // We want to use the number of physical cores, but exclude soc cores.
  return static_cast<uint32_t>(cores.PhysicalCores - cores.SocDieCores);
}

} // namespace WINMLP
11 changes: 11 additions & 0 deletions winml/lib/Api/HardwareCoreEnumerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.

Check warning on line 1 in winml/lib/Api/HardwareCoreEnumerator.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.h#L1

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
winml/lib/Api/HardwareCoreEnumerator.h:1:  At least two spaces is best between code and comments  [whitespace/comments] [2]
// Licensed under the MIT License.

#pragma once

namespace WINMLP {
// Static-only helper for deriving thread-count defaults from the machine's core layout.
struct HardwareCoreEnumerator {
  // Returns the value used as the default number of intra-op threads.
  static uint32_t DefaultIntraOpNumThreads();

  // This type only groups static functions and is never instantiated.
  HardwareCoreEnumerator() = delete;
};
} // namespace WINMLP
3 changes: 2 additions & 1 deletion winml/lib/Api/LearningModelDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <D3d11_4.h>
#include <d3d11on12.h>
#include "D3DDeviceCache.h"
#include "HardwareCoreEnumerator.h"

#include "ConverterResourceStore.h"

Expand Down Expand Up @@ -131,7 +132,7 @@ LearningModelDevice::CacheThreadPool(_winml::IThreading* thread_pool) {

uint32_t LearningModelDevice::NumberOfIntraOpThreads() {
if (IsCpuDevice()) {
return std::thread::hardware_concurrency();
return HardwareCoreEnumerator::DefaultIntraOpNumThreads();
} else {
// GPU sessions should not rely on intra op threads.
// Creating a large thread pool is unnecessary and wasteful, and can cause
Expand Down
12 changes: 11 additions & 1 deletion winml/lib/Api/LearningModelSessionOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,21 @@

#include "lib/Api/pch/pch.h"
#include "LearningModelSessionOptions.h"
#include "HardwareCoreEnumerator.h"

namespace WINMLP {

// Default constructor: seeds the intra-op thread override with
// HardwareCoreEnumerator::DefaultIntraOpNumThreads(). The member is initialized in the
// initializer list so it is never observable in an unset state.
LearningModelSessionOptions::LearningModelSessionOptions()
    : intra_op_num_threads_override_(HardwareCoreEnumerator::DefaultIntraOpNumThreads()) {
}

LearningModelSessionOptions::LearningModelSessionOptions(const LearningModelSessionOptions& options)
: batch_size_override_(options.batch_size_override_),
close_model_on_session_creation_(options.close_model_on_session_creation_) {
close_model_on_session_creation_(options.close_model_on_session_creation_),
named_dim_overrides_(options.named_dim_overrides_),
intra_op_num_threads_override_(options.intra_op_num_threads_override_),
custom_ops_lib_paths_(options.custom_ops_lib_paths_) {
}

uint32_t LearningModelSessionOptions::BatchSizeOverride() {
Expand Down
4 changes: 2 additions & 2 deletions winml/lib/Api/LearningModelSessionOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
LearningModelSessionOptions,
ILearningModelSessionOptionsNative,
ILearningModelSessionOptionsNative1> {
LearningModelSessionOptions() = default;
LearningModelSessionOptions();

LearningModelSessionOptions(const LearningModelSessionOptions& options);

Expand Down Expand Up @@ -72,7 +72,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
// The intra operator num threads property is used to control the number of threads used in the threadpool for intra operator calculations.
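// The default value is the number of physical cores minus the number of SoC cores (see HardwareCoreEnumerator::DefaultIntraOpNumThreads), rather than the number of logical cores, chosen so that the default behavior of WinML runs fast.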
// WARNING: Setting a number higher than the maximum number of logical cores may result in an inefficient threadpool
uint32_t intra_op_num_threads_override_ = std::thread::hardware_concurrency();
uint32_t intra_op_num_threads_override_;

bool allow_thread_spinning_ = true;

Expand Down
Loading