Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update winml to use #cores - #soc cores by Default as the number of intraopthreads #18384

Merged
merged 10 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/winml.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ onnxruntime_add_static_library(winml_lib_api
${winml_lib_api_dir}/impl/TensorKindFrom.h
${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h
${winml_lib_api_dir}/NumericData.cpp
${winml_lib_api_dir}/HardwareCoreEnumerator.cpp
${winml_lib_api_dir}/HardwareCoreEnumerator.h
${winml_lib_api_dir}/ImageFeatureDescriptor.cpp
${winml_lib_api_dir}/ImageFeatureDescriptor.h
${winml_lib_api_dir}/ImageFeatureValue.cpp
Expand Down
90 changes: 90 additions & 0 deletions winml/lib/Api/HardwareCoreEnumerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (c) Microsoft Corporation. All rights reserved.

Check warning on line 1 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.cpp#L1

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
winml/lib/Api/HardwareCoreEnumerator.cpp:1:  At least two spaces is best between code and comments  [whitespace/comments] [2]
Fixed Show fixed Hide fixed
// Licensed under the MIT License.

#include "lib/Api/pch/pch.h"

#include "HardwareCoreEnumerator.h"

namespace WINMLP {

// Owns a raw byte buffer of processor-topology records together with the number of valid
// bytes stored in it.
struct LogicalProcessorInformation {
  // Backing storage for the records; null when nothing has been retrieved.
  std::unique_ptr<char[]> Buffer;
  // Number of valid bytes in Buffer. Defaults to 0 so a default-constructed instance is
  // never walked with an indeterminate size.
  size_t Length = 0;
};

// Retrieves the processor-topology records for `relationship` from the OS.
//
// GetLogicalProcessorInformationEx is called twice: first with a null buffer so that it reports
// the number of bytes required, then with a buffer of that size to receive the records.
//
// Returns the record buffer and the number of valid bytes in it. If either call does not behave
// as expected, the result has a null Buffer and a Length of 0; callers must bound any walk over
// Buffer by Length.
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
  DWORD length = 0;
  BOOL rc = GetLogicalProcessorInformationEx(relationship, nullptr, &length);

  // The sizing call is expected to fail with ERROR_INSUFFICIENT_BUFFER after filling in `length`.
  assert(rc == FALSE);
  if (rc != FALSE || GetLastError() != ERROR_INSUFFICIENT_BUFFER || length == 0) {
    // Asserts compile out of release builds, so also handle the failure at runtime.
    return {nullptr, 0};
  }

  auto processorInformationBytes = std::make_unique<char[]>(length);

  rc = GetLogicalProcessorInformationEx(
    relationship,
    reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(processorInformationBytes.get()),
    &length);

  assert(rc != FALSE);
  if (rc == FALSE) {
    // Never hand back a buffer whose contents were not filled in by the OS.
    return {nullptr, 0};
  }

  return {std::move(processorInformationBytes), length};
}

static long long GetNumberOfSoCDieAtoms() {

Check warning on line 32 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.cpp#L32

Use int16/int64/etc, rather than the C type long [runtime/int] [4]
Raw output
winml/lib/Api/HardwareCoreEnumerator.cpp:32:  Use int16/int64/etc, rather than the C type long  [runtime/int] [4]
// while (Size > (ULONG)FIELD_OFFSET(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Processor)) {
DWORD dwLevel2GroupMask = 0;
DWORD dwLevel3GroupMask = 0;
DWORD dwSoCGroupMask = 0;

auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);
auto processorInformation = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)logicalProcessorInformation.Buffer.get();

size_t read = 0;
while (read <= logicalProcessorInformation.Length) {
switch (processorInformation->Relationship) {
case RelationCache:
if (processorInformation->Cache.Level == 2) {
dwLevel2GroupMask |= processorInformation->Cache.GroupMask.Mask;
} else if (processorInformation->Cache.Level == 3) {
dwLevel3GroupMask |= processorInformation->Cache.GroupMask.Mask;
}
break;
}

read += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX);
processorInformation++;
}

dwSoCGroupMask = (dwLevel2GroupMask & ~dwLevel3GroupMask);

return __popcnt(dwSoCGroupMask);
}

static long long GetNumberOfCores() {

Check warning on line 62 in winml/lib/Api/HardwareCoreEnumerator.cpp

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.cpp#L62

Use int16/int64/etc, rather than the C type long [runtime/int] [4]
Raw output
winml/lib/Api/HardwareCoreEnumerator.cpp:62:  Use int16/int64/etc, rather than the C type long  [runtime/int] [4]
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationProcessorCore);
auto processorInformation = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)logicalProcessorInformation.Buffer.get();

KAFFINITY coreMask = 0;
size_t read = 0;
while (read <= logicalProcessorInformation.Length) {
switch (processorInformation->Relationship) {
case RelationProcessorCore:
coreMask |= processorInformation->Processor.GroupMask->Mask;
break;
}

read += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX);
processorInformation++;
}
return __popcnt64(coreMask);
}

uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
auto get_number_of_cores = static_cast<uint32_t>(GetNumberOfCores());
auto get_number_of_soc_die_atoms = static_cast<uint32_t>(GetNumberOfSoCDieAtoms());
auto num_p_and_e_cores = get_number_of_cores - get_number_of_soc_die_atoms;
printf("num_cores = %d, get_number_of_cores = %d, get_number_of_soc_die_atoms = %d\n", num_cores, get_number_of_cores,
get_number_of_soc_die_atoms);
return num_p_and_e_cores;
}

} // namespace WINMLP
11 changes: 11 additions & 0 deletions winml/lib/Api/HardwareCoreEnumerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.

Check warning on line 1 in winml/lib/Api/HardwareCoreEnumerator.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] winml/lib/Api/HardwareCoreEnumerator.h#L1

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
winml/lib/Api/HardwareCoreEnumerator.h:1:  At least two spaces is best between code and comments  [whitespace/comments] [2]
// Licensed under the MIT License.

#pragma once

namespace WINMLP {
// Static-only helper that derives a default thread count from the machine's processor topology.
struct HardwareCoreEnumerator {
// Deleted: the type only exposes static members and is not meant to be instantiated.
HardwareCoreEnumerator() = delete;
// Returns the default number of intra-op threads, computed in HardwareCoreEnumerator.cpp as the
// detected core count minus the detected SoC-die core count.
static uint32_t DefaultIntraOpNumThreads();
};
} // namespace WINMLP
4 changes: 3 additions & 1 deletion winml/lib/Api/LearningModelDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <D3d11_4.h>
#include <d3d11on12.h>
#include "D3DDeviceCache.h"
#include "HardwareCoreEnumerator.h"

#include "ConverterResourceStore.h"

Expand Down Expand Up @@ -129,9 +130,10 @@ LearningModelDevice::CacheThreadPool(_winml::IThreading* thread_pool) {
return S_OK;
}


uint32_t LearningModelDevice::NumberOfIntraOpThreads() {
if (IsCpuDevice()) {
return std::thread::hardware_concurrency();
return HardwareCoreEnumerator::DefaultIntraOpNumThreads();
} else {
// GPU sessions should not rely on intra op threads.
// Creating a large thread pool is unnecessary and wasteful, and can cause
Expand Down
13 changes: 12 additions & 1 deletion winml/lib/Api/LearningModelSessionOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,22 @@

#include "lib/Api/pch/pch.h"
#include "LearningModelSessionOptions.h"
#include "HardwareCoreEnumerator.h"

namespace WINMLP {

// Default constructor: the intra-op thread count override starts out as the value reported by
// HardwareCoreEnumerator::DefaultIntraOpNumThreads().
LearningModelSessionOptions::LearningModelSessionOptions()
  : intra_op_num_threads_override_(HardwareCoreEnumerator::DefaultIntraOpNumThreads()) {
}


// Copy constructor: carries over the batch size override, the close-model-on-session-creation
// flag, the named dimension overrides, the intra-op thread count override and the custom-op
// library paths from `options`.
//
// NOTE(review): allow_thread_spinning_ is not copied here and therefore keeps its in-class
// default — confirm that this is intentional.
LearningModelSessionOptions::LearningModelSessionOptions(const LearningModelSessionOptions& options)
  : batch_size_override_(options.batch_size_override_),
    close_model_on_session_creation_(options.close_model_on_session_creation_),
    named_dim_overrides_(options.named_dim_overrides_),
    intra_op_num_threads_override_(options.intra_op_num_threads_override_),
    custom_ops_lib_paths_(options.custom_ops_lib_paths_) {
}

uint32_t LearningModelSessionOptions::BatchSizeOverride() {
Expand Down
4 changes: 2 additions & 2 deletions winml/lib/Api/LearningModelSessionOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
LearningModelSessionOptions,
ILearningModelSessionOptionsNative,
ILearningModelSessionOptionsNative1> {
LearningModelSessionOptions() = default;
LearningModelSessionOptions();

LearningModelSessionOptions(const LearningModelSessionOptions& options);

Expand Down Expand Up @@ -72,7 +72,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
// The intra operator num threads property is used to control the number of threads used in the threadpool for intra operator calculations.
// The default value is assigned in the constructor from HardwareCoreEnumerator::DefaultIntraOpNumThreads(), i.e. the detected core count minus the SoC-die cores, rather than the maximum number of logical cores.
// WARNING: Setting a number higher than the maximum number of logical cores may result in an inefficient threadpool
uint32_t intra_op_num_threads_override_ = std::thread::hardware_concurrency();
uint32_t intra_op_num_threads_override_;

bool allow_thread_spinning_ = true;

Expand Down
Loading