forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParallelCommon.cpp
128 lines (106 loc) · 3.28 KB
/
ParallelCommon.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <ATen/Parallel.h>
#include <ATen/Config.h>
#include <ATen/PTThreadPool.h>
#include <ATen/Version.h>
#include <sstream>
#include <thread>
#if AT_MKL_ENABLED()
#include <mkl.h>
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
#if defined(__APPLE__) && defined(__aarch64__) && !defined(C10_MOBILE)
#include <sys/sysctl.h>
#endif
namespace at {
namespace {
// Looks up the environment variable `var_name`.
// Returns the pointer handed back by std::getenv when the variable is set;
// otherwise returns `def_value` (nullptr unless the caller supplies one).
const char* get_env_var(
    const char* var_name, const char* def_value = nullptr) {
  if (const char* found = std::getenv(var_name)) {
    return found;
  }
  return def_value;
}
// Reads a thread count from the environment variable `var_name`.
//
// Returns the parsed value when the variable is set and std::stoi yields a
// strictly positive integer. Returns `def_value` when the variable is not set.
// When the variable is set but cannot be parsed, or parses to zero or a
// negative number, a warning is emitted through TORCH_WARN and `def_value` is
// returned.
size_t get_env_num_threads(const char* var_name, size_t def_value = 0) {
  const char* value = std::getenv(var_name);
  if (value == nullptr) {
    return def_value;
  }
  try {
    // std::stoi throws std::invalid_argument / std::out_of_range on bad input;
    // both are handled by the catch block below.
    const int nthreads = std::stoi(value);
    // Give the check a message so the resulting warning states what was
    // expected rather than only echoing the failed expression.
    TORCH_CHECK(
        nthreads > 0, "expected a positive integer, got ", nthreads);
    return static_cast<size_t>(nthreads);
  } catch (const std::exception& e) {
    std::ostringstream oss;
    oss << "Invalid " << var_name << " variable value, " << e.what();
    TORCH_WARN(oss.str());
  }
  return def_value;
}
} // namespace
// Produces a multi-line, human-readable summary of the parallel runtime:
// the values returned by at::get_num_threads() and
// at::get_num_interop_threads(), the OpenMP / MKL / MKL-DNN version strings
// (plus the OpenMP and MKL max-thread counts when those are compiled in),
// std::thread::hardware_concurrency(), the OMP_NUM_THREADS and
// MKL_NUM_THREADS environment variables, and the ATen parallel backend
// selected at build time.
std::string get_parallel_info() {
  std::ostringstream report;

  // Thread counts as reported by ATen itself.
  const auto num_threads = at::get_num_threads();
  report << "ATen/Parallel:\n\tat::get_num_threads() : " << num_threads
         << '\n';
  const auto num_interop_threads = at::get_num_interop_threads();
  report << "\tat::get_num_interop_threads() : " << num_interop_threads
         << '\n';

  // OpenMP section.
  report << at::get_openmp_version() << '\n';
#ifdef _OPENMP
  report << "\tomp_get_max_threads() : " << omp_get_max_threads() << '\n';
#endif

  // MKL section.
  report << at::get_mkl_version() << '\n';
#if AT_MKL_ENABLED()
  report << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << '\n';
#endif

  // MKL-DNN section.
  report << at::get_mkldnn_version() << '\n';

  const auto hw_concurrency = std::thread::hardware_concurrency();
  report << "std::thread::hardware_concurrency() : " << hw_concurrency
         << '\n';

  // Environment overrides; an unset variable is shown as "[not set]".
  // Each lookup is streamed before the next one is performed.
  report << "Environment variables:" << '\n';
  const char* const omp_env = get_env_var("OMP_NUM_THREADS", "[not set]");
  report << "\tOMP_NUM_THREADS : " << omp_env << '\n';
  const char* const mkl_env = get_env_var("MKL_NUM_THREADS", "[not set]");
  report << "\tMKL_NUM_THREADS : " << mkl_env << '\n';

  // Backend selected at build time, with an optional mobile marker.
  report << "ATen parallel backend: ";
#if AT_PARALLEL_OPENMP
  report << "OpenMP";
#elif AT_PARALLEL_NATIVE
  report << "native thread pool";
#endif
#ifdef C10_MOBILE
  report << " [mobile]";
#endif
  report << '\n';

#if AT_EXPERIMENTAL_SINGLE_THREAD_POOL
  report << "Experimental: single thread pool" << '\n';
#endif

  return report.str();
}
// Computes the default number of intra-op threads.
//
// Resolution order on non-mobile builds:
//   1. MKL_NUM_THREADS if it holds a valid value, otherwise OMP_NUM_THREADS
//      (as parsed by get_env_num_threads).
//   2. 1 when built with FBCODE_CAFFE2 on aarch64.
//   3. On Apple aarch64, the "hw.perflevel0.physicalcpu" sysctl value when the
//      query succeeds and reports more than one core.
//   4. TaskThreadPoolBase::defaultNumThreads().
// On mobile builds (C10_MOBILE) the function is not supported and raises via
// TORCH_CHECK.
int intraop_default_num_threads() {
#ifdef C10_MOBILE
  // The intra-op thread pool size should be determined by mobile cpuinfo.
  // Hook this up with the logic in caffe2/utils/threadpool if this API is ever
  // needed on mobile.
  TORCH_CHECK(false, "Undefined intraop_default_num_threads on mobile.");
#else
  // MKL_NUM_THREADS takes precedence; OMP_NUM_THREADS serves as its fallback.
  const size_t from_omp = get_env_num_threads("OMP_NUM_THREADS", 0);
  const size_t from_env = get_env_num_threads("MKL_NUM_THREADS", from_omp);
  if (from_env != 0) {
    return static_cast<int>(from_env);
  }

#if defined(FBCODE_CAFFE2) && defined(__aarch64__)
  return 1;
#else
#if defined(__aarch64__) && defined(__APPLE__)
  // Apple Silicon has both efficiency and performance cores; by default,
  // restrict parallel algorithms to the performance cores.
  int32_t perf_cores = -1;
  size_t perf_cores_len = sizeof(perf_cores);
  const bool query_ok =
      sysctlbyname(
          "hw.perflevel0.physicalcpu",
          &perf_cores,
          &perf_cores_len,
          nullptr,
          0) == 0;
  if (query_ok && perf_cores > 1) {
    return perf_cores;
  }
#endif
  return static_cast<int>(TaskThreadPoolBase::defaultNumThreads());
#endif
#endif /* !defined(C10_MOBILE) */
}
} // namespace at