forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
FusedSGD.cpp
86 lines (79 loc) · 2.24 KB
/
FusedSGD.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/native/FusedSGD.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_fused_sgd.h>
#include <ATen/ops/_fused_sgd_native.h>
#endif
namespace at {
namespace native {
/// CPU entry point for the fused SGD step taking the learning rate as a
/// plain double.
///
/// Validates the tensor lists and then invokes `fused_sgd_stub` on `kCPU`
/// once per parameter tensor, in list order.
///
/// @param params               Parameter tensors; `params[i]` is handed to the
///                             stub together with `grads[i]`.
/// @param grads                Gradient tensors; must have the same length as
///                             `params`.
/// @param momentum_buffer_list Momentum buffers. Must be empty when
///                             `momentum == 0.0`, otherwise must have the same
///                             length as `params`.
/// @param weight_decay         Forwarded unchanged to the stub.
/// @param momentum             Forwarded unchanged to the stub; the value 0.0
///                             additionally selects the "no momentum buffer"
///                             path.
/// @param lr                   Forwarded unchanged to the stub.
/// @param dampening            Forwarded unchanged to the stub.
/// @param nesterov             Forwarded unchanged to the stub.
/// @param maximize             Forwarded unchanged to the stub.
/// @param is_first_step        Forwarded unchanged to the stub.
/// @param grad_scale           Optional tensor whose data is accessed as
///                             `float`; the resulting pointer (or nullptr when
///                             absent) is forwarded to the stub.
/// @param found_inf            Optional tensor whose data is accessed as
///                             `float`; if its first element equals 1.0 the
///                             function returns without calling the stub.
/// @throws c10::Error (raised by TORCH_CHECK) when the list sizes are
///         inconsistent.
void _fused_sgd_kernel_cpu_(
    at::TensorList params,
    at::TensorList grads,
    at::TensorList momentum_buffer_list,
    const double weight_decay,
    const double momentum,
    const double lr,
    const double dampening,
    const bool nesterov,
    const bool maximize,
    const bool is_first_step,
    const c10::optional<at::Tensor>& grad_scale,
    const c10::optional<at::Tensor>& found_inf) {
  const float* grad_scale_ptr =
      grad_scale.has_value() ? grad_scale->data_ptr<float>() : nullptr;
  const float* found_inf_ptr =
      found_inf.has_value() ? found_inf->data_ptr<float>() : nullptr;
  // A found_inf flag equal to 1.0 skips the whole step. Note that this early
  // return happens before the list-size checks below.
  if (found_inf_ptr && *found_inf_ptr == 1.0) {
    return;
  }

  const size_t n_tensors = params.size();
  TORCH_CHECK(
      grads.size() == n_tensors,
      "_fused_sgd_kernel_cpu_: expected grads to have the same length as params (",
      n_tensors,
      "), but got ",
      grads.size());

  // With zero momentum no buffers are used, so the caller must not pass any;
  // otherwise there must be exactly one buffer per parameter.
  const bool no_momentum_buffer = momentum == 0.0;
  if (no_momentum_buffer) {
    TORCH_CHECK(
        momentum_buffer_list.size() == 0,
        "_fused_sgd_kernel_cpu_: expected momentum_buffer_list to be empty when momentum is 0, but got ",
        momentum_buffer_list.size(),
        " buffer(s)");
  } else {
    TORCH_CHECK(
        momentum_buffer_list.size() == n_tensors,
        "_fused_sgd_kernel_cpu_: expected momentum_buffer_list to have the same length as params (",
        n_tensors,
        "), but got ",
        momentum_buffer_list.size());
  }

  for (size_t i = 0; i < n_tensors; ++i) {
    fused_sgd_stub(
        kCPU,
        params[i],
        grads[i],
        // A default-constructed Tensor stands in for the missing buffer.
        no_momentum_buffer ? Tensor() : momentum_buffer_list[i],
        weight_decay,
        momentum,
        lr,
        dampening,
        nesterov,
        maximize,
        is_first_step,
        grad_scale_ptr);
  }
}
/// Overload of the fused SGD CPU entry point that receives the learning rate
/// as a tensor.
///
/// The learning rate is extracted from `lr` as a `double` via `item<double>()`
/// and every other argument is passed through untouched to the overload that
/// takes `lr` as a `double`.
void _fused_sgd_kernel_cpu_(
    at::TensorList params,
    at::TensorList grads,
    at::TensorList momentum_buffer_list,
    const double weight_decay,
    const double momentum,
    const at::Tensor& lr,
    const double dampening,
    const bool nesterov,
    const bool maximize,
    const bool is_first_step,
    const c10::optional<at::Tensor>& grad_scale,
    const c10::optional<at::Tensor>& found_inf) {
  // Pull the scalar out of the tensor once, then delegate.
  const double lr_value = lr.item<double>();
  _fused_sgd_kernel_cpu_(
      params,
      grads,
      momentum_buffer_list,
      weight_decay,
      momentum,
      lr_value,
      dampening,
      nesterov,
      maximize,
      is_first_step,
      grad_scale,
      found_inf);
}
// Defines `fused_sgd_stub`, the dispatch stub that the
// `_fused_sgd_kernel_cpu_` overloads in this file invoke with `kCPU`.
// NOTE(review): the matching declaration presumably lives in
// <ATen/native/FusedSGD.h> and the actual kernel is registered elsewhere — confirm.
DEFINE_DISPATCH(fused_sgd_stub);
}
}