forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
autograd_not_implemented_fallback.cpp
632 lines (582 loc) · 25.1 KB
/
autograd_not_implemented_fallback.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>
#include <c10/util/irange.h>
#include <ATen/core/TorchDispatchUtils.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/ivalue.h>
#include <c10/core/impl/TorchDispatchModeTLS.h>
#include <torch/csrc/autograd/VariableTypeUtils.h>
#include <torch/csrc/autograd/autograd.h>
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/functions/basic_ops.h>
#include <torch/csrc/autograd/functions/utils.h>
#include <optional>
#include <utility>
#include <vector>
namespace torch::autograd {
namespace {
template <typename F>
void _foreach_tensor(
F fn,
torch::jit::Stack* stack,
size_t stack_start,
size_t size) {
// Enumerate over tensors in a stack, including ones in TensorLists
int idx_tensor = 0;
for (const auto idx_arg : c10::irange(size)) {
auto& ivalue = (*stack)[stack_start + idx_arg];
if (ivalue.isTensor()) { // true for optional tensor that has value
const auto& tensor = ivalue.toTensor();
fn(idx_tensor, idx_arg, tensor);
idx_tensor++;
} else if (ivalue.isTensorList()) {
for (const auto& iv : ivalue.toListRef()) {
const auto& tensor = iv.toTensor();
fn(idx_tensor, idx_arg, tensor);
idx_tensor++;
}
}
}
}
AutogradFallbackMode kAutogradFallbackMode = AutogradFallbackMode::Warn;
} // namespace
void setAutogradFallbackMode(AutogradFallbackMode mode) {
TORCH_CHECK(mode != AutogradFallbackMode::Error, "NYI: mode='error'");
kAutogradFallbackMode = mode;
}
AutogradFallbackMode getAutogradFallbackMode() {
return kAutogradFallbackMode;
}
static void warnAutogradNotImplemented(const std::string& op_name) {
TORCH_WARN(
op_name,
": an autograd kernel was not registered to the Autograd key(s) ",
"but we are trying to backprop through it. This may lead to silently incorrect behavior. ",
"This behavior is deprecated and will be removed in a future version of PyTorch. ",
"If your operator is differentiable, please ensure you have registered an "
"autograd kernel to the correct Autograd key (e.g. DispatchKey::Autograd, "
"DispatchKey::CompositeImplicitAutograd). If your operator is not "
"differentiable, or to squash this warning and use the previous behavior, "
"please register torch::CppFunction::makeFallthrough() to DispatchKey::Autograd.");
}
struct WarnNotImplemented : public Node {
WarnNotImplemented(
std::string op_name,
size_t num_outputs,
edge_list&& next_edges)
: Node(std::move(next_edges)),
op_name(std::move(op_name)),
num_outputs(num_outputs) {}
WarnNotImplemented(std::string op_name, size_t num_outputs)
: op_name(std::move(op_name)), num_outputs(num_outputs) {}
variable_list apply(variable_list&& inputs) override;
std::string op_name;
size_t num_outputs;
};
auto WarnNotImplemented::apply(variable_list&& inputs) -> variable_list {
warnAutogradNotImplemented(op_name);
std::vector<at::Tensor> output(num_outputs);
return output;
}
static void basicAutogradNotImplementedFallbackImpl(
const c10::OperatorHandle& op,
c10::DispatchKeySet dispatch_keys,
torch::jit::Stack* stack) {
const auto& schema = op.schema();
const auto& op_name = schema.operator_name().name;
const auto num_arguments = schema.arguments().size();
const auto num_returns = schema.returns().size();
const auto stack_start = stack->size() - num_arguments;
if (getAutogradFallbackMode() == AutogradFallbackMode::Nothing) {
op.redispatchBoxed(dispatch_keys & c10::after_autograd_keyset, stack);
return;
}
TORCH_INTERNAL_ASSERT(
getAutogradFallbackMode() == AutogradFallbackMode::Warn);
bool any_input_requires_grad = false;
_foreach_tensor(
[&](size_t _, size_t idx_arg, const at::Tensor& t) {
if (t.requires_grad()) {
any_input_requires_grad = true;
}
},
stack,
stack_start,
num_arguments);
// Optimization: TLS access can be slow. So we only check if it necessary
// by putting it after the requires_grad checks.
any_input_requires_grad = any_input_requires_grad && GradMode::is_enabled();
std::shared_ptr<WarnNotImplemented> grad_fn;
if (any_input_requires_grad) {
// NB: It is standard to collect edges from all tensors
// (see generated/VariableTypeEverything.cpp for examples)
std::vector<const at::Tensor*> all_tensors_on_stack;
_foreach_tensor(
[&](size_t _, size_t idx_arg, const at::Tensor& t) {
all_tensors_on_stack.push_back(&t);
},
stack,
stack_start,
num_arguments);
grad_fn = std::shared_ptr<WarnNotImplemented>(
new WarnNotImplemented(op_name, all_tensors_on_stack.size()),
deleteNode);
grad_fn->set_next_edges(collect_next_edges(all_tensors_on_stack));
}
op.redispatchBoxed(dispatch_keys & c10::after_autograd_keyset, stack);
if (any_input_requires_grad) {
// NB: if the operator mutates any inputs in-place and does not return them
// as outputs, we are unable to lazily raise a warning. This is OK because
// we don't expect many existing operators to do this because of the amount
// of technical expertise necessary (you would need to manually register an
// autograd kernel without using autograd.Function)
_foreach_tensor(
[&](size_t _, size_t idx_ret, const at::Tensor& t) {
if (!isDifferentiableType(t.scalar_type())) {
return;
}
const bool is_mutable_output =
schema.is_aliasing({c10::SchemaArgType::output, idx_ret}) &&
schema.is_mutable({c10::SchemaArgType::output, idx_ret});
// If the post-autograd implementation returns Tensors that require
// grad, then we install a hook that will warn during the backwards.
//
// NB: If the operation is inplace and the inputs were views,
// it is possible that the history was rebased and the hook will
// not warn in all places where it should. That is, the following
// won't warn:
// >>> x = torch.randn(3, 3, requires_grad=True)
// >>> z = x.clone()
// >>> w = z[0]
// >>> k = w[0]
// >>> y = op(k)
// >>> torch.autograd.grad(z.sum(), w)
if (t.requires_grad()) {
t.register_hook([op_name](const at::Tensor& grad) {
warnAutogradNotImplemented(op_name);
});
// If history is rebased, then we will attempt to warn
// on the view's base. This will catch most cases (because
// users typically call .backward() and backprop through
// the entire program).
if (t.is_view() && is_mutable_output) {
const auto& base = t._base();
if (base.requires_grad()) {
// Can only register_hook on tensors that require grad.
base.register_hook([op_name](const at::TensorBase& grad) {
warnAutogradNotImplemented(op_name);
});
}
}
return;
}
// If the post-autograd implementation returns any Tensors that
// don't require grad, then we install the WarnNotImplemented grad_fn.
// This grad_fn warns in backward and returns undefined tensor
// gradients.
//
// NOTE [autograd fallback and in-place operations]
// If the schema says the output is mutable, and the output
// is an input, and the input is a view Tensor, then...
// we're not sure if set_history is OK to do, so we just skip
// adding the grad_fn. Builtin operators do rebase_history here,
// but custom operators may have multiple Tensor(a!) returns,
// rebase_history assumes single Tensor(a!) return, and in general
// custom ops don't have a good in-place story.
if (!is_mutable_output) {
set_history(t, grad_fn);
}
},
stack,
stack->size() - num_returns,
num_returns);
}
}
torch::CppFunction basicAutogradNotImplementedFallback() {
return torch::CppFunction::makeFromBoxedFunction<
&basicAutogradNotImplementedFallbackImpl>();
}
void VariableHooks::basic_autograd_not_implemented_fallback(
const c10::OperatorHandle& op,
c10::DispatchKeySet dispatch_keys,
torch::jit::Stack* stack) const {
basicAutogradNotImplementedFallbackImpl(op, dispatch_keys, stack);
}
static void autogradNotImplementedFallbackImpl(
const c10::OperatorHandle& op,
c10::DispatchKeySet dispatch_keys,
torch::jit::Stack* stack) {
// Mimics a subset of the logic of a VariableType NotImplemented kernel
// See gen_variable_type.py
const auto& schema = op.schema();
const auto& op_name = schema.operator_name().name;
const auto num_arguments = schema.arguments().size();
const auto num_returns = schema.returns().size();
const auto stack_start = stack->size() - num_arguments;
const bool grad_mode = GradMode::is_enabled();
std::vector<const at::Tensor*> tensors_requiring_grad_on_stack;
// Keep track of which outputs are output of in-place modification
// so we can rebase_history if necessary
std::vector<bool> is_inplace_output(num_returns, false);
bool any_is_inplace_output = false;
std::vector<bool> is_aliased_output(num_returns, false);
std::optional<size_t> aliased_output_idx;
for (const auto i : c10::irange(num_returns)) {
if (schema.is_aliasing({c10::SchemaArgType::output, i})) {
if (schema.is_mutable({c10::SchemaArgType::output, i})) {
is_inplace_output[i] = true;
any_is_inplace_output = true;
} else {
TORCH_CHECK(
!aliased_output_idx.has_value(),
"Expected only a single output in the operator schema to have a non-write alias annotation (i.e., 'Tensor(a)'). "
"Non-composite functions where multiple outputs are aliased with inputs aren't supported."
"Please rewrite your function as a composite function.");
aliased_output_idx = i;
}
is_aliased_output[i] = true;
}
}
int64_t aliased_input_idx = -1;
for (const auto i : c10::irange(num_arguments)) {
if (schema.is_aliasing({c10::SchemaArgType::input, i}) &&
!schema.is_mutable({c10::SchemaArgType::input, i})) {
TORCH_CHECK(
aliased_input_idx == -1,
"Expected only a single input in the operator schema to have a non-write alias annotation (i.e., 'Tensor(a)'). "
"Non-composite functions where multiple inputs are aliased with outputs aren't supported. "
"Please rewrite your function as a composite function.");
aliased_input_idx = static_cast<int64_t>(i);
}
}
size_t num_tensor_inputs = 0; // Only used for DEBUG-only checks
_foreach_tensor(
[&](size_t _, size_t idx_arg, const at::Tensor& t) {
if (grad_mode && t.requires_grad()) {
tensors_requiring_grad_on_stack.push_back(&t);
}
num_tensor_inputs++;
TORCH_CHECK_NOT_IMPLEMENTED(
!isFwGradDefined(t),
"Trying to use forward AD with ",
op_name,
" that does not support it.");
},
stack,
stack_start,
num_arguments);
const bool any_requires_grad = !tensors_requiring_grad_on_stack.empty();
const bool has_out_arg = std::any_of(
schema.arguments().begin(),
schema.arguments().end(),
[](const c10::Argument& arg) { return arg.is_out(); });
_foreach_tensor(
[&](size_t _, size_t i, const at::Tensor& t) {
if (schema.is_mutable({c10::SchemaArgType::input, i})) {
if (has_out_arg) {
// Normally out argument overloads would not support any arguments
// that require grad. However, we loosen this check to maintain
// backward compatibility.
// See https://github.com/pytorch/pytorch/issues/120988
if (can_mutate_inplace(t, any_requires_grad) !=
can_mutate_inplace_result::success) {
throw_error_out_requires_grad(schema.name().c_str());
}
} else {
check_inplace(t, any_requires_grad);
}
}
},
stack,
stack_start,
num_arguments);
std::shared_ptr<NotImplemented> grad_fn;
if (any_requires_grad) {
grad_fn = std::shared_ptr<NotImplemented>(
new NotImplemented(op_name), deleteNode);
grad_fn->set_next_edges(
collect_next_edges(tensors_requiring_grad_on_stack));
}
#ifndef NDEBUG
// See NOTE [ TensorImpl and Storage Pointer Sanity Checks ]
auto stack_args_copy =
std::vector<c10::IValue>(stack->begin() + stack_start, stack->end());
std::vector<c10::intrusive_ptr<c10::TensorImpl>> impl_saved;
impl_saved.reserve(num_tensor_inputs);
std::vector<std::optional<c10::Storage>> storage_saved;
storage_saved.reserve(num_tensor_inputs);
_foreach_tensor(
[&](size_t idx, size_t _, const at::Tensor& t) {
storage_saved.push_back(
t.has_storage() ? std::optional<c10::Storage>(t.storage())
: std::nullopt);
impl_saved.push_back(t.getIntrusivePtr());
},
&stack_args_copy,
0,
num_arguments);
#endif
if (aliased_input_idx != -1 || any_is_inplace_output) {
at::AutoDispatchBelowAutograd guard;
op.redispatchBoxed(dispatch_keys & c10::after_autograd_keyset, stack);
} else {
// If neither in-place nor view
at::AutoDispatchBelowADInplaceOrView guard;
op.redispatchBoxed(
dispatch_keys & c10::after_ADInplaceOrView_keyset, stack);
}
#ifndef NDEBUG
_foreach_tensor(
[&](size_t idx_tensor, size_t _, const at::Tensor& t) {
// Skip next two for chunk_cat, see
// https://github.com/pytorch/pytorch/issues/130073
if (storage_saved.at(idx_tensor).has_value() &&
op_name != "aten::_chunk_cat")
TORCH_INTERNAL_ASSERT(
storage_saved.at(idx_tensor).value().is_alias_of(t.storage()),
op_name);
if (impl_saved.at(idx_tensor) && op_name != "aten::_chunk_cat")
TORCH_INTERNAL_ASSERT(
impl_saved.at(idx_tensor) == t.getIntrusivePtr(), op_name);
},
&stack_args_copy,
0,
num_arguments);
_foreach_tensor(
[&](size_t idx_tensor, size_t idx_ret, const at::Tensor& t) {
if (at::impl::tensor_has_dispatch(t) ||
at::impl::dispatch_mode_enabled() ||
// NJT components are expected to be reused; skip use_count() check
op_name.rfind("aten::_nested_get", 0) == 0)
return;
// Skip test_parallel_materialize
// For details see https://github.com/pytorch/pytorch/issues/130073
if (op_name == "aten::_test_parallel_materialize" ||
op_name == "aten::_test_optional_intlist" ||
op_name == "aten::_test_optional_filled_intlist" ||
op_name == "aten::_test_optional_floatlist")
return;
if (!is_inplace_output[idx_ret])
TORCH_INTERNAL_ASSERT(
t.use_count() <= 1, op_name); // Okay to return undefined tensor
// note(crcrpar): `_foreach_norm` returns a list of scalar Tensors and
// each Tensor shares a storage of a hidden, intermediate 1D Tensor
// created inside the CUDA implementation. This is because the
// reference implementation of nvidia/apex repo returns this 1D Tensor
// where each element represents the norm of corresponding input Tensor,
// here I want to return the same number of Tensors as the input
// TensorList, see https://github.com/pytorch/pytorch/issues/93940
// Skip native_channel_shuffle as well as transformer_encoder
// For details see https://github.com/pytorch/pytorch/issues/130073
if (!is_aliased_output[idx_ret] && t.has_storage() &&
op_name != "aten::_foreach_norm" &&
op_name != "aten::_transformer_encoder_layer_fwd" &&
op_name != "aten::native_channel_shuffle")
TORCH_INTERNAL_ASSERT(t.storage().use_count() == 1);
},
stack,
stack->size() - num_returns,
num_returns);
// There should be only a single base-view pair, make sure their storage is
// aliased.
if (aliased_input_idx != -1 && aliased_output_idx.has_value()) {
const c10::IValue& aliased_input_iv = stack_args_copy[aliased_input_idx];
const c10::IValue& aliased_output_iv =
(*stack)[stack->size() - num_returns + *aliased_output_idx];
TORCH_INTERNAL_ASSERT(aliased_input_iv.isTensor(), op_name);
TORCH_INTERNAL_ASSERT(
aliased_output_iv.isTensor() || aliased_output_iv.isTensorList(),
op_name);
const at::Tensor& aliased_input = aliased_input_iv.toTensor();
if (aliased_input.has_storage()) {
if (aliased_output_iv.isTensor()) {
const at::Tensor& aliased_output = aliased_input_iv.toTensor();
// for now, skip asserts for subclasses
// TODO: Fix the aliasing situation involving subclasses
if (!at::impl::dispatch_mode_enabled() &&
!at::impl::tensor_has_dispatch(aliased_input) &&
!at::impl::tensor_has_dispatch(aliased_output)) {
TORCH_INTERNAL_ASSERT(
aliased_input.storage().is_alias_of(aliased_output.storage()),
op_name);
}
} else {
const auto aliased_output_vec = aliased_output_iv.toTensorVector();
for (const auto& aliased_output : aliased_output_vec) {
// for now, skip asserts for subclasses
// TODO: Fix the aliasing situation involving subclasses
if (!at::impl::dispatch_mode_enabled() &&
!at::impl::tensor_has_dispatch(aliased_input) &&
!at::impl::tensor_has_dispatch(aliased_output)) {
TORCH_INTERNAL_ASSERT(
aliased_input.storage().is_alias_of(aliased_output.storage()),
op_name);
}
}
}
}
}
#endif
if (any_requires_grad) {
_foreach_tensor(
[&](size_t idx_tensor, size_t idx_ret, const at::Tensor& t) {
if (isDifferentiableType(t.scalar_type())) {
if (is_inplace_output[idx_ret]) {
rebase_history(t, grad_fn);
} else {
set_history(t, grad_fn);
}
}
},
stack,
stack->size() - num_returns,
num_returns);
}
}
torch::CppFunction autogradNotImplementedFallback() {
return torch::CppFunction::makeFromBoxedFunction<
&autogradNotImplementedFallbackImpl>();
}
static void autogradNotImplementedInplaceOrViewFallbackImpl(
const c10::OperatorHandle& op,
c10::DispatchKeySet dispatch_keys,
torch::jit::Stack* stack) {
// Mimics a subset of the logic from ADInplaceOrViewType kernel:
// - see gen_inplace_or_view_type.py
// - this should only be used with autogradNotImplementedFallback above
// - For more information see
// https://pytorch.org/tutorials/advanced/dispatcher
//
// NOTE [ Limitations of ADInplaceOrView boxed kernel ]
//
// This op should only be used with autogradNotImplementedFallback kernel
// because there is some logic we need specifically to enforce that even
// if we do in-place on view's created in this kernel, the proper "derivative
// is not implemented" error is still raised.
//
// Just like the codegened kernel, we try to enforce some things:
// - For views: we enforce that the view relationship is between the first
// input
// and the first output (which may be either Tensor or vec of Tensors
// - For inplace (TODO?): enforce that the same op cannot be both a view and
// inplace
// that is not allowed in the gen_inplace_or_view logic
const auto& schema = op.schema();
const auto& op_name = schema.operator_name().name;
const auto num_arguments = schema.arguments().size();
const auto num_returns = schema.returns().size();
const auto stack_start = stack->size() - num_arguments;
at::Tensor aliased_input;
int64_t aliased_output_idx = -1;
for (const auto i : c10::irange(num_returns)) {
if (schema.is_aliasing({c10::SchemaArgType::output, i}) &&
!schema.is_mutable({c10::SchemaArgType::output, i})) {
TORCH_CHECK(
aliased_output_idx == -1,
"Fallback ADInplaceOrView kernel expects only a single output in the operator schema to have a "
"non-write alias annotation (i.e., 'Tensor(a)'). "
"Non-composite functions where multiple outputs are aliased with inputs aren't supported."
"Please rewrite your function as a composite function.");
aliased_output_idx = static_cast<int64_t>(i);
}
}
std::optional<size_t> aliased_input_idx;
for (const auto i : c10::irange(num_arguments)) {
if (schema.is_aliasing({c10::SchemaArgType::input, i}) &&
!schema.is_mutable({c10::SchemaArgType::input, i})) {
TORCH_CHECK(
!aliased_input_idx.has_value(),
"Fallback ADInplaceOrView kernel expects only a single input in the operator schema to have a "
"non-write alias annotation (i.e., 'Tensor(a)'). "
"Non-composite functions where multiple inputs are aliased with outputs aren't supported. "
"Please rewrite your function as a composite function.");
aliased_input_idx = i;
const c10::IValue& aliased_input_iv =
(*stack)[stack_start + i]; // get a reference to an ivalue on the
// stack
TORCH_CHECK(aliased_input_iv.isTensor());
aliased_input =
aliased_input_iv.toTensor(); // TODO: Can we avoid saving this tensor
// and incurring the refcount bump?
}
}
// See NOTE [ Limitations of ADInplaceOrView boxed kernel ] above
TORCH_CHECK(
(!aliased_input_idx.has_value() && aliased_output_idx == -1) ||
(aliased_input_idx.has_value() && aliased_input_idx.value() == 0 &&
aliased_output_idx == 0),
"Fallback ADInplaceOrView kernel can only create view relationships between the first "
"input and the first output (the output can be a vector of tensors). Please change the "
"order of your operator's parameters so that this is the case.");
const bool is_view = aliased_input_idx.has_value();
{
at::AutoDispatchBelowADInplaceOrView guard;
op.redispatchBoxed(
dispatch_keys & c10::after_ADInplaceOrView_keyset, stack);
}
for (const auto i : c10::irange(num_returns)) {
if (schema.is_mutable({c10::SchemaArgType::output, i})) {
increment_version((*stack)[stack->size() - num_returns + i].toTensor());
}
}
if (is_view) {
c10::IValue& aliased_output_iv =
(*stack)[stack->size() - num_returns + aliased_output_idx];
// See NOTE [ View + Inplace detection ] for more details about this logic
// We always need this view_func because otherwise if we do in-place
// on this view, we would implicitly use AsStridedBackward instead
// of the NotImplemented node. For the cross-dtype/non-strided
// cases, we would create something like this anyway
auto error_msg =
("Mutating the view " + op_name +
"which does not have a derivative implemented is forbidden.");
auto erroring_view_func = std::make_unique<ErroringViewFunc>(error_msg);
const auto erroring_rev_view_func = [op_name = op_name](const at::Tensor&) {
TORCH_CHECK(
false,
"Accessing the reverse view for ",
op_name,
" which does not have a derivative implemented is forbidden.");
return at::Tensor();
};
if (aliased_output_iv.isTensorList()) {
auto aliased_output = aliased_output_iv.toTensorVector();
for (auto& sub_output : aliased_output) {
as_view(
/* base=*/aliased_input,
/* tensor=*/sub_output,
/* is_bw_differentiable=*/true,
/* is_fw_differentiable=*/true,
/* view_func=*/std::move(erroring_view_func),
/* rev_view_func=*/erroring_rev_view_func,
/* creation_meta=*/
InferenceMode::is_enabled()
? CreationMeta::INFERENCE_MODE
: (at::GradMode::is_enabled() ? CreationMeta::MULTI_OUTPUT_NODE
: CreationMeta::NO_GRAD_MODE));
}
auto result = std::move(aliased_output);
stack->at(stack->size() - num_returns + aliased_output_idx) = result;
} else {
TORCH_CHECK(aliased_output_iv.isTensor());
auto result = as_view(
/* base=*/aliased_input,
/* tensor=*/std::move(aliased_output_iv).toTensor(),
/* is_bw_differentiable=*/true,
/* is_fw_differentiable=*/true,
/* view_func=*/std::move(erroring_view_func),
/* rev_view_func=*/erroring_rev_view_func,
/* creation_meta=*/
InferenceMode::is_enabled()
? CreationMeta::INFERENCE_MODE
: (at::GradMode::is_enabled() ? CreationMeta::DEFAULT
: CreationMeta::NO_GRAD_MODE));
stack->at(stack->size() - num_returns + aliased_output_idx) =
std::move(result);
}
}
}
torch::CppFunction autogradNotImplementedInplaceOrViewFallback() {
return torch::CppFunction::makeFromBoxedFunction<
&autogradNotImplementedInplaceOrViewFallbackImpl>();
}
} // namespace torch::autograd