forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctional.py
2204 lines (1856 loc) · 85.1 KB
/
functional.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# mypy: allow-untyped-defs
import itertools
import operator
from typing import Any, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
import torch
import torch.nn.functional as F
from torch import _VF, Tensor
from torch._C import _add_docstr
from torch._jit_internal import _overload as overload, boolean_dispatch
from torch._lowrank import pca_lowrank, svd_lowrank
from torch.overrides import (
handle_torch_function,
has_torch_function,
has_torch_function_unary,
has_torch_function_variadic,
)
__all__ = [
"atleast_1d",
"atleast_2d",
"atleast_3d",
"align_tensors",
"broadcast_shapes",
"broadcast_tensors",
"cartesian_prod",
"block_diag",
"cdist",
"chain_matmul",
"einsum",
"istft",
"lu",
"norm",
"meshgrid",
"pca_lowrank",
"split",
"stft",
"svd_lowrank",
"tensordot",
"unique",
"unique_consecutive",
"unravel_index",
]
def broadcast_tensors(*tensors):
r"""broadcast_tensors(*tensors) -> List of Tensors
Broadcasts the given tensors according to :ref:`broadcasting-semantics`.
Args:
*tensors: any number of tensors of the same type
.. warning::
More than one element of a broadcasted tensor may refer to a single
memory location. As a result, in-place operations (especially ones that
are vectorized) may result in incorrect behavior. If you need to write
to the tensors, please clone them first.
Example::
>>> x = torch.arange(3).view(1, 3)
>>> y = torch.arange(2).view(2, 1)
>>> a, b = torch.broadcast_tensors(x, y)
>>> a.size()
torch.Size([2, 3])
>>> a
tensor([[0, 1, 2],
[0, 1, 2]])
"""
# This wrapper exists to support variadic args.
if has_torch_function(tensors):
return handle_torch_function(broadcast_tensors, tensors, *tensors)
return _VF.broadcast_tensors(tensors) # type: ignore[attr-defined]
def broadcast_shapes(*shapes):
r"""broadcast_shapes(*shapes) -> Size
Similar to :func:`broadcast_tensors` but for shapes.
This is equivalent to
``torch.broadcast_tensors(*map(torch.empty, shapes))[0].shape``
but avoids the need create to intermediate tensors. This is useful for
broadcasting tensors of common batch shape but different rightmost shape,
e.g. to broadcast mean vectors with covariance matrices.
Example::
>>> torch.broadcast_shapes((2,), (3, 1), (1, 1, 1))
torch.Size([1, 3, 2])
Args:
\*shapes (torch.Size): Shapes of tensors.
Returns:
shape (torch.Size): A shape compatible with all input shapes.
Raises:
RuntimeError: If shapes are incompatible.
"""
# This wrapper exists to support variadic args.
# TODO Move this to C++ once the jit has better support for torch.Size.
if not torch.jit.is_tracing():
max_len = 0
for shape in shapes:
if isinstance(shape, (int, torch.SymInt)):
if max_len < 1:
max_len = 1
elif isinstance(shape, (tuple, list)):
s = len(shape)
if max_len < s:
max_len = s
result = [1] * max_len
from torch.fx.experimental.symbolic_shapes import guard_size_oblivious
for shape in shapes:
if isinstance(shape, (int, torch.SymInt)):
shape = (shape,)
if isinstance(shape, (tuple, list)):
for i in range(-1, -1 - len(shape), -1):
if shape[i] < 0:
raise RuntimeError(
f"Trying to create tensor with negative dimension ({shape[i]}): ({shape[i]})"
)
# NB: result is initialized to 1 so this is effectively an
# equals one test
if guard_size_oblivious(shape[i] == 1) or guard_size_oblivious(
shape[i] == result[i]
):
continue
if result[i] != 1:
raise RuntimeError(
"Shape mismatch: objects cannot be broadcast to a single shape"
)
result[i] = shape[i]
else:
raise RuntimeError(
"Input shapes should be of type ints, a tuple of ints, or a list of ints, got ",
shape,
)
return torch.Size(result)
else:
# with implementation above, torch.jit.trace hardcodes the sizes which makes subsequent replays fail
with torch.no_grad():
scalar = torch.zeros((), device="cpu")
tensors = [scalar.expand(shape) for shape in shapes]
tensors = broadcast_tensors(*tensors)
return tensors[0].shape
def split(
tensor: Tensor,
split_size_or_sections: Union[int, List[int]],
dim: int = 0,
) -> Tuple[Tensor, ...]:
r"""Splits the tensor into chunks. Each chunk is a view of the original tensor.
If :attr:`split_size_or_sections` is an integer type, then :attr:`tensor` will
be split into equally sized chunks (if possible). Last chunk will be smaller if
the tensor size along the given dimension :attr:`dim` is not divisible by
:attr:`split_size`.
If :attr:`split_size_or_sections` is a list, then :attr:`tensor` will be split
into ``len(split_size_or_sections)`` chunks with sizes in :attr:`dim` according
to :attr:`split_size_or_sections`.
Args:
tensor (Tensor): tensor to split.
split_size_or_sections (int) or (list(int)): size of a single chunk or
list of sizes for each chunk
dim (int): dimension along which to split the tensor.
Example::
>>> a = torch.arange(10).reshape(5, 2)
>>> a
tensor([[0, 1],
[2, 3],
[4, 5],
[6, 7],
[8, 9]])
>>> torch.split(a, 2)
(tensor([[0, 1],
[2, 3]]),
tensor([[4, 5],
[6, 7]]),
tensor([[8, 9]]))
>>> torch.split(a, [1, 4])
(tensor([[0, 1]]),
tensor([[2, 3],
[4, 5],
[6, 7],
[8, 9]]))
"""
if has_torch_function_unary(tensor):
return handle_torch_function(
split, (tensor,), tensor, split_size_or_sections, dim=dim
)
# Overwriting reason:
# This dispatches to two ATen functions depending on the type of
# split_size_or_sections. The branching code is in _tensor.py, which we
# call here.
return tensor.split(split_size_or_sections, dim)
def einsum(*args: Any) -> Tensor:
r"""einsum(equation, *operands) -> Tensor
Sums the product of the elements of the input :attr:`operands` along dimensions specified using a notation
based on the Einstein summation convention.
Einsum allows computing many common multi-dimensional linear algebraic array operations by representing them
in a short-hand format based on the Einstein summation convention, given by :attr:`equation`. The details of
this format are described below, but the general idea is to label every dimension of the input :attr:`operands`
with some subscript and define which subscripts are part of the output. The output is then computed by summing
the product of the elements of the :attr:`operands` along the dimensions whose subscripts are not part of the
output. For example, matrix multiplication can be computed using einsum as `torch.einsum("ij,jk->ik", A, B)`.
Here, j is the summation subscript and i and k the output subscripts (see section below for more details on why).
Equation:
The :attr:`equation` string specifies the subscripts (letters in `[a-zA-Z]`) for each dimension of
the input :attr:`operands` in the same order as the dimensions, separating subscripts for each operand by a
comma (','), e.g. `'ij,jk'` specify subscripts for two 2D operands. The dimensions labeled with the same subscript
must be broadcastable, that is, their size must either match or be `1`. The exception is if a subscript is
repeated for the same input operand, in which case the dimensions labeled with this subscript for this operand
must match in size and the operand will be replaced by its diagonal along these dimensions. The subscripts that
appear exactly once in the :attr:`equation` will be part of the output, sorted in increasing alphabetical order.
The output is computed by multiplying the input :attr:`operands` element-wise, with their dimensions aligned based
on the subscripts, and then summing out the dimensions whose subscripts are not part of the output.
Optionally, the output subscripts can be explicitly defined by adding an arrow ('->') at the end of the equation
followed by the subscripts for the output. For instance, the following equation computes the transpose of a
matrix multiplication: 'ij,jk->ki'. The output subscripts must appear at least once for some input operand and
at most once for the output.
Ellipsis ('...') can be used in place of subscripts to broadcast the dimensions covered by the ellipsis.
Each input operand may contain at most one ellipsis which will cover the dimensions not covered by subscripts,
e.g. for an input operand with 5 dimensions, the ellipsis in the equation `'ab...c'` cover the third and fourth
dimensions. The ellipsis does not need to cover the same number of dimensions across the :attr:`operands` but the
'shape' of the ellipsis (the size of the dimensions covered by them) must broadcast together. If the output is not
explicitly defined with the arrow ('->') notation, the ellipsis will come first in the output (left-most dimensions),
before the subscript labels that appear exactly once for the input operands. e.g. the following equation implements
batch matrix multiplication `'...ij,...jk'`.
A few final notes: the equation may contain whitespaces between the different elements (subscripts, ellipsis,
arrow and comma) but something like `'. . .'` is not valid. An empty string `''` is valid for scalar operands.
.. note::
``torch.einsum`` handles ellipsis ('...') differently from NumPy in that it allows dimensions
covered by the ellipsis to be summed over, that is, ellipsis are not required to be part of the output.
.. note::
This function uses opt_einsum (https://optimized-einsum.readthedocs.io/en/stable/) to speed up computation or to
consume less memory by optimizing contraction order. This optimization occurs when there are at least three
inputs, since the order does not matter otherwise. Note that finding _the_ optimal path is an NP-hard problem,
thus, opt_einsum relies on different heuristics to achieve near-optimal results. If opt_einsum is not available,
the default order is to contract from left to right.
To bypass this default behavior, add the following line to disable the usage of opt_einsum and skip path
calculation: `torch.backends.opt_einsum.enabled = False`
To specify which strategy you'd like for opt_einsum to compute the contraction path, add the following line:
`torch.backends.opt_einsum.strategy = 'auto'`. The default strategy is 'auto', and we also support 'greedy' and
'optimal'. Disclaimer that the runtime of 'optimal' is factorial in the number of inputs! See more details in
the opt_einsum documentation (https://optimized-einsum.readthedocs.io/en/stable/path_finding.html).
.. note::
As of PyTorch 1.10 :func:`torch.einsum` also supports the sublist format (see examples below). In this format,
subscripts for each operand are specified by sublists, list of integers in the range [0, 52). These sublists
follow their operands, and an extra sublist can appear at the end of the input to specify the output's
subscripts., e.g. `torch.einsum(op1, sublist1, op2, sublist2, ..., [subslist_out])`. Python's `Ellipsis` object
may be provided in a sublist to enable broadcasting as described in the Equation section above.
Args:
equation (str): The subscripts for the Einstein summation.
operands (List[Tensor]): The tensors to compute the Einstein summation of.
Examples::
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # trace
>>> torch.einsum('ii', torch.randn(4, 4))
tensor(-1.2104)
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # diagonal
>>> torch.einsum('ii->i', torch.randn(4, 4))
tensor([-0.1034, 0.7952, -0.2433, 0.4545])
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # outer product
>>> x = torch.randn(5)
>>> y = torch.randn(4)
>>> torch.einsum('i,j->ij', x, y)
tensor([[ 0.1156, -0.2897, -0.3918, 0.4963],
[-0.3744, 0.9381, 1.2685, -1.6070],
[ 0.7208, -1.8058, -2.4419, 3.0936],
[ 0.1713, -0.4291, -0.5802, 0.7350],
[ 0.5704, -1.4290, -1.9323, 2.4480]])
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # batch matrix multiplication
>>> As = torch.randn(3, 2, 5)
>>> Bs = torch.randn(3, 5, 4)
>>> torch.einsum('bij,bjk->bik', As, Bs)
tensor([[[-1.0564, -1.5904, 3.2023, 3.1271],
[-1.6706, -0.8097, -0.8025, -2.1183]],
[[ 4.2239, 0.3107, -0.5756, -0.2354],
[-1.4558, -0.3460, 1.5087, -0.8530]],
[[ 2.8153, 1.8787, -4.3839, -1.2112],
[ 0.3728, -2.1131, 0.0921, 0.8305]]])
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # with sublist format and ellipsis
>>> torch.einsum(As, [..., 0, 1], Bs, [..., 1, 2], [..., 0, 2])
tensor([[[-1.0564, -1.5904, 3.2023, 3.1271],
[-1.6706, -0.8097, -0.8025, -2.1183]],
[[ 4.2239, 0.3107, -0.5756, -0.2354],
[-1.4558, -0.3460, 1.5087, -0.8530]],
[[ 2.8153, 1.8787, -4.3839, -1.2112],
[ 0.3728, -2.1131, 0.0921, 0.8305]]])
>>> # batch permute
>>> A = torch.randn(2, 3, 4, 5)
>>> torch.einsum('...ij->...ji', A).shape
torch.Size([2, 3, 5, 4])
>>> # equivalent to torch.nn.functional.bilinear
>>> A = torch.randn(3, 5, 4)
>>> l = torch.randn(2, 5)
>>> r = torch.randn(2, 4)
>>> torch.einsum('bn,anm,bm->ba', l, A, r)
tensor([[-0.3430, -5.2405, 0.4494],
[ 0.3311, 5.5201, -3.0356]])
"""
import torch.backends.opt_einsum as opt_einsum
# This wrapper exists to support variadic args.
if len(args) < 2:
raise ValueError(
"einsum(): must specify the equation string and at least one operand, "
"or at least one operand and its subscripts list"
)
equation = None
operands = None
if isinstance(args[0], torch.Tensor):
# Convert the subscript list format which is an interleaving of operand and its subscripts
# list with an optional output subscripts list at the end (see documentation for more details on this)
# to the equation string format by creating the equation string from the subscripts list and grouping the
# input operands into a tensorlist (List[Tensor]).
def parse_subscript(n: int) -> str:
if n == Ellipsis:
return "..."
if n >= 0 and n < 26:
return chr(ord("A") + n)
if n >= 26 and n < 52:
return chr(ord("a") + n - 26)
raise ValueError(
"einsum(): subscript in subscript list is not within the valid range [0, 52)"
)
# Parse subscripts for input operands
equation = ",".join("".join(parse_subscript(s) for s in l) for l in args[1::2])
# Parse optional output subscripts (provided when the number of arguments is odd)
if len(args) % 2 == 1:
equation += "->" + "".join(parse_subscript(s) for s in args[-1])
operands = args[:-1:2]
else:
operands = args[::2]
else:
equation = args[0]
operands = args[1:]
if has_torch_function(operands):
return handle_torch_function(einsum, operands, equation, *operands)
if len(operands) == 1 and isinstance(operands[0], (list, tuple)):
# the old interface of passing the operands as one list argument
_operands = operands[0]
# recurse incase operands contains value that has torch function
# in the original implementation this line is omitted
return einsum(equation, *_operands)
if len(operands) <= 2 or not opt_einsum.enabled:
# the path for contracting 0 or 1 time(s) is already optimized
# or the user has disabled using opt_einsum
return _VF.einsum(equation, operands) # type: ignore[attr-defined]
path = None
if opt_einsum.is_available():
_opt_einsum = opt_einsum.get_opt_einsum()
tupled_path = _opt_einsum.contract_path(
equation, *operands, optimize=opt_einsum.strategy
)[0]
# flatten path for dispatching to C++
path = [item for pair in tupled_path for item in pair]
return _VF.einsum(equation, operands, path=path) # type: ignore[attr-defined]
# This wrapper exists to support variadic args.
if TYPE_CHECKING:
# The JIT doesn't understand Union, so only add type annotation for mypy
def meshgrid(
*tensors: Union[Tensor, List[Tensor]], indexing: Optional[str] = None
) -> Tuple[Tensor, ...]:
return _meshgrid(*tensors, indexing=indexing)
else:
def meshgrid(*tensors, indexing: Optional[str] = None) -> Tuple[Tensor, ...]:
r"""Creates grids of coordinates specified by the 1D inputs in `attr`:tensors.
This is helpful when you want to visualize data over some
range of inputs. See below for a plotting example.
Given :math:`N` 1D tensors :math:`T_0 \ldots T_{N-1}` as
inputs with corresponding sizes :math:`S_0 \ldots S_{N-1}`,
this creates :math:`N` N-dimensional tensors :math:`G_0 \ldots
G_{N-1}`, each with shape :math:`(S_0, ..., S_{N-1})` where
the output :math:`G_i` is constructed by expanding :math:`T_i`
to the result shape.
.. note::
0D inputs are treated equivalently to 1D inputs of a
single element.
.. warning::
`torch.meshgrid(*tensors)` currently has the same behavior
as calling `numpy.meshgrid(*arrays, indexing='ij')`.
In the future `torch.meshgrid` will transition to
`indexing='xy'` as the default.
https://github.com/pytorch/pytorch/issues/50276 tracks
this issue with the goal of migrating to NumPy's behavior.
.. seealso::
:func:`torch.cartesian_prod` has the same effect but it
collects the data in a tensor of vectors.
Args:
tensors (list of Tensor): list of scalars or 1 dimensional tensors. Scalars will be
treated as tensors of size :math:`(1,)` automatically
indexing: (str, optional): the indexing mode, either "xy"
or "ij", defaults to "ij". See warning for future changes.
If "xy" is selected, the first dimension corresponds
to the cardinality of the second input and the second
dimension corresponds to the cardinality of the first
input.
If "ij" is selected, the dimensions are in the same
order as the cardinality of the inputs.
Returns:
seq (sequence of Tensors): If the input has :math:`N`
tensors of size :math:`S_0 \ldots S_{N-1}``, then the
output will also have :math:`N` tensors, where each tensor
is of shape :math:`(S_0, ..., S_{N-1})`.
Example::
>>> x = torch.tensor([1, 2, 3])
>>> y = torch.tensor([4, 5, 6])
Observe the element-wise pairings across the grid, (1, 4),
(1, 5), ..., (3, 6). This is the same thing as the
cartesian product.
>>> grid_x, grid_y = torch.meshgrid(x, y, indexing='ij')
>>> grid_x
tensor([[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
>>> grid_y
tensor([[4, 5, 6],
[4, 5, 6],
[4, 5, 6]])
This correspondence can be seen when these grids are
stacked properly.
>>> torch.equal(torch.cat(tuple(torch.dstack([grid_x, grid_y]))),
... torch.cartesian_prod(x, y))
True
`torch.meshgrid` is commonly used to produce a grid for
plotting.
>>> # xdoctest: +REQUIRES(module:matplotlib)
>>> # xdoctest: +REQUIRES(env:DOCTEST_SHOW)
>>> import matplotlib.pyplot as plt
>>> xs = torch.linspace(-5, 5, steps=100)
>>> ys = torch.linspace(-5, 5, steps=100)
>>> x, y = torch.meshgrid(xs, ys, indexing='xy')
>>> z = torch.sin(torch.sqrt(x * x + y * y))
>>> ax = plt.axes(projection='3d')
>>> ax.plot_surface(x.numpy(), y.numpy(), z.numpy())
>>> plt.show()
.. image:: ../_static/img/meshgrid.png
:width: 512
"""
return _meshgrid(*tensors, indexing=indexing)
def _meshgrid(*tensors, indexing: Optional[str]):
if has_torch_function(tensors):
return handle_torch_function(meshgrid, tensors, *tensors, indexing=indexing)
if len(tensors) == 1 and isinstance(tensors[0], (list, tuple)):
# the old interface of passing the operands as one list argument
tensors = tensors[0] # type: ignore[assignment]
# Continue allowing call of old method that takes no indexing
# kwarg for forward compatibility reasons.
#
# Remove this two weeks after landing.
kwargs = {} if indexing is None else {"indexing": indexing}
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
def stft(
input: Tensor,
n_fft: int,
hop_length: Optional[int] = None,
win_length: Optional[int] = None,
window: Optional[Tensor] = None,
center: bool = True,
pad_mode: str = "reflect",
normalized: bool = False,
onesided: Optional[bool] = None,
return_complex: Optional[bool] = None,
) -> Tensor:
r"""Short-time Fourier transform (STFT).
.. warning::
From version 1.8.0, :attr:`return_complex` must always be given
explicitly for real inputs and `return_complex=False` has been
deprecated. Strongly prefer `return_complex=True` as in a future
pytorch release, this function will only return complex tensors.
Note that :func:`torch.view_as_real` can be used to recover a real
tensor with an extra last dimension for real and imaginary components.
.. warning::
From version 2.1, a warning will be provided if a :attr:`window` is
not specified. In a future release, this attribute will be required.
Not providing a window currently defaults to using a rectangular window,
which may result in undesirable artifacts. Consider using tapered windows,
such as :func:`torch.hann_window`.
The STFT computes the Fourier transform of short overlapping windows of the
input. This giving frequency components of the signal as they change over
time. The interface of this function is modeled after (but *not* a drop-in
replacement for) librosa_ stft function.
.. _librosa: https://librosa.org/doc/latest/generated/librosa.stft.html
Ignoring the optional batch dimension, this method computes the following
expression:
.. math::
X[\omega, m] = \sum_{k = 0}^{\text{win\_length-1}}%
\text{window}[k]\ \text{input}[m \times \text{hop\_length} + k]\ %
\exp\left(- j \frac{2 \pi \cdot \omega k}{\text{n\_fft}}\right),
where :math:`m` is the index of the sliding window, and :math:`\omega` is
the frequency :math:`0 \leq \omega < \text{n\_fft}` for ``onesided=False``,
or :math:`0 \leq \omega < \lfloor \text{n\_fft} / 2 \rfloor + 1` for ``onesided=True``.
* :attr:`input` must be either a 1-D time sequence or a 2-D batch of time
sequences.
* If :attr:`hop_length` is ``None`` (default), it is treated as equal to
``floor(n_fft / 4)``.
* If :attr:`win_length` is ``None`` (default), it is treated as equal to
:attr:`n_fft`.
* :attr:`window` can be a 1-D tensor of size :attr:`win_length`, e.g., from
:meth:`torch.hann_window`. If :attr:`window` is ``None`` (default), it is
treated as if having :math:`1` everywhere in the window. If
:math:`\text{win\_length} < \text{n\_fft}`, :attr:`window` will be padded on
both sides to length :attr:`n_fft` before being applied.
* If :attr:`center` is ``True`` (default), :attr:`input` will be padded on
both sides so that the :math:`t`-th frame is centered at time
:math:`t \times \text{hop\_length}`. Otherwise, the :math:`t`-th frame
begins at time :math:`t \times \text{hop\_length}`.
* :attr:`pad_mode` determines the padding method used on :attr:`input` when
:attr:`center` is ``True``. See :meth:`torch.nn.functional.pad` for
all available options. Default is ``"reflect"``.
* If :attr:`onesided` is ``True`` (default for real input), only values for
:math:`\omega` in :math:`\left[0, 1, 2, \dots, \left\lfloor
\frac{\text{n\_fft}}{2} \right\rfloor + 1\right]` are returned because
the real-to-complex Fourier transform satisfies the conjugate symmetry,
i.e., :math:`X[m, \omega] = X[m, \text{n\_fft} - \omega]^*`.
Note if the input or window tensors are complex, then :attr:`onesided`
output is not possible.
* If :attr:`normalized` is ``True`` (default is ``False``), the function
returns the normalized STFT results, i.e., multiplied by :math:`(\text{frame\_length})^{-0.5}`.
* If :attr:`return_complex` is ``True`` (default if input is complex), the
return is a ``input.dim() + 1`` dimensional complex tensor. If ``False``,
the output is a ``input.dim() + 2`` dimensional real tensor where the last
dimension represents the real and imaginary components.
Returns either a complex tensor of size :math:`(* \times N \times T)` if
:attr:`return_complex` is true, or a real tensor of size :math:`(* \times N
\times T \times 2)`. Where :math:`*` is the optional batch size of
:attr:`input`, :math:`N` is the number of frequencies where STFT is applied
and :math:`T` is the total number of frames used.
.. warning::
This function changed signature at version 0.4.1. Calling with the
previous signature may cause error or return incorrect result.
Args:
input (Tensor): the input tensor of shape `(B?, L)` where `B?` is an optional
batch dimension
n_fft (int): size of Fourier transform
hop_length (int, optional): the distance between neighboring sliding window
frames. Default: ``None`` (treated as equal to ``floor(n_fft / 4)``)
win_length (int, optional): the size of window frame and STFT filter.
Default: ``None`` (treated as equal to :attr:`n_fft`)
window (Tensor, optional): the optional window function.
Shape must be 1d and `<= n_fft`
Default: ``None`` (treated as window of all :math:`1` s)
center (bool, optional): whether to pad :attr:`input` on both sides so
that the :math:`t`-th frame is centered at time :math:`t \times \text{hop\_length}`.
Default: ``True``
pad_mode (str, optional): controls the padding method used when
:attr:`center` is ``True``. Default: ``"reflect"``
normalized (bool, optional): controls whether to return the normalized STFT results
Default: ``False``
onesided (bool, optional): controls whether to return half of results to
avoid redundancy for real inputs.
Default: ``True`` for real :attr:`input` and :attr:`window`, ``False`` otherwise.
return_complex (bool, optional): whether to return a complex tensor, or
a real tensor with an extra last dimension for the real and
imaginary components.
.. versionchanged:: 2.0
``return_complex`` is now a required argument for real inputs,
as the default is being transitioned to ``True``.
.. deprecated:: 2.0
``return_complex=False`` is deprecated, instead use ``return_complex=True``
Note that calling :func:`torch.view_as_real` on the output will
recover the deprecated output format.
Returns:
Tensor: A tensor containing the STFT result with shape `(B?, N, T, C?)` where
- `B?` is an optional batch dimension from the input.
- `N` is the number of frequency samples, `(n_fft // 2) + 1` for
`onesided=True`, or otherwise `n_fft`.
- `T` is the number of frames, `1 + L // hop_length`
for `center=True`, or `1 + (L - n_fft) // hop_length` otherwise.
- `C?` is an optional length-2 dimension of real and imaginary
components, present when `return_complex=False`.
"""
if has_torch_function_unary(input):
return handle_torch_function(
stft,
(input,),
input,
n_fft,
hop_length=hop_length,
win_length=win_length,
window=window,
center=center,
pad_mode=pad_mode,
normalized=normalized,
onesided=onesided,
return_complex=return_complex,
)
# NOTE: Do not edit. This code will be removed once the forward-compatibility
# period is over for PR #73432
if center:
signal_dim = input.dim()
extended_shape = [1] * (3 - signal_dim) + list(input.size())
pad = int(n_fft // 2)
input = F.pad(input.view(extended_shape), [pad, pad], pad_mode)
input = input.view(input.shape[-signal_dim:])
return _VF.stft( # type: ignore[attr-defined]
input,
n_fft,
hop_length,
win_length,
window,
normalized,
onesided,
return_complex,
)
istft = _add_docstr(
torch.istft,
"istft(input, n_fft, hop_length=None, win_length=None, window=None, center=True, "
"normalized=False, onesided=None, length=None, return_complex=False) -> Tensor:\n"
r"""
Inverse short time Fourier Transform. This is expected to be the inverse of :func:`~torch.stft`.
.. warning::
From version 2.1, a warning will be provided if a :attr:`window` is
not specified. In a future release, this attribute will be required.
Please provide the same window used in the stft call.
It has the same parameters (+ additional optional parameter of :attr:`length`) and it should return the
least squares estimation of the original signal. The algorithm will check using the NOLA condition (
nonzero overlap).
Important consideration in the parameters :attr:`window` and :attr:`center` so that the envelope
created by the summation of all the windows is never zero at certain point in time. Specifically,
:math:`\sum_{t=-\infty}^{\infty} |w|^2[n-t\times hop\_length] \cancel{=} 0`.
Since :func:`~torch.stft` discards elements at the end of the signal if they do not fit in a frame,
``istft`` may return a shorter signal than the original signal (can occur if :attr:`center` is False
since the signal isn't padded). If `length` is given in the arguments and is longer than expected,
``istft`` will pad zeros to the end of the returned signal.
If :attr:`center` is ``True``, then there will be padding e.g. ``'constant'``, ``'reflect'``, etc.
Left padding can be trimmed off exactly because they can be calculated but right padding cannot be
calculated without additional information.
Example: Suppose the last window is:
``[17, 18, 0, 0, 0]`` vs ``[18, 0, 0, 0, 0]``
The :attr:`n_fft`, :attr:`hop_length`, :attr:`win_length` are all the same which prevents the calculation
of right padding. These additional values could be zeros or a reflection of the signal so providing
:attr:`length` could be useful. If :attr:`length` is ``None`` then padding will be aggressively removed
(some loss of signal).
[1] D. W. Griffin and J. S. Lim, "Signal estimation from modified short-time Fourier transform,"
IEEE Trans. ASSP, vol.32, no.2, pp.236-243, Apr. 1984.
Args:
input (Tensor): The input tensor. Expected to be in the format of :func:`~torch.stft`,
output. That is a complex tensor of shape `(B?, N, T)` where
- `B?` is an optional batch dimension
- `N` is the number of frequency samples, `(n_fft // 2) + 1`
for onesided input, or otherwise `n_fft`.
- `T` is the number of frames, `1 + length // hop_length` for centered stft,
or `1 + (length - n_fft) // hop_length` otherwise.
.. versionchanged:: 2.0
Real datatype inputs are no longer supported. Input must now have a
complex datatype, as returned by ``stft(..., return_complex=True)``.
n_fft (int): Size of Fourier transform
hop_length (Optional[int]): The distance between neighboring sliding window frames.
(Default: ``n_fft // 4``)
win_length (Optional[int]): The size of window frame and STFT filter. (Default: ``n_fft``)
window (Optional[torch.Tensor]): The optional window function.
Shape must be 1d and `<= n_fft`
(Default: ``torch.ones(win_length)``)
center (bool): Whether :attr:`input` was padded on both sides so that the :math:`t`-th frame is
centered at time :math:`t \times \text{hop\_length}`.
(Default: ``True``)
normalized (bool): Whether the STFT was normalized. (Default: ``False``)
onesided (Optional[bool]): Whether the STFT was onesided.
(Default: ``True`` if `n_fft != fft_size` in the input size)
length (Optional[int]): The amount to trim the signal by (i.e. the
original signal length). Defaults to `(T - 1) * hop_length` for
centered stft, or `n_fft + (T - 1) * hop_length` otherwise, where `T`
is the number of input frames.
return_complex (Optional[bool]):
Whether the output should be complex, or if the input should be
assumed to derive from a real signal and window.
Note that this is incompatible with ``onesided=True``.
(Default: ``False``)
Returns:
Tensor: Least squares estimation of the original signal of shape `(B?, length)` where
`B?` is an optional batch dimension from the input tensor.
""",
)
if TYPE_CHECKING:
# These _impl functions return a variable number of tensors as output with
# __torch_function__; tuple unpacking is done already rather than being
# done by the caller of the _impl function
_unique_impl_out = Any
else:
_unique_impl_out = Tuple[Tensor, Tensor, Tensor]
def _unique_impl(
input: Tensor,
sorted: bool = True,
return_inverse: bool = False,
return_counts: bool = False,
dim: Optional[int] = None,
) -> _unique_impl_out:
r"""unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None) -> Tuple[Tensor, Tensor, Tensor]
Returns the unique elements of the input tensor.
.. note:: This function is different from :func:`torch.unique_consecutive` in the sense that
this function also eliminates non-consecutive duplicate values.
.. note:: Currently in the CUDA implementation and the CPU implementation,
`torch.unique` always sort the tensor at the beginning regardless of the `sort` argument.
Sorting could be slow, so if your input tensor is already sorted, it is recommended to use
:func:`torch.unique_consecutive` which avoids the sorting.
Args:
input (Tensor): the input tensor
sorted (bool): Whether to sort the unique elements in ascending order
before returning as output.
return_inverse (bool): Whether to also return the indices for where
elements in the original input ended up in the returned unique list.
return_counts (bool): Whether to also return the counts for each unique
element.
dim (int, optional): the dimension to operate upon. If ``None``, the
unique of the flattened input is returned. Otherwise, each of the
tensors indexed by the given dimension is treated as one of the
elements to apply the unique operation upon. See examples for more
details. Default: ``None``
Returns:
(Tensor, Tensor (optional), Tensor (optional)): A tensor or a tuple of tensors containing
- **output** (*Tensor*): the output list of unique scalar elements.
- **inverse_indices** (*Tensor*): (optional) if
:attr:`return_inverse` is True, there will be an additional
returned tensor (same shape as input) representing the indices
for where elements in the original input map to in the output;
otherwise, this function will only return a single tensor.
- **counts** (*Tensor*): (optional) if
:attr:`return_counts` is True, there will be an additional
returned tensor (same shape as output or output.size(dim),
if dim was specified) representing the number of occurrences
for each unique value or tensor.
Example::
>>> output = torch.unique(torch.tensor([1, 3, 2, 3], dtype=torch.long))
>>> output
tensor([1, 2, 3])
>>> output, inverse_indices = torch.unique(
... torch.tensor([1, 3, 2, 3], dtype=torch.long), sorted=True, return_inverse=True)
>>> output
tensor([1, 2, 3])
>>> inverse_indices
tensor([0, 2, 1, 2])
>>> output, inverse_indices = torch.unique(
... torch.tensor([[1, 3], [2, 3]], dtype=torch.long), sorted=True, return_inverse=True)
>>> output
tensor([1, 2, 3])
>>> inverse_indices
tensor([[0, 2],
[1, 2]])
>>> a = torch.tensor([
... [
... [1, 1, 0, 0],
... [1, 1, 0, 0],
... [0, 0, 1, 1],
... ],
... [
... [0, 0, 1, 1],
... [0, 0, 1, 1],
... [1, 1, 1, 1],
... ],
... [
... [1, 1, 0, 0],
... [1, 1, 0, 0],
... [0, 0, 1, 1],
... ],
... ])
>>> # If we call `torch.unique(a, dim=0)`, each of the tensors `a[idx, :, :]`
>>> # will be compared. We can see that `a[0, :, :]` and `a[2, :, :]` match
>>> # each other, so one of them will be removed.
>>> (a[0, :, :] == a[2, :, :]).all()
tensor(True)
>>> a_unique_dim0 = torch.unique(a, dim=0)
>>> a_unique_dim0
tensor([[[0, 0, 1, 1],
[0, 0, 1, 1],
[1, 1, 1, 1]],
[[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 1, 1]]])
>>> # Notice which sub-tensors from `a` match with the sub-tensors from
>>> # `a_unique_dim0`:
>>> (a_unique_dim0[0, :, :] == a[1, :, :]).all()
tensor(True)
>>> (a_unique_dim0[1, :, :] == a[0, :, :]).all()
tensor(True)
>>> # For `torch.unique(a, dim=1)`, each of the tensors `a[:, idx, :]` are
>>> # compared. `a[:, 0, :]` and `a[:, 1, :]` match each other, so one of
>>> # them will be removed.
>>> (a[:, 0, :] == a[:, 1, :]).all()
tensor(True)
>>> torch.unique(a, dim=1)
tensor([[[0, 0, 1, 1],
[1, 1, 0, 0]],
[[1, 1, 1, 1],
[0, 0, 1, 1]],
[[0, 0, 1, 1],
[1, 1, 0, 0]]])
>>> # For `torch.unique(a, dim=2)`, the tensors `a[:, :, idx]` are compared.
>>> # `a[:, :, 0]` and `a[:, :, 1]` match each other. Also, `a[:, :, 2]` and
>>> # `a[:, :, 3]` match each other as well. So in this case, two of the
>>> # sub-tensors will be removed.
>>> (a[:, :, 0] == a[:, :, 1]).all()
tensor(True)
>>> (a[:, :, 2] == a[:, :, 3]).all()
tensor(True)
>>> torch.unique(a, dim=2)
tensor([[[0, 1],
[0, 1],
[1, 0]],
[[1, 0],
[1, 0],
[1, 1]],
[[0, 1],
[0, 1],
[1, 0]]])
"""
if has_torch_function_unary(input):
return handle_torch_function(
unique,
(input,),
input,
sorted=sorted,
return_inverse=return_inverse,
return_counts=return_counts,
dim=dim,
)
if dim is not None:
output, inverse_indices, counts = _VF.unique_dim(
input,
dim,
sorted=sorted,
return_inverse=return_inverse,
return_counts=return_counts,
)
else:
output, inverse_indices, counts = torch._unique2(
input,
sorted=sorted,
return_inverse=return_inverse,
return_counts=return_counts,
)
return output, inverse_indices, counts
def _unique_consecutive_impl(
input: Tensor,
return_inverse: bool = False,
return_counts: bool = False,
dim: Optional[int] = None,
) -> _unique_impl_out:
r"""Eliminates all but the first element from every consecutive group of equivalent elements.
.. note:: This function is different from :func:`torch.unique` in the sense that this function
only eliminates consecutive duplicate values. This semantics is similar to `std::unique`
in C++.
Args:
input (Tensor): the input tensor
return_inverse (bool): Whether to also return the indices for where
elements in the original input ended up in the returned unique list.
return_counts (bool): Whether to also return the counts for each unique
element.
dim (int): the dimension to apply unique. If ``None``, the unique of the
flattened input is returned. default: ``None``
Returns:
(Tensor, Tensor (optional), Tensor (optional)): A tensor or a tuple of tensors containing