pytorch · vmoens · Nov 23, 2024 · Nov 22, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/docs/source/reference/tensorclass.rst b/docs/source/reference/tensorclass.rst
@@ -282,6 +282,7 @@ Here is an example:
     TensorClass
     NonTensorData
     NonTensorStack
+    from_dataclass
 
 Auto-casting
 ------------

diff --git a/tensordict/__init__.py b/tensordict/__init__.py
@@ -43,6 +43,7 @@
 from tensordict.memmap import MemoryMappedTensor
 from tensordict.persistent import PersistentTensorDict
 from tensordict.tensorclass import (
+    from_dataclass,
     NonTensorData,
     NonTensorStack,
     tensorclass,

diff --git a/tensordict/_lazy.py b/tensordict/_lazy.py
@@ -329,15 +329,38 @@ def _reduce_get_metadata(self):
     @classmethod
     def from_dict(
         cls,
-        input_dict,
+        input_dict: List[Dict[NestedKey, Any]],
+        *other,
+        auto_batch_size: bool = False,
         batch_size=None,
         device=None,
         batch_dims=None,
         stack_dim_name=None,
         stack_dim=0,
     ):
+        # if batch_size is not None:
+        #     batch_size = list(batch_size)
+        #     if stack_dim is None:
+        #         stack_dim = 0
+        #     n = batch_size.pop(stack_dim)
+        #     if n != len(input_dict):
+        #         raise ValueError(
+        #             "The number of dicts and the corresponding batch-size must match, "
+        #             f"got len(input_dict)={len(input_dict)} and batch_size[{stack_dim}]={n}."
+        #         )
+        #     batch_size = torch.Size(batch_size)
         return LazyStackedTensorDict(
-            *(input_dict[str(i)] for i in range(len(input_dict))),
+            *(
+                TensorDict.from_dict(
+                    input_dict[str(i)],
+                    *other,
+                    auto_batch_size=auto_batch_size,
+                    device=device,
+                    batch_dims=batch_dims,
+                    batch_size=batch_size,
+                )
+                for i in range(len(input_dict))
+            ),
             stack_dim=stack_dim,
             stack_dim_name=stack_dim_name,
         )

diff --git a/tensordict/_td.py b/tensordict/_td.py
@@ -615,7 +615,7 @@ def __ne__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other != self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -639,7 +639,7 @@ def __xor__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other ^ self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -663,7 +663,7 @@ def __or__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other | self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -687,7 +687,7 @@ def __eq__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other == self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -709,7 +709,7 @@ def __ge__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other <= self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -731,7 +731,7 @@ def __gt__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other < self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -753,7 +753,7 @@ def __le__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other >= self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -775,7 +775,7 @@ def __lt__(self, other: object) -> T | bool:
         if is_tensorclass(other):
             return other > self
         if isinstance(other, (dict,)):
-            other = self.from_dict_instance(other)
+            other = self.from_dict_instance(other, auto_batch_size=False)
         if _is_tensor_collection(type(other)):
             keys1 = set(self.keys())
             keys2 = set(other.keys())
@@ -1957,8 +1957,46 @@ def _unsqueeze(tensor):
 
     @classmethod
     def from_dict(
-        cls, input_dict, batch_size=None, device=None, batch_dims=None, names=None
+        cls,
+        input_dict,
+        *others,
+        auto_batch_size: bool | None = None,
+        batch_size=None,
+        device=None,
+        batch_dims=None,
+        names=None,
     ):
+        if others:
+            if batch_size is not None:
+                raise TypeError(
+                    "conflicting batch size values. Please use the keyword argument only."
+                )
+            if device is not None:
+                raise TypeError(
+                    "conflicting device values. Please use the keyword argument only."
+                )
+            if batch_dims is not None:
+                raise TypeError(
+                    "conflicting batch_dims values. Please use the keyword argument only."
+                )
+            if names is not None:
+                raise TypeError(
+                    "conflicting names values. Please use the keyword argument only."
+                )
+            warn(
+                "All positional arguments after filename will be deprecated in v0.8. Please use keyword arguments instead.",
+                category=DeprecationWarning,
+            )
+            batch_size, *others = others
+            if len(others):
+                device, *others = others
+                if len(others):
+                    batch_dims, *others = others
+                    if len(others):
+                        names, *others = others
+                        if len(others):
+                            raise TypeError("Too many positional arguments.")
+
         if batch_dims is not None and batch_size is not None:
             raise ValueError(
                 "Cannot pass both batch_size and batch_dims to `from_dict`."
@@ -1967,12 +2005,12 @@ def from_dict(
         batch_size_set = torch.Size(()) if batch_size is None else batch_size
         input_dict = dict(input_dict)
         for key, value in list(input_dict.items()):
-            if isinstance(value, (dict,)):
-                # we don't know if another tensor of smaller size is coming
-                # so we can't be sure that the batch-size will still be valid later
-                input_dict[key] = TensorDict.from_dict(
-                    value, batch_size=[], device=device, batch_dims=None
-                )
+            # we don't know if another tensor of smaller size is coming
+            # so we can't be sure that the batch-size will still be valid later
+            input_dict[key] = TensorDict.from_any(
+                value,
+                auto_batch_size=False,
+            )
         # regular __init__ breaks because a tensor may have the same batch-size as the tensordict
         out = cls(
             input_dict,
@@ -1981,7 +2019,19 @@ def from_dict(
             names=names,
         )
         if batch_size is None:
-            _set_max_batch_size(out, batch_dims)
+            if auto_batch_size is None and batch_dims is None:
+                warn(
+                    "The batch-size was not provided and auto_batch_size isn't set either. "
+                    "Currently, from_dict will call set auto_batch_size=True but this behaviour "
+                    "will be changed in v0.8 and auto_batch_size will be False onward. "
+                    "To silence this warning, pass auto_batch_size directly.",
+                    category=DeprecationWarning,
+                )
+                auto_batch_size = True
+            elif auto_batch_size is None:
+                auto_batch_size = True
+            if auto_batch_size:
+                _set_max_batch_size(out, batch_dims)
         else:
             out.batch_size = batch_size
         return out
@@ -1998,8 +2048,46 @@ def _from_dict_validated(
         )
 
     def from_dict_instance(
-        self, input_dict, batch_size=None, device=None, batch_dims=None, names=None
+        self,
+        input_dict,
+        *others,
+        auto_batch_size: bool | None = None,
+        batch_size=None,
+        device=None,
+        batch_dims=None,
+        names=None,
     ):
+        if others:
+            if batch_size is not None:
+                raise TypeError(
+                    "conflicting batch size values. Please use the keyword argument only."
+                )
+            if device is not None:
+                raise TypeError(
+                    "conflicting device values. Please use the keyword argument only."
+                )
+            if batch_dims is not None:
+                raise TypeError(
+                    "conflicting batch_dims values. Please use the keyword argument only."
+                )
+            if names is not None:
+                raise TypeError(
+                    "conflicting names values. Please use the keyword argument only."
+                )
+            warn(
+                "All positional arguments after filename will be deprecated in v0.8. Please use keyword arguments instead.",
+                category=DeprecationWarning,
+            )
+            batch_size, *others = others
+            if len(others):
+                device, *others = others
+                if len(others):
+                    batch_dims, *others = others
+                    if len(others):
+                        names, *others = others
+                        if len(others):
+                            raise TypeError("Too many positional arguments.")
+
         if batch_dims is not None and batch_size is not None:
             raise ValueError(
                 "Cannot pass both batch_size and batch_dims to `from_dict`."
@@ -2014,22 +2102,45 @@ def from_dict_instance(
                 cur_value = self.get(key, None)
                 if cur_value is not None:
                     input_dict[key] = cur_value.from_dict_instance(
-                        value, batch_size=[], device=device, batch_dims=None
+                        value,
+                        device=device,
+                        auto_batch_size=False,
                     )
                     continue
-                # we don't know if another tensor of smaller size is coming
-                # so we can't be sure that the batch-size will still be valid later
-                input_dict[key] = TensorDict.from_dict(
-                    value, batch_size=[], device=device, batch_dims=None
+                else:
+                    # we don't know if another tensor of smaller size is coming
+                    # so we can't be sure that the batch-size will still be valid later
+                    input_dict[key] = TensorDict.from_dict(
+                        value,
+                        device=device,
+                        auto_batch_size=False,
+                    )
+            else:
+                input_dict[key] = TensorDict.from_any(
+                    value,
+                    auto_batch_size=False,
                 )
+
         out = TensorDict.from_dict(
             input_dict,
             batch_size=batch_size_set,
             device=device,
             names=names,
         )
         if batch_size is None:
-            _set_max_batch_size(out, batch_dims)
+            if auto_batch_size is None and batch_dims is None:
+                warn(
+                    "The batch-size was not provided and auto_batch_size isn't set either. "
+                    "Currently, from_dict will call set auto_batch_size=True but this behaviour "
+                    "will be changed in v0.8 and auto_batch_size will be False onward. "
+                    "To silence this warning, pass auto_batch_size directly.",
+                    category=DeprecationWarning,
+                )
+                auto_batch_size = True
+            elif auto_batch_size is None:
+                auto_batch_size = True
+            if auto_batch_size:
+                _set_max_batch_size(out, batch_dims)
         else:
             out.batch_size = batch_size
         return out
@@ -3857,7 +3968,14 @@ def expand(self, *args: int, inplace: bool = False) -> T:
 
     @classmethod
     def from_dict(
-        cls, input_dict, batch_size=None, device=None, batch_dims=None, names=None
+        cls,
+        input_dict,
+        *others,
+        auto_batch_size: bool = False,
+        batch_size=None,
+        device=None,
+        batch_dims=None,
+        names=None,
     ):
         raise NotImplementedError(f"from_dict not implemented for {cls.__name__}.")
 
@@ -4273,6 +4391,12 @@ def _items(
                 (key, tensordict._get_str(key, NO_DEFAULT))
                 for key in tensordict._source.keys()
             )
+        from tensordict.persistent import PersistentTensorDict
+
+        if isinstance(tensordict, PersistentTensorDict):
+            return (
+                (key, tensordict._get_str(key, NO_DEFAULT)) for key in tensordict.keys()
+            )
         raise NotImplementedError(type(tensordict))
 
     def _keys(self) -> _TensorDictKeysView:
@@ -4697,7 +4821,9 @@ def from_modules(
     )
 
 
-def from_dict(input_dict, batch_size=None, device=None, batch_dims=None, names=None):
+def from_dict(
+    input_dict, *others, batch_size=None, device=None, batch_dims=None, names=None
+):
     """Returns a TensorDict created from a dictionary or another :class:`~.tensordict.TensorDict`.
 
     If ``batch_size`` is not specified, returns the maximum batch size possible.
@@ -4762,6 +4888,7 @@ def from_dict(input_dict, batch_size=None, device=None, batch_dims=None, names=N
     """
     return TensorDict.from_dict(
         input_dict,
+        *others,
         batch_size=batch_size,
         device=device,
         batch_dims=batch_dims,