From a875093d685e4377780e30e8feea7dc158f1b970 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20Andr=C3=A9?= <mail@nunoand.re>
Date: Sun, 13 Dec 2020 06:24:16 +0100
Subject: [PATCH] Add default format detection for Databook.load

---
 docs/api.rst         |   2 +
 src/tablib/core.py   | 235 ++++++++++++++++++++++---------------------
 tests/test_tablib.py |  46 ++++++++-
 3 files changed, 166 insertions(+), 117 deletions(-)
diff --git a/docs/api.rst b/docs/api.rst
index 9d915590..b6767674 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -39,6 +39,8 @@ Functions
 
 .. autofunction:: import_set
 
+.. autofunction:: import_book
+
 
 ----------
 Exceptions
diff --git a/src/tablib/core.py b/src/tablib/core.py
index f19577c3..f30fa8cd 100644
--- a/src/tablib/core.py
+++ b/src/tablib/core.py
@@ -79,16 +79,16 @@ def __contains__(self, item):
 
     @property
     def tuple(self):
-        """Tuple representation of :class:`Row`."""
+        """:class:`tuple` representation of :class:`Row`."""
         return tuple(self._row)
 
     @property
     def list(self):
-        """List representation of :class:`Row`."""
+        """:class:`list` representation of :class:`Row`."""
         return list(self._row)
 
     def has_tag(self, tag):
-        """Returns true if current row contains tag."""
+        """Returns :data:`True` if current row contains :ref:`tag <tags>`."""
 
         if tag is None:
             return False
@@ -102,8 +102,8 @@ class Dataset:
     """The :class:`Dataset` object is the heart of Tablib. It provides all core
     functionality.
 
-    Usually you create a :class:`Dataset` instance in your main module, and append
-    rows as you collect data. ::
+    Usually you create a :class:`Dataset` instance in your main module, and
+    append rows as you collect data. ::
 
         data = tablib.Dataset()
         data.headers = ('name', 'age')
@@ -132,16 +132,16 @@ class Dataset:
 
         data = tablib.Dataset(*data, headers=headers)
 
-    :param \\*args: (optional) list of rows to populate Dataset
-    :param headers: (optional) list strings for Dataset header row
-    :param title: (optional) string to use as title of the Dataset
+    :param optional `*args`: list of rows to populate Dataset
+    :param optional headers: list of strings for Dataset header row
+    :param optional title: string to use as title of the Dataset
 
 
     .. admonition:: Format Attributes Definition
 
     If you look at the code, the various output/import formats are not
-    defined within the :class:`Dataset` object. To add support for a new format, see
-    :ref:`Adding New Formats <newformats>`.
+    defined within the :class:`Dataset` object. To add support for a new format,
+    see :ref:`Adding New Formats <newformats>`.
 
     """
 
@@ -255,15 +255,12 @@ def _validate(self, row=None, col=None, safety=False):
             return False
 
     def _package(self, dicts=True, ordered=True):
-        """Packages Dataset into lists of dictionaries for transmission."""
+        """Packages :class:`Dataset` into lists of dictionaries for transmission."""
         # TODO: Dicts default to false?
 
         _data = list(self._data)
 
-        if ordered:
-            dict_pack = OrderedDict
-        else:
-            dict_pack = dict
+        dict_pack = OrderedDict if ordered else dict
 
         # Execute formatters
         if self._formatters:
@@ -280,7 +277,8 @@ def _package(self, dicts=True, ordered=True):
 
         if self.headers:
             if dicts:
-                data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
+                data = [dict_pack(list(zip(self.headers, data_row)))
+                        for data_row in _data]
             else:
                 data = [list(self.headers)] + list(_data)
         else:
@@ -289,10 +287,11 @@ def _package(self, dicts=True, ordered=True):
         return data
 
     def _get_headers(self):
-        """An *optional* list of strings to be used for header rows and attribute names.
-
-        This must be set manually. The given list length must equal :attr:`Dataset.width`.
+        """An *optional* list of strings to be used for header rows and
+        attribute names.
 
+        This must be set manually. The given list length must equal
+        :attr:`Dataset.width`.
         """
         return self.__headers
 
@@ -314,7 +313,7 @@ def _get_dict(self):
         been set, a list of Python dictionaries will be returned. If no headers have been set,
         a list of tuples (rows) will be returned instead.
 
-        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
+        A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute: ::
 
             data = tablib.Dataset()
             data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
@@ -323,7 +322,7 @@ def _get_dict(self):
         return self._package()
 
     def _set_dict(self, pickle):
-        """A native Python representation of the Dataset object. If headers have been
+        """A native Python representation of the :class:`Dataset` object. If headers have been
         set, a list of Python dictionaries will be returned. If no headers have been
         set, a list of tuples (rows) will be returned instead.
 
@@ -333,7 +332,6 @@ def _set_dict(self, pickle):
             data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
 
         """
-
         if not len(pickle):
             return
 
@@ -365,8 +363,8 @@ def _clean_col(self, col):
             header = []
 
         if len(col) == 1 and hasattr(col[0], '__call__'):
-
             col = list(map(col[0], self._data))
+
         col = tuple(header + col)
 
         return col
@@ -383,7 +381,6 @@ def width(self):
         """The number of columns currently in the :class:`Dataset`.
            Cannot be directly modified.
         """
-
         try:
             return len(self._data[0])
         except IndexError:
@@ -393,38 +390,43 @@ def width(self):
                 return 0
 
     def load(self, in_stream, format=None, **kwargs):
-        """
-        Import `in_stream` to the :class:`Dataset` object using the `format`.
-        `in_stream` can be a file-like object, a string, or a bytestring.
+        """Import ``in_stream`` to the :class:`Dataset` object using ``format``. ::
 
-        :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
-        """
+            with open('data.json', 'r') as fh:
+                imported_data = Dataset().load(fh)
 
+        :param in_stream: can be a file-like object, a string, or a bytestring.
+        :param format: an available :ref:`format <formats>`. If not set,
+            `Tablib` will try to detect it.
+        :type format: str, optional
+        :param optional `**kwargs`: custom configuration to the format
+            :meth:`import_set`.
+        """
         stream = normalize_input(in_stream)
         if not format:
             format = detect_format(stream)
 
         fmt = registry.get_format(format)
-        if not hasattr(fmt, 'import_set'):
-            raise UnsupportedFormat(f'Format {format} cannot be imported.')
-
-        if not import_set:
+        try:
+            fmt.import_set(self, stream, **kwargs)
+        except AttributeError:
             raise UnsupportedFormat(f'Format {format} cannot be imported.')
 
-        fmt.import_set(self, stream, **kwargs)
         return self
 
     def export(self, format, **kwargs):
         """
-        Export :class:`Dataset` object to `format`.
+        Export :class:`Dataset` object to ``format``.
 
-        :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
+        :param optional `**kwargs`: custom configuration to the format
+            :meth:`export_set`.
         """
         fmt = registry.get_format(format)
-        if not hasattr(fmt, 'export_set'):
-            raise UnsupportedFormat(f'Format {format} cannot be exported.')
 
-        return fmt.export_set(self, **kwargs)
+        try:
+            return fmt.export_set(self, **kwargs)
+        except AttributeError:
+            raise UnsupportedFormat(f'Format {format} cannot be exported.')
 
     # ----
     # Rows
@@ -435,37 +437,41 @@ def insert(self, index, row, tags=list()):
 
         Rows inserted must be the correct size (height or width).
 
-        The default behaviour is to insert the given row to the :class:`Dataset`
-        object at the given index.
-       """
+        The default behaviour is to insert the given row to the
+        :class:`Dataset` object at the given index.
 
+        .. versionadded:: 0.9.0
+           If inserting a row, you can add :ref:`tags <tags>` to the row you
+           are inserting. This gives you the ability to :meth:`~Dataset.filter`
+           your :class:`Dataset` later.
+        """
         self._validate(row)
         self._data.insert(index, Row(row, tags=tags))
 
     def rpush(self, row, tags=list()):
-        """Adds a row to the end of the :class:`Dataset`.
-        See :method:`Dataset.insert` for additional documentation.
+        """Adds a row to the end of the :class:`Dataset`. See
+        :meth:`~Dataset.insert` for additional documentation.
         """
 
         self.insert(self.height, row=row, tags=tags)
 
     def lpush(self, row, tags=list()):
-        """Adds a row to the top of the :class:`Dataset`.
-        See :method:`Dataset.insert` for additional documentation.
+        """Adds a row to the top of the :class:`Dataset`. See
+        :meth:`~Dataset.insert` for additional documentation.
         """
 
         self.insert(0, row=row, tags=tags)
 
     def append(self, row, tags=list()):
-        """Adds a row to the :class:`Dataset`.
-        See :method:`Dataset.insert` for additional documentation.
+        """Adds a row to the :class:`Dataset`. See :meth:`~Dataset.insert` for
+        additional documentation.
         """
 
         self.rpush(row, tags)
 
     def extend(self, rows, tags=list()):
         """Adds a list of rows to the :class:`Dataset` using
-        :method:`Dataset.append`
+        :meth:`~Dataset.append`
         """
 
         for row in rows:
@@ -518,13 +524,7 @@ def insert_col(self, index, col=None, header=None):
            header attribute must be set, and will be considered the header for
            that row.
 
-        .. versionadded:: 0.9.0
-           If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
-           This gives you the ability to :method:`filter <Dataset.filter>` your
-           :class:`Dataset` later.
-
         """
-
         if col is None:
             col = []
 
@@ -556,21 +556,21 @@ def insert_col(self, index, col=None, header=None):
             self._data = [Row([row]) for row in col]
 
     def rpush_col(self, col, header=None):
-        """Adds a column to the end of the :class:`Dataset`.
-        See :method:`Dataset.insert` for additional documentation.
+        """Adds a column to the end of the :class:`Dataset`. See
+        :meth:`~Dataset.insert_col` for additional documentation.
         """
 
         self.insert_col(self.width, col, header=header)
 
     def lpush_col(self, col, header=None):
-        """Adds a column to the top of the :class:`Dataset`.
-        See :method:`Dataset.insert` for additional documentation.
+        """Adds a column to the beginning of the :class:`Dataset`. See
+        :meth:`~Dataset.insert_col` for additional documentation.
         """
 
         self.insert_col(0, col, header=header)
 
     def insert_separator(self, index, text='-'):
-        """Adds a separator to :class:`Dataset` at given index."""
+        """Adds a :ref:`separator <separators>` to :class:`Dataset` at given index."""
 
         sep = (index, text)
         self._separators.append(sep)
@@ -587,8 +587,8 @@ def append_separator(self, text='-'):
         self.insert_separator(index, text)
 
     def append_col(self, col, header=None):
-        """Adds a column to the :class:`Dataset`.
-        See :method:`Dataset.insert_col` for additional documentation.
+        """Adds a column to the :class:`Dataset`. See
+        :meth:`~Dataset.insert_col` for additional documentation.
         """
 
         self.rpush_col(col, header)
@@ -637,10 +637,9 @@ def filter(self, tag):
     def sort(self, col, reverse=False):
         """Sort a :class:`Dataset` by a specific column, given string (for
         header) or integer (for column index). The order can be reversed by
-        setting ``reverse`` to ``True``.
+        setting ``reverse`` to :data:`True`.
 
-        Returns a new :class:`Dataset` instance where columns have been
-        sorted.
+        Returns a new :class:`Dataset` instance where columns have been sorted.
         """
 
         if isinstance(col, str):
@@ -673,8 +672,9 @@ def sort(self, col, reverse=False):
 
     def transpose(self):
         """Transpose a :class:`Dataset`, turning rows into columns and vice
-        versa, returning a new ``Dataset`` instance. The first row of the
-        original instance becomes the new header row."""
+        versa, returning a new :class:`Dataset` instance. The first row of the
+        original instance becomes the new header row.
+        """
 
         # Don't transpose if there is no data
         if not self:
@@ -700,15 +700,15 @@ def transpose(self):
         return _dset
 
     def stack(self, other):
-        """Stack two :class:`Dataset` instances together by
-        joining at the row level, and return new combined
-        ``Dataset`` instance."""
+        """Stack two :class:`Dataset` instances together by joining at the row
+        level, and return new combined :class:`Dataset` instance.
+        """
 
         if not isinstance(other, Dataset):
             return
 
         if self.width != other.width:
-            raise InvalidDimensions
+            raise InvalidDimensions('Datasets have different widths.')
 
         # Copy the source data
         _dset = copy(self)
@@ -722,20 +722,21 @@ def stack(self, other):
         return _dset
 
     def stack_cols(self, other):
-        """Stack two :class:`Dataset` instances together by
-        joining at the column level, and return a new
-        combined ``Dataset`` instance. If either ``Dataset``
-        has headers set, than the other must as well."""
+        """Stack two :class:`Dataset` instances together by joining at the
+        column level, and return a new combined :class:`Dataset` instance. If
+        either :class:`Dataset` has headers set, then the other must as well.
+        """
 
         if not isinstance(other, Dataset):
             return
 
         if self.headers or other.headers:
             if not self.headers or not other.headers:
-                raise HeadersNeeded
+                raise HeadersNeeded('If either dataset has headers, '
+                                    'the other must as well.')
 
         if self.height != other.height:
-            raise InvalidDimensions
+            raise InvalidDimensions('Datasets have different heights.')
 
         try:
             new_headers = self.headers + other.headers
@@ -755,10 +756,11 @@ def stack_cols(self, other):
         return _dset
 
     def remove_duplicates(self):
-        """Removes all duplicate rows from the :class:`Dataset` object
-        while maintaining the original order."""
+        """Removes all duplicate rows from the :class:`Dataset` object while
+        maintaining the original order."""
         seen = set()
-        self._data[:] = [row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row)))]
+        self._data[:] = [row for row in self._data
+                         if not (tuple(row) in seen or seen.add(tuple(row)))]
 
     def wipe(self):
         """Removes all content and headers from the :class:`Dataset` object."""
@@ -766,10 +768,9 @@ def wipe(self):
         self.__headers = None
 
     def subset(self, rows=None, cols=None):
-        """Returns a new instance of the :class:`Dataset`,
-        including only specified rows and columns.
+        """Returns a new instance of the :class:`Dataset`, including only
+        specified rows and columns.
         """
-
         # Don't return if no data
         if not self:
             return
@@ -834,83 +835,89 @@ def add_sheet(self, dataset):
 
     def _package(self, ordered=True):
         """Packages :class:`Databook` for delivery."""
-        collector = []
 
-        if ordered:
-            dict_pack = OrderedDict
-        else:
-            dict_pack = dict
+        dict_pack = OrderedDict if ordered else dict
 
-        for dset in self._datasets:
-            collector.append(dict_pack(
-                title=dset.title,
-                data=dset._package(ordered=ordered)
-            ))
-        return collector
+        return [dict_pack(title=dset.title,
+                          data=dset._package(ordered=ordered))
+                for dset in self._datasets]
 
     @property
     def size(self):
-        """The number of the :class:`Dataset` objects within :class:`Databook`."""
+        """The number of the :class:`Dataset` objects within :class:`Databook`.
+        """
         return len(self._datasets)
 
-    def load(self, in_stream, format, **kwargs):
+    def load(self, in_stream, format=None, **kwargs):
         """
-        Import `in_stream` to the :class:`Databook` object using the `format`.
-        `in_stream` can be a file-like object, a string, or a bytestring.
+        Import ``in_stream`` to the :class:`Databook` object using ``format``. ::
 
-        :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
-        """
+            with open('data.xlsx', 'rb') as fh:
+                imported_data = Databook().load(fh)
 
+        :param in_stream: can be a file-like object, a string, or a bytestring.
+        :param format: an available :ref:`format <formats>`. If not set,
+            `Tablib` will try to detect it.
+        :type format: str, optional
+        :param optional `**kwargs`: custom configuration to the format
+            :meth:`import_book`.
+        """
         stream = normalize_input(in_stream)
         if not format:
             format = detect_format(stream)
 
         fmt = registry.get_format(format)
-        if not hasattr(fmt, 'import_book'):
+
+        try:
+            fmt.import_book(self, stream, **kwargs)
+        except AttributeError:
             raise UnsupportedFormat(f'Format {format} cannot be loaded.')
 
-        fmt.import_book(self, stream, **kwargs)
         return self
 
     def export(self, format, **kwargs):
         """
         Export :class:`Databook` object to `format`.
 
-        :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
+        :param str format: an available :ref:`format <formats>`.
+        :param optional `**kwargs`: custom configuration to the format
+            :meth:`export_book`.
         """
         fmt = registry.get_format(format)
-        if not hasattr(fmt, 'export_book'):
+        try:
+            return fmt.export_book(self, **kwargs)
+        except AttributeError:
             raise UnsupportedFormat(f'Format {format} cannot be exported.')
 
-        return fmt.export_book(self, **kwargs)
-
 
 def detect_format(stream):
     """Return format name of given stream (file-like object, string, or bytestring)."""
     stream = normalize_input(stream)
-    fmt_title = None
+
     for fmt in registry.formats():
         try:
             if fmt.detect(stream):
-                fmt_title = fmt.title
-                break
+                return fmt.title
         except AttributeError:
             pass
         finally:
             if hasattr(stream, 'seek'):
                 stream.seek(0)
-    return fmt_title
 
+    raise UnsupportedFormat('Format cannot be found.')
 
-def import_set(stream, format=None, **kwargs):
-    """Return dataset of given stream (file-like object, string, or bytestring)."""
 
+def import_set(stream, format=None, **kwargs):
+    """Return :class:`Dataset` of given stream (file-like object, string, or
+    bytestring).
+    """
     return Dataset().load(normalize_input(stream), format, **kwargs)
 
 
 def import_book(stream, format=None, **kwargs):
-    """Return dataset of given stream (file-like object, string, or bytestring)."""
-
+    """Return :class:`Databook` of given stream (file-like object, string, or
+    bytestring).
+    """
     return Databook().load(normalize_input(stream), format, **kwargs)
 
 
diff --git a/tests/test_tablib.py b/tests/test_tablib.py
index 96a73862..6666a756 100755
--- a/tests/test_tablib.py
+++ b/tests/test_tablib.py
@@ -348,7 +348,8 @@ def test_auto_format_detect(self):
         _bunk = StringIO(
             '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
         )
-        self.assertEqual(tablib.detect_format(_bunk), None)
+        with self.assertRaises(UnsupportedFormat):
+            tablib.detect_format(_bunk)
 
     def test_transpose(self):
         """Transpose a dataset."""
@@ -406,8 +407,8 @@ def test_column_stacking(self):
         self.assertEqual(column_stacked[0],
                          ("John", "Adams", 90, "John", "Adams", 90))
 
-    def test_sorting(self):
-        """Sort columns."""
+    def test_sorting_by_header(self):
+        """Sort columns by header (str)."""
 
         sorted_data = self.founders.sort(col="first_name")
         self.assertEqual(sorted_data.title, 'Founders')
@@ -423,6 +424,38 @@ def test_sorting(self):
         self.assertEqual(second_row, expected_second)
         self.assertEqual(third_row, expected_third)
 
+    def test_sorting_by_index(self):
+        """Sort columns by index (int)."""
+
+        sorted_data = self.founders.sort(col=0)
+        self.assertEqual(sorted_data.title, 'Founders')
+
+        first_row = sorted_data[0]
+        second_row = sorted_data[2]
+        third_row = sorted_data[1]
+        expected_first = self.founders[1]
+        expected_second = self.founders[2]
+        expected_third = self.founders[0]
+
+        self.assertEqual(first_row, expected_first)
+        self.assertEqual(second_row, expected_second)
+        self.assertEqual(third_row, expected_third)
+
+    def test_extend(self):
+        dset = tablib.Dataset()
+        dset.extend([self.john, self.george, self.tom])
+
+        self.assertEqual(list(dset), list(self.founders))
+
+    def test_filter(self):
+        dset = tablib.Dataset()
+        dset.append(self.john, tags=['Massachusetts'])
+        dset.append(self.george, tags=['Virginia'])
+        dset.append(self.tom, tags=['Virginia'])
+
+        filtered = dset.filter('Virginia')
+        self.assertEqual(list(filtered), [self.george, self.tom])
+
     def test_remove_duplicates(self):
         """Unique Rows."""
 
@@ -476,6 +509,13 @@ def test_subset(self):
         self.assertEqual(subset._data[0].list, ['John', 90])
         self.assertEqual(subset._data[1].list, ['Thomas', 50])
 
+        # Verify default subset equals self
+        subset = data.subset()
+        self.assertEqual(subset.headers, list(self.headers))
+        self.assertEqual(subset._data[0].tuple, self.john)
+        self.assertEqual(subset._data[1].tuple, self.george)
+        self.assertEqual(subset._data[2].tuple, self.tom)
+
     def test_formatters(self):
         """Confirm formatters are being triggered."""