Optional validation step (and documentation improvements) #107

Merged: 28 commits, Jan 30, 2023

Commits:
4916abd
add 3.11 classifier
fsoubelet Jan 26, 2023
e64095a
fix language in doc config
fsoubelet Jan 26, 2023
10953b9
bump version
fsoubelet Jan 26, 2023
45f3072
normalize subheader line
fsoubelet Jan 26, 2023
b299e4c
normalize subheader line
fsoubelet Jan 26, 2023
de4220e
options to skip validation
fsoubelet Jan 26, 2023
62e65be
conf and examples to functions
fsoubelet Jan 26, 2023
f2a6d99
add new dependencies for doc goodies
fsoubelet Jan 26, 2023
ec4645f
examples
fsoubelet Jan 26, 2023
6979bcd
add missing type hint, fix admonition
fsoubelet Jan 26, 2023
dc3c433
hints and returns
fsoubelet Jan 26, 2023
abf685b
tests for new reader argument
fsoubelet Jan 26, 2023
87ebc1b
tests for new writer argument
fsoubelet Jan 26, 2023
2ada3c8
we are only 3.7+ so this should go away, to confirm with Josch on the…
fsoubelet Jan 26, 2023
8644051
hint admonition for the methodology
fsoubelet Jan 27, 2023
65a76dd
rephrasing
fsoubelet Jan 27, 2023
93eac60
update changelog
fsoubelet Jan 27, 2023
3094cec
remove old commented out test
fsoubelet Jan 30, 2023
2e0e26a
remove and update this too
fsoubelet Jan 30, 2023
5a6e0e6
tests for space in column name
fsoubelet Jan 30, 2023
78cbb00
validation off by default when reading
fsoubelet Jan 30, 2023
d5115b7
another no validation test in writer
fsoubelet Jan 30, 2023
a889533
validate in new name at import
fsoubelet Jan 30, 2023
08dfb4e
adapt warning text in reader
fsoubelet Jan 30, 2023
d96520a
rename validate_after_reading to validate
fsoubelet Jan 30, 2023
8e1489d
rename validate_before_writing to validate
fsoubelet Jan 30, 2023
0cb54d1
adapt warning text in writer
fsoubelet Jan 30, 2023
5925a3e
named admonition in the docs
fsoubelet Jan 30, 2023
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# TFS-Pandas Changelog

## Version 3.3.0

- Added:
  - Users can now skip DataFrame validation after reading from file / before writing to file. Validation is left on by default, but can be turned off with a boolean argument.

- Changed:
  - The documentation has been expanded and improved, notably with the addition of example code snippets.

## Version 3.2.1

- Changed:
62 changes: 46 additions & 16 deletions doc/conf.py
@@ -11,14 +11,9 @@
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import os
import pathlib
import sys

# ignore numpy warnings, see:
# https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility
import warnings

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -55,16 +50,36 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.doctest",
"sphinx.ext.todo",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.githubpages",
"sphinx.ext.napoleon",
"sphinx.ext.autodoc", # Include documentation from docstrings
"sphinx.ext.coverage", # Collect doc coverage stats
"sphinx.ext.doctest", # Test snippets in the documentation
"sphinx.ext.githubpages", # Publish HTML docs in GitHub Pages
"sphinx.ext.intersphinx", # Link to other projects’ documentation
"sphinx.ext.mathjax", # Render math via JavaScript
"sphinx.ext.napoleon", # Support for NumPy and Google style docstrings
"sphinx.ext.todo", # Support for todo items
"sphinx.ext.viewcode", # Add links to highlighted source code
"sphinx_copybutton", # Add a "copy" button to code blocks
"sphinx-prompt", # prompt symbols will not be copy-pastable
"sphinx_codeautolink", # Automatically link example code to documentation source
]

# Config for autosectionlabel extension
autosectionlabel_prefix_document = True
autosectionlabel_maxdepth = 2

# Config for the napoleon extension
napoleon_numpy_docstring = False
napoleon_include_init_with_doc = True
napoleon_use_admonition_for_examples = True
napoleon_use_admonition_for_notes = True
napoleon_use_admonition_for_references = True
napoleon_preprocess_types = True
napoleon_attr_annotations = True

# Configuration for sphinx.ext.todo
todo_include_todos = True

# Add any paths that contain templates here, relative to this directory.
# templates_path = ['_templates']

@@ -101,7 +116,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -111,8 +126,9 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
# The reST default role (used for this markup: `text`) to use for all
# documents.
default_role = "obj"

# -- Options for HTML output ----------------------------------------------

@@ -215,3 +231,17 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
"Miscellaneous",
),
]

# -- Intersphinx Configuration ----------------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
# use in refs e.g:
# :ref:`comparison manual <python:comparisons>`
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"matplotlib": ("https://matplotlib.org/stable/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/", None),
"cpymad": ("https://hibtc.github.io/cpymad/", None),
}
3 changes: 1 addition & 2 deletions doc/modules/index.rst
@@ -1,5 +1,5 @@
TFS-Pandas Modules
**************************
==================

.. automodule:: tfs.collection
:members:
@@ -31,4 +31,3 @@ TFS-Pandas Modules

.. automodule:: tfs.writer
:members:

3 changes: 2 additions & 1 deletion setup.py
@@ -37,7 +37,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
EXTRA_DEPENDENCIES = {
"test": ["pytest>=5.2", "pytest-cov>=2.9", "cpymad>=1.8.1"],
"hdf5": ["h5py>=2.9.0", "tables>=3.6.0"],
"doc": ["sphinx", "sphinx_rtd_theme"],
"doc": ["sphinx", "sphinx_rtd_theme", "sphinx_copybutton", "sphinx-prompt", "sphinx_codeautolink"],
}
EXTRA_DEPENDENCIES.update({"all": [elem for list_ in EXTRA_DEPENDENCIES.values() for elem in list_]})
EXTRA_DEPENDENCIES["test"] += EXTRA_DEPENDENCIES["hdf5"]
@@ -66,6 +66,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict:
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering",
"Topic :: Software Development :: Libraries :: Python Modules",
"Typing :: Typed",
19 changes: 19 additions & 0 deletions tests/inputs/space_in_colname.tfs
@@ -0,0 +1,19 @@
@ TITLE %s "Title of your tfs file"
@ DPP %le 1.0
@ Q1 %le 0.269974877952
@ Q1RMS %le 1.75642567736e-07
@ NATQ1 %le 0.280041400187
@ NATQ1RMS %le 0.00102479265802
@ BPMCOUNT %d 9
# Some comment line here
* NAME S NUMBER CO CORMS "BPM RES"
$ %s %le %d %le %le %le
BPMYB.5L2.B1 28.288 1 -0.280727353099 0.00404721900879 0.121264541395
BPMYB.4L2.B1 48.858 2 0.601472827003 0.00301396244054 0.129738519811
BPMWI.4L2.B1 73.3255 3 -0.610294990396 0.0039123010318 0.0952864848273
BPMSX.4L2.B1 123.4825 3472136972 0.778206651453 0.00542543379504 0.0578581425476
"BPMS.2L2.B1" 161.394 59055944 0.585105573645 0.00291016910226 0.1223625619
"BPMSW.1L2.B1" 171.328 09202215 2.50235465023 0.00275350035218 0.148603785488
BPMSW.1R2.B1 214.518 3117 1.81036167087 0.00282138482457 0.164954082556
BPMS.2R2.B1 224.452 18943819309 0.0791371365672 0.00474290041487 0.122265653712
BPMSX.4R2.B1 262.3635 105 -0.00665768479832 0.00350302654669 0.187320306406
35 changes: 33 additions & 2 deletions tests/test_reader.py
@@ -6,8 +6,8 @@

import tfs
from tfs import read_tfs, write_tfs
from tfs.errors import TfsFormatError
from tfs.constants import HEADER
from tfs.errors import TfsFormatError

CURRENT_DIR = pathlib.Path(__file__).parent

@@ -35,6 +35,26 @@ def test_tfs_read_str_input(self, _tfs_file_str: str):
assert len(str(test_file)) > 0
assert isinstance(test_file.index[0], str)

def test_tfs_read_no_validation(self, _tfs_file_pathlib: pathlib.Path):
test_file = read_tfs(_tfs_file_pathlib, index="NAME", validate=False)
assert len(test_file.headers) > 0
assert len(test_file.columns) > 0
assert len(test_file.index) > 0
assert len(str(test_file)) > 0
assert isinstance(test_file.index[0], str)

def test_tfs_read_wrong_file_no_validation(self, _space_in_colnames_tfs_path: pathlib.Path):
# The read file has a space in a column name, which should raise;
# we make sure it goes through when explicitly skipping validation
df = read_tfs(_space_in_colnames_tfs_path, index="NAME", validate=False)
assert "BPM RES" in df.columns


def test_tfs_read_no_validation_doesnt_warn(self, caplog):
nan_tfs_path = pathlib.Path(__file__).parent / "inputs" / "has_nans.tfs"
_ = read_tfs(nan_tfs_path, index="NAME", validate=False)
assert "contains non-physical values at Index:" not in caplog.text

def tfs_indx_pathlib_input(self, _tfs_file_pathlib: pathlib.Path):
test_file = read_tfs(_tfs_file_pathlib)
assert test_file.indx["BPMYB.5L2.B1"] == test_file.set_index("NAME")["BPMYB.5L2.B1"]
@@ -104,10 +124,16 @@ def test_id_to_type_handles_typo_str_id(self):
with pytest.raises(TfsFormatError):
_ = tfs.reader._id_to_type(typoed_str_id)

def test_fail_space_in_colname(self, _space_in_colnames_tfs_path: pathlib.Path):
# The read file has a space in a column name, which should raise
with pytest.raises(TfsFormatError):
read_tfs(_space_in_colnames_tfs_path, index="NAME", validate=True)


class TestWarnings:
def test_warn_unphysical_values(self, caplog):
nan_tfs_path = pathlib.Path(__file__).parent / "inputs" / "has_nans.tfs"
_ = read_tfs(nan_tfs_path, index="NAME")
_ = read_tfs(nan_tfs_path, index="NAME", validate=True)
for record in caplog.records:
assert record.levelname == "WARNING"
assert "contains non-physical values at Index:" in caplog.text
@@ -136,6 +162,11 @@ def _no_colnames_tfs_path() -> pathlib.Path:
return pathlib.Path(__file__).parent / "inputs" / "no_colnames.tfs"


@pytest.fixture()
def _space_in_colnames_tfs_path() -> pathlib.Path:
return pathlib.Path(__file__).parent / "inputs" / "space_in_colname.tfs"


@pytest.fixture()
def _tfs_file_wise() -> pathlib.Path:
return CURRENT_DIR / "inputs" / "wise_header.tfs"
36 changes: 21 additions & 15 deletions tests/test_writer.py
@@ -9,7 +9,8 @@
import pytest
from cpymad.madx import Madx
from pandas._testing import assert_dict_equal
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
from pandas.testing import (assert_frame_equal, assert_index_equal,
assert_series_equal)

import tfs
from tfs import TfsDataFrame, read_tfs, write_tfs
@@ -100,6 +101,15 @@ def test_tfs_write_read(self, _tfs_dataframe, tmp_path):
assert_frame_equal(_tfs_dataframe, new, check_exact=False) # float precision can be an issue
assert_dict_equal(_tfs_dataframe.headers, new.headers, compare_keys=True)

def test_tfs_write_read_no_validate(self, _tfs_dataframe, tmp_path):
write_location = tmp_path / "test.tfs"
write_tfs(write_location, _tfs_dataframe, validate=False)
assert write_location.is_file()

new = read_tfs(write_location, validate=False)
assert_frame_equal(_tfs_dataframe, new, check_exact=False) # float precision can be an issue
assert_dict_equal(_tfs_dataframe.headers, new.headers, compare_keys=True)

def test_tfs_write_read_no_headers(self, _dataframe_empty_headers: TfsDataFrame, tmp_path):
write_location = tmp_path / "test.tfs"
write_tfs(write_location, _dataframe_empty_headers)
@@ -140,6 +150,16 @@ def test_tfs_write_read_autoindex(self, _tfs_dataframe, tmp_path):
assert_index_equal(df.index, df_read.index, check_exact=False)
assert_dict_equal(_tfs_dataframe.headers, df_read.headers, compare_keys=True)

def test_no_warning_on_non_unique_columns_if_no_validate(self, tmp_path, caplog):
df = TfsDataFrame(columns=["A", "B", "A"])
write_tfs(tmp_path / "temporary.tfs", df, validate=False)
assert "Non-unique column names found" not in caplog.text

def test_no_validation_non_unique_columns(self, tmp_path, caplog):
# Making sure this goes through if we skip validation
df = TfsDataFrame(columns=["A", "B", "A"])
write_tfs(tmp_path / "temporary.tfs", df, validate=False)
assert (tmp_path / "temporary.tfs").is_file()

class TestFailures:
def test_raising_on_non_unique_columns(self, caplog):
@@ -231,20 +251,6 @@ def test_header_line_raises_on_non_strings(self):


class TestWarnings:
@pytest.mark.skipif(
sys.version_info >= (3, 7),
reason="Our workers on 3.7+ install pandas >= 1.3.0 which has fixed the .convert_dtypes() bug "
"we try...except in _autoset_pandas_types and test here",
)
def test_empty_df_warns_on_types_inference(self, caplog):
empty_df = pandas.DataFrame()
converted_df = tfs.writer._autoset_pandas_types(empty_df)
assert_frame_equal(converted_df, empty_df)

for record in caplog.records:
assert record.levelname == "WARNING"
assert "An empty dataframe was provided, no types were inferred" in caplog.text

def test_warning_on_non_unique_columns(self, tmp_path, caplog):
df = TfsDataFrame(columns=["A", "B", "A"])
write_tfs(tmp_path / "temporary.tfs", df)
4 changes: 2 additions & 2 deletions tfs/__init__.py
@@ -3,14 +3,14 @@
"""
from tfs.errors import TfsFormatError
from tfs.frame import TfsDataFrame, concat
from tfs.hdf import read_hdf, write_hdf
from tfs.reader import read_tfs
from tfs.writer import write_tfs
from tfs.hdf import read_hdf, write_hdf

__title__ = "tfs-pandas"
__description__ = "Read and write tfs files."
__url__ = "https://github.com/pylhc/tfs"
__version__ = "3.2.1"
__version__ = "3.3.0"
__author__ = "pylhc"
__author_email__ = "[email protected]"
__license__ = "MIT"
54 changes: 29 additions & 25 deletions tfs/collection.py
@@ -1,6 +1,6 @@
"""
Collection
----------------------
----------

Advanced **TFS** files reading and writing functionality.
"""
@@ -48,47 +48,51 @@ class TfsCollection(metaclass=_MetaTfsCollection):

Classes inheriting from this abstract class will be able to define **TFS** files
as readable or writable, and read or write them just as attribute access or
assignments. All attributes will be read and written as ``TfsDataFrame`` objects.
assignments. All attributes will be read and written as `~tfs.TfsDataFrame` objects.

Example:
If **./example** is a directory that contains two **TFS** files **beta_phase_x.tfs**
and **beta_phase_y.tfs** with `BETX` and `BETY` columns respectively:

.. sourcecode:: python
.. code-block:: python

class ExampleCollection(TfsCollection)
# All TFS attributes must be marked with the Tfs(...) class, and generated attribute
# names will be appended with _x / _y depending on files found in "./example"
>>> # All TFS attributes must be marked with the Tfs(...) class,
... # and generated attribute names will be appended with _x / _y
... # depending on files found in "./example"
... class ExampleCollection(TfsCollection):
... beta = Tfs("beta_phase_{}.tfs") # A TFS attribute
... other_value = 7 # A traditional attribute.

beta = Tfs("beta_phase_{}.tfs") # A TFS attribute
other_value = 7 # A traditional attribute.
... def get_filename(template: str, plane: str) -> str:
... return template.format(plane)

def get_filename(template: str, plane: str) -> str:
return template.format(plane)
>>> example = ExampleCollection("./example")

example = ExampleCollection("./example")
>>> # Get the BETX / BETY column from "beta_phase_x.tfs":
>>> beta_x_column = example.beta_x.BETX # / example.beta_x.BETY

# Get the BETX / BETY column from "beta_phase_x.tfs":
beta_x_column = example.beta_x.BETX # / example.beta_x.BETY
>>> # Get the BETY column from "beta_phase_y.tfs":
>>> beta_y_column = example.beta_y.BETY

# Get the BETY column from "beta_phase_y.tfs":
beta_y_column = example.beta_y.BETY
>>> # The planes can also be accessed as items (both examples below work):
>>> beta_y_column = example.beta["y"].BETY
>>> beta_y_column = example.beta["Y"].BETY

# The planes can also be accessed as items (both examples below work):
beta_y_column = example.beta["y"].BETY
beta_y_column = example.beta["Y"].BETY
>>> # This will write an empty DataFrame to "beta_phase_y.tfs":
>>> example.allow_write = True
>>> example.beta["y"] = DataFrame()

# This will write an empty DataFrame to "beta_phase_y.tfs":
example.allow_write = True
example.beta["y"] = DataFrame()

If the file to be loaded is not defined for two planes then the attribute can be declared
and accessed as:

.. code-block:: python

If the file to be loaded is not defined for two planes then the attribute can be declared as:
``coupling = Tfs("getcouple.tfs", two_planes=False)`` and then accessed as
``f1001w_column = example.coupling.F1001W``.
>>> coupling = Tfs("getcouple.tfs", two_planes=False) # declaration
>>> f1001w_column = example.coupling.F1001W # access

No file will be loaded until the corresponding attribute is accessed and the loaded
``TfsDataFrame`` will be buffered, thus the user should expect an ``IOError`` if the requested
`~tfs.TfsDataFrame` will be buffered, thus the user should expect an ``IOError`` if the requested
file is not in the provided directory (only the first time, but is better to always take it
into account!).
