From 2c6a2a4929c9242a741e953d5743d25fda249aa9 Mon Sep 17 00:00:00 2001
From: Andrew
Date: Mon, 5 Aug 2024 10:32:12 +0100
Subject: [PATCH] docs

---
 .github/workflows/docs.yml | 50 ++++++++++++++++++++++++++++++++++++++
 docs/getting/tutorial.rst  |  2 --
 docs/index.rst             |  4 +--
 docs/user/common.rst       |  3 ++-
 docs/user/initializing.rst | 34 ++++++++++++++++++--------
 docs/user/reading.rst      |  5 +++-
 pint_pandas/pint_array.py  |  4 +--
 requirements_docs.txt      |  4 +--
 8 files changed, 85 insertions(+), 21 deletions(-)
 create mode 100644 .github/workflows/docs.yml

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..8ebea5e
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,50 @@
+name: Documentation Build
+
+on: [push, pull_request]
+
+jobs:
+  docbuild:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 100
+
+      - name: Get tags
+        run: git fetch --depth=1 origin +refs/tags/*:refs/tags/*
+
+      - name: Set up minimal Python version
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.10"
+
+      - name: Get pip cache dir
+        id: pip-cache
+        run: echo "::set-output name=dir::$(pip cache dir)"
+
+      - name: Setup pip cache
+        uses: actions/cache@v2
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: pip-docs
+          restore-keys: pip-docs
+
+      - name: Install locales
+        run: |
+          sudo apt-get install language-pack-fr
+          sudo localedef -i fr_FR -f UTF-8 fr_FR
+
+      - name: Install dependencies
+        run: |
+          sudo apt install -y pandoc
+          pip install --upgrade pip setuptools wheel
+          pip install -r "requirements_docs.txt"
+          pip install docutils==0.14 commonmark==0.8.1 recommonmark==0.5.0 babel==2.8
+          pip install .
+
+      - name: Build documentation
+        run: sphinx-build -n -j auto -b html -d build/doctrees docs build/html
+
+      - name: Doc Tests
+        run: sphinx-build -a -j auto -b doctest -d build/doctrees docs build/doctest
diff --git a/docs/getting/tutorial.rst b/docs/getting/tutorial.rst
index 46762f6..f8f1b48 100644
--- a/docs/getting/tutorial.rst
+++ b/docs/getting/tutorial.rst
@@ -1,5 +1,3 @@
-.. _tutorial:
-
 **************************
 Tutorial
 **************************
diff --git a/docs/index.rst b/docs/index.rst
index 56a0d54..e5aa1ee 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,7 +1,7 @@
 :orphan:
 
 Pint-pandas: Unit support for pandas
-======================
+=====================================
 
 **Useful links**:
 `Code Repository `__ |
@@ -66,9 +66,7 @@ Pint-pandas: Unit support for pandas
 
     Getting started
     User Guide
-    Advanced topics
     ecosystem
-    API Reference
 
 .. toctree::
     :maxdepth: 1
diff --git a/docs/user/common.rst b/docs/user/common.rst
index 0256db0..f7445a4 100644
--- a/docs/user/common.rst
+++ b/docs/user/common.rst
@@ -58,8 +58,9 @@ Creating DataFrames from Series
 The default operation of Pandas `pd.concat` function is to perform row-wise concatenation. When given a list of Series, each of which is backed by a PintArray, this will inefficiently convert all the PintArrays to arrays of `object` type, concatenate the several series into a DataFrame with that many rows, and then leave it up to you to convert that DataFrame back into column-wise PintArrays. A much more efficient approach is to concatenate Series in a column-wise fashion:
 
 .. ipython:: python
-    :suppress:
     :okwarning:
+
+
     list_of_series = [pd.Series([1.0, 2.0], dtype="pint[m]") for i in range(0, 10)]
     df = pd.concat(list_of_series, axis=1)
 
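The common.rst example above leaves the concatenated columns with default integer labels. As a small follow-on sketch, not part of the patch and assuming pint-pandas is installed, `keys=` can name the columns while each one stays PintArray-backed:

    import pandas as pd
    import pint_pandas  # noqa: F401 -- registers the "pint[...]" extension dtype

    list_of_series = [pd.Series([1.0, 2.0], dtype="pint[m]") for i in range(0, 10)]

    # axis=1 concatenates column-wise; keys= supplies the column names.
    df = pd.concat(list_of_series, axis=1, keys=[f"col{i}" for i in range(10)])
    df.dtypes  # each column reports a pint[meter] dtype rather than object
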
diff --git a/docs/user/initializing.rst b/docs/user/initializing.rst
index f50fb3a..4704a05 100644
--- a/docs/user/initializing.rst
+++ b/docs/user/initializing.rst
@@ -4,7 +4,7 @@
 Initializing data
 **************************
 
-There are several ways to initialize PintArrays in a DataFrame. Here's the most common methods. We'll use `PA_` and `Q_` as shorthand for PintArray and Quantity.
+There are several ways to initialize `PintArray`s in a `DataFrame`. Here are the most common methods. We'll use `PA_` and `Q_` as shorthand for `PintArray` and `Quantity`.
 
 
 
@@ -13,7 +13,6 @@ There are several ways to initialize PintArrays in a DataFrame. Here's the most
     import pandas as pd
     import pint
     import pint_pandas
-    import io
 
     PA_ = pint_pandas.PintArray
     ureg = pint_pandas.PintType.ureg
@@ -21,18 +20,33 @@ There are several ways to initialize PintArrays in a DataFrame. Here's the most
 
     df = pd.DataFrame(
         {
-            "A": pd.Series([1.0, 2.0], dtype="pint[m]"),
-            "B": pd.Series([1.0, 2.0]).astype("pint[m]"),
-            "C": PA_([2.0, 3.0], dtype="pint[m]"),
-            "D": PA_([2.0, 3.0], dtype="m"),
-            "E": PA_([2.0, 3.0], dtype=ureg.m),
-            "F": PA_.from_1darray_quantity(Q_([2, 3], ureg.m)),
-            "G": PA_(Q_([2.0, 3.0], ureg.m)),
+            "Ser1": pd.Series([1, 2], dtype="pint[m]"),
+            "Ser2": pd.Series([1, 2]).astype("pint[m]"),
+            "Ser3": pd.Series([1, 2], dtype="pint[m][Int64]"),
+            "Ser4": pd.Series([1, 2]).astype("pint[m][Int64]"),
+            "PArr1": PA_([1, 2], dtype="pint[m]"),
+            "PArr2": PA_([1, 2], dtype="pint[m][Int64]"),
+            "PArr3": PA_([1, 2], dtype="m"),
+            "PArr4": PA_([1, 2], dtype=ureg.m),
+            "PArr5": PA_(Q_([1, 2], ureg.m)),
+            "PArr6": PA_([1, 2], "m"),
         }
     )
     df
 
+In the first two Series examples above, the data was converted to Float64.
+
+.. ipython:: python
+
+    df.dtypes
+
+
+To avoid this conversion, specify the subdtype (the dtype of the magnitudes) in the dtype string, e.g. `"pint[m][Int64]"`, when constructing a `Series`. The default data dtype that pint-pandas converts to can be changed by modifying `pint_pandas.DEFAULT_SUBDTYPE`.
+
+`PintArray` infers the subdtype from the data passed into it when no subdtype is specified in the dtype. It also accepts a pint `Unit` or a unit string as the dtype.
+
+
 .. note::
 
-    "pint[unit]" must be used for the Series or DataFrame constuctor.
+    `"pint[unit]"` or `"pint[unit][subdtype]"` must be used for the Series or DataFrame constructor.
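A condensed sketch of the constructor behaviour the new initializing.rst text describes, assuming a pint-pandas version that understands the `pint[unit][subdtype]` spelling and exposes `DEFAULT_SUBDTYPE`; this is an illustration, not part of the patch:

    import pandas as pd
    import pint_pandas

    # Without a subdtype, integer data is converted to the default magnitude
    # dtype (pint_pandas.DEFAULT_SUBDTYPE, Float64 unless reconfigured).
    s_float = pd.Series([1, 2], dtype="pint[m]")

    # Spelling out the subdtype keeps the magnitudes as Int64.
    s_int = pd.Series([1, 2], dtype="pint[m][Int64]")
    s_float.dtype, s_int.dtype

    # PintArray also accepts a bare unit string or a pint Unit and infers the
    # subdtype from the data when none is given.
    arr = pint_pandas.PintArray([1, 2], dtype="m")
    arr.dtype
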
diff --git a/docs/user/reading.rst b/docs/user/reading.rst
index 93a57f5..70e2a22 100644
--- a/docs/user/reading.rst
+++ b/docs/user/reading.rst
@@ -40,7 +40,10 @@ Let's read that into a DataFrame. Here io.StringIO is used in place of reading a
 
     df = pd.read_csv(io.StringIO(test_data), header=[0, 1], index_col=[0, 1]).T
     # df = pd.read_csv("/path/to/test_data.csv", header=[0, 1])
     for col in df.columns:
-        df[col] = pd.to_numeric(df[col], errors="ignore")
+        try:
+            df[col] = pd.to_numeric(df[col])
+        except (ValueError, TypeError):
+            pass
     df.dtypes
 
diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py
index 6568e14..b4ea833 100644
--- a/pint_pandas/pint_array.py
+++ b/pint_pandas/pint_array.py
@@ -306,7 +306,7 @@ class PintArray(ExtensionArray, ExtensionScalarOpsMixin):
 
     def __init__(self, values, dtype=None, copy=False):
         # infer subdtype from values if not given in dtype
-        if isinstance(dtype, str) and dtype.count("[") <= 1:
+        if (isinstance(dtype, str) and dtype.count("[") <= 1) or isinstance(dtype, _Unit):
             _dtype = PintType(dtype)
             if isinstance(values, _Quantity):
                 values = values.m_as(_dtype.units)
@@ -316,7 +316,7 @@ def __init__(self, values, dtype=None, copy=False):
         if dtype is None:
             if isinstance(values, _Quantity):
                 units = values.units
-                values = pd.array(values, copy=copy)
+                values = pd.array(values.magnitude, copy=copy)
                 dtype = PintType(units=units, subdtype=values.dtype)
             elif isinstance(values, PintArray):
                 dtype = values._dtype
diff --git a/requirements_docs.txt b/requirements_docs.txt
index 8f44109..40528e2 100644
--- a/requirements_docs.txt
+++ b/requirements_docs.txt
@@ -1,4 +1,4 @@
-sphinx>4
+sphinx>=5
 ipython<=8.12
 matplotlib
 mip>=1.13
@@ -16,7 +16,7 @@ dask[complete]
 setuptools>=41.2
 Serialize
 pygments>=2.4
-sphinx-book-theme==0.3.3
+sphinx-book-theme>=0.3.3
 sphinx_copybutton
 sphinx_design
 typing_extensions
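Recent pandas releases deprecate `errors="ignore"` in `pd.to_numeric`, which is presumably why the reading.rst hunk replaces it with a try/except. A standalone sketch of the same pattern on made-up data, not part of the patch; the column names are hypothetical:

    import pandas as pd

    df = pd.DataFrame({"speed": ["1.0", "2.5"], "note": ["slow", "fast"]})

    for col in df.columns:
        try:
            # Convert columns that parse as numbers; pd.to_numeric raises on the rest.
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass  # leave non-numeric columns untouched

    df.dtypes  # "speed" becomes float64, "note" stays object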