#17 Version 1.0.1 Release

davidkowalk · Dec 6, 2024 · b9acc41 · b9acc41
2 parents 1c70611 + 40120d0
commit b9acc41
Show file tree

Hide file tree

Showing 12 changed files with 304 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -160,3 +160,7 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+docs/stats.csv
+tests/plot.png
+pip_token
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 ![GitHub commit activity](https://img.shields.io/github/commit-activity/m/davidkowalk/fitting_toolkit)
 ![GitHub License](https://img.shields.io/github/license/davidkowalk/fitting_toolkit)
 ![University](https://img.shields.io/badge/University_of_Bonn-brown)
-![Version](https://img.shields.io/badge/version-open%20beta-red)
+![Version](https://img.shields.io/badge/version-1.0.1-green)
 ![GitHub Repo stars](https://img.shields.io/github/stars/davidkowalk/fitting_toolkit?style=flat&label=github%20stars)
 
 
@@ -19,6 +19,28 @@ Check out the `docs` folder for documentation and tutorials.
 
 ## Quick Introduction
 
+### Installation
+
+There are multiple ways to install this package. The easiest is via pip:
+```
+pip install fitting-toolkit
+```
+If you need a specific version (for example due to compatibility issues) you can specify the version via `fitting-toolkit==version`, e.g.:
+```
+pip install fitting-toolkit==1.0.1
+```
+
+**Alternative Methods**
+
+You can also find the `fitting_toolkit.py` in the `src` folder and copy it into your project.
+
+To build the project yourself and install it, make sure `setuptools` and `wheel` are installed, then run
+```
+python3 setup.py sdist bdist_wheel
+pip install ./dist/fitting_toolkit-1.0.1-py3-none-any.whl --force-reinstall
+pip show fitting-toolkit -v
+```
+
 ### Requirements
 This project requires the following modules along with their dependencies:
 - numpy
@@ -27,14 +49,15 @@ This project requires the following modules along with their dependencies:
 
 It is highly recommended that the user familiarizes themselves with the functionality of these modules first. A rudimentary understanding of `numpy` and `matplotlib.pyplot` is required.
 
+If you install via pip, the dependencies will be installed automatically. However, if the project files are used directly, you may want to install the dependencies manually:
+
 To install the dependencies, first a [virtual environment](https://docs.python.org/3/library/venv.html) should be created. `requirements.txt` lists all necessary packages. Run:
 ```
 pip install -r requirements.txt
 ```
 
 ### Getting Started
 
-To get started find the `fitting_toolkit.py` in the `src` folder and copy it into your project.
 You can now import the relevant functions into your code:
 ```python
 from fitting_toolkit import curve_fit, plot_fit 

diff --git a/docs/functions.md b/docs/functions.md
@@ -7,7 +7,7 @@ By separating the fitting functionality from the display options, a user can uti
 
 To fit a dataset, call:
 ```python
-curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples = 5000, confidence_resolution: int = None, nsigma:float = 1, **kwargs)
+curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples = 5000, model_resolution: int = None, model_axis = None, nsigma:float = 1, **kwargs)
 ```
 
 | Parameters | | |
@@ -18,6 +18,8 @@ curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples = 50
 | ydata    | np.array | The dependent data, a length M array - nominally f(xdata, ...)
 | yerror   | np.array | (optional) Determines the uncertainty in ydata. Pass absolute values.
 | resamples| int      | (optional) Number of samples to be generated in parameter space for bootstrapping.
+|model_resolution | int, optional | If specified the confidence interval will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+| model_axis | numpy.ndarray, optional | If specified this axis is used instead of axis generated via model_resolution.
 | **kwargs | any      | (optional) Parameters to be passed on to `scipy.optimize.curve_fit`
 
 | Returns | | |
@@ -53,7 +55,7 @@ plt.show()
 
 The fitting toolkit ships with built-in functions for displaying data with their fitted functions and their respective confidence intervals.
 ```python
-plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None, confidence_resolution: int = None, markersize = 4, capsize = 4, fit_color = "black", fit_label = "Least Squares Fit", confidence_label = "1$\\sigma$-Confidence", fig = None, ax = None, **kwargs)
+plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None, model_resolution: int = None, markersize = 4, capsize = 4, fit_color = "black", fit_label = "Least Squares Fit", confidence_label = "1$\\sigma$-Confidence", fig = None, ax = None, **kwargs)
 ```
 
 | Parameters | | |
@@ -68,7 +70,8 @@ plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None
 | **Optional Arguments** |
 |xerror    | numpy.ndarray, optional | The uncertainties in the x-values of the data points. Default is None.
 |yerror    | numpy.ndarray, optional | The uncertainties in the y-values of the data points. Default is None.
-|confidence_resolution | int, optional | If specified the confidence interval will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+|model_resolution | int, optional | If specified the confidence interval will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+| model_axis | numpy.ndarray, optional | If specified this axis is used instead of axis generated via model_resolution.
 | **Display Options** |
 |fit_color | color, optional | color of the fitted function.
 |markersize| int, optional | The size of the markers for the data points. Default is 4.
@@ -80,7 +83,7 @@ plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None
 |**kwargs  || Additional arguments passed to `pyplot.subplots()`
 
 
-If the upper and lower bounds were generated with a custom resolution, the same resolution must be provided in the `confidence_resolution` parameter.
+If the upper and lower bounds were generated with a custom resolution, the same resolution must be provided in the `model_resolution` parameter.
 
 You may also pass keyword arguments to `matplotlib.pyplot.subplots()` via `**kwargs`. 
 For comprehensive documentation please consult [`subplots()`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.subplots.html), [`figure()`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.figure.html#matplotlib.pyplot.figure) and [`add_subplot()`](https://matplotlib.org/stable/api/_as_gen/matplotlib.figure.Figure.add_subplot.html#matplotlib.figure.Figure.add_subplot).

diff --git a/docs/manual.md b/docs/manual.md
@@ -8,7 +8,11 @@ These instructions provide a basic introduction on how to use the toolkit and la
 
 This section covers the use of the toolkit's functions as intended, which will cover most use cases.
 
-To get started find the `fitting_toolkit.py` in the `src` folder and copy it into your project.
+To get started install the package via
+```
+pip install fitting-toolkit
+```
+or copy `fitting_toolkit.py` into your working directory.
 You can now import the relevant functions into your code:
 ```python
 from fitting_toolkit import curve_fit, plot_fit 
@@ -84,13 +88,13 @@ For each point on the x-axis, the fitted function is calculated using all the re
 
 This method is referred to as ["parametric bootstrapping"](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)) [[2]](https://doi.org/10.1214/ss/1063994971). The resampling method assumes normally distributed parameters; when the errors on the fitted parameters turn out to be asymmetric, the numerical approximation may prove inaccurate.
 
-By default the confidence interval is estimated at each x-position of the data, however this may cause issues of resolution when data points are sparse or non-uniformly distributed along the x-axis, or it may be computationally expensive for large datasets. When a `confidence_resolution: int` parameter is set in `curve_fit`, that number of points is generated between the highest and lowest point on the x-axis and used as the x-axis instead.
+By default the confidence interval is estimated at each x-position of the data, however this may cause issues of resolution when data points are sparse or non-uniformly distributed along the x-axis, or it may be computationally expensive for large datasets. When a `model_resolution: int` parameter is set in `curve_fit`, that number of points is generated between the highest and lowest point on the x-axis and used as the x-axis instead.
 
 ```python
-resampled_x_axis = np.linspace(min(xdata), max(xdata), confidence_resolution) 
+resampled_x_axis = np.linspace(min(xdata), max(xdata), model_resolution) 
 ```
 
-Note that the `confidence_resolution` must be provided to both `curve_fit` and `plot_fit`  
+Note that the `model_resolution` must be provided to both `curve_fit` and `plot_fit`  
 
 ### Specifying the Number of Standard Deviations
 
@@ -133,7 +137,7 @@ For function documentation consult `./functions.md`.
 | lower    | np.ndarray | The lower bounds of the confidence intervals for the model predictions. 
 | upper    | np.ndarray | The upper bounds of the confidence intervals for the model predictions.
 
-If the upper and lower bounds were generated with a custom resolution, the same resolution must be provided in the `confidence_resolution` parameter.
+If the upper and lower bounds were generated with a custom resolution, the same resolution must be provided in the `model_resolution` parameter.
 
 For customization the function also provides the keywords `markersize`, `capsize`, `fit_label`, and `confidence_label`
 

diff --git a/examples/experiment_dc_current.py b/examples/experiment_dc_current.py
@@ -0,0 +1,40 @@
+"""
+Example evaluation of an experiment from the second undergraduate lab course. (232)
+Demonstrates how to plot two separate fits, each with its own confidence band, in the same graph.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from fitting_toolkit import curve_fit, plot_fit
+
+def lin(x, a, b):
+    """Linear model a*x + b that is fitted to both data sets."""
+    return a*x + b
+
+# Number of linearly spaced points used for the fitted model and its confidence band.
+# curve_fit and plot_fit must be given the same value so the bounds match the plotted axis.
+MODEL_RESOLUTION = 50
+
+# Uncertainty passed as yerror for every voltage value
+dU = 0.1
+
+# R = infty
+x_inf = 1-np.array([20, 30, 40, 50, 60, 70, 80])/100
+U_inf = np.array([3.2, 2.8, 2.4, 2.0, 1.6, 1.2, 0.8])
+params_inf, cov_inf, lower_inf, upper_inf = curve_fit(lin, x_inf, U_inf, dU, model_resolution = MODEL_RESOLUTION, nsigma = 2, absolute_sigma = True)
+
+# R = 50 Ohm
+# NOTE(review): the variables carry the suffix _20 although the label says 50 Ohm - confirm which is right.
+x_20 = x_inf
+U_20 = np.array([2.7, 2.4, 2.0, 1.7, 1.5, 1.1, 0.7])
+params_20, cov_20, lower_20, upper_20 = curve_fit(lin, x_20, U_20, dU, model_resolution = MODEL_RESOLUTION, nsigma = 2, absolute_sigma = True)
+
+# Show both fits: create the axes once and pass the same figure and axes to both calls.
+fig, ax = plt.subplots()
+ax.grid(True)
+fig, ax = plot_fit(x_inf, U_inf, lin, params_inf, lower_inf, upper_inf, yerror=dU, model_resolution = MODEL_RESOLUTION, fit_label="$R = \\infty$", confidence_label="2-$\\sigma$", fig = fig, ax = ax)
+fig, ax = plot_fit(x_20, U_20, lin, params_20, lower_20, upper_20, yerror=dU, model_resolution = MODEL_RESOLUTION, fit_color = "crimson", fit_label="$R = 50\\Omega$", confidence_label="2-$\\sigma$", fig = fig, ax = ax)
+ax.legend()
+ax.set_xlabel("x / l")
+ax.set_ylabel("U / V")
+plt.show()
diff --git a/src/experiment_phys_pendulum.py → examples/experiment_phys_pendulum.py b/src/experiment_phys_pendulum.py → examples/experiment_phys_pendulum.py
diff --git a/setup.py b/setup.py
@@ -0,0 +1,35 @@
+"""
+Build script for the fitting_toolkit distribution.
+
+Run with: python3 setup.py sdist bdist_wheel
+"""
+from setuptools import setup
+
+# The README is passed on as the long description. Decode it explicitly (assuming the
+# file is stored as UTF-8) so the build does not depend on the platform's default encoding.
+with open("./README.md", encoding="utf-8") as f:
+    long_description = f.read()
+
+# Strip line endings and skip blank lines so that only clean requirement
+# strings end up in install_requires.
+with open("./requirements.txt", encoding="utf-16") as f:
+    requirements = [line.strip() for line in f if line.strip()]
+
+setup(
+    name = "fitting_toolkit",
+    version = "1.0.1",
+    # NOTE(review): mapping the root package "" to src looks like it installs every module in src at top level - confirm this is intended.
+    package_dir={"": "src"},
+    packages=[""],
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    install_requires = requirements,
+    project_urls = {
+        "Documentation": "https://github.com/davidkowalk/fitting_toolkit/blob/development/docs/manual.md",
+        "Source": "https://github.com/davidkowalk/fitting_toolkit/",
+        "Tracker": "https://github.com/davidkowalk/fitting_toolkit/issues"
+    },
+    license="MIT",
+    description="Easy and Flexible Curve Fitting",
+    url="https://github.com/davidkowalk/fitting_toolkit/"
+)
diff --git a/src/__init__.py b/src/__init__.py
@@ -0,0 +1,6 @@
+from .fitting_toolkit import *
+# Presumably the intended public names: generate_thresholds, get_sigma_probability, confidence_interval, curve_fit, plot_fit
+
+__package_name__ = "fitting_toolkit"
+__author__ = "David J. Kowalk"
+__version__ = "1.0.1"
diff --git a/src/fitting_toolkit.py b/src/fitting_toolkit.py
@@ -74,7 +74,7 @@ def confidence_interval(model, xdata: np.array, params: np.array, cov: np.array,
 
     return np.array(lower_conf), np.array(upper_conf)
 
-def curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples = 5000, confidence_resolution: int = None, nsigma:float = 1, **kwargs) -> tuple[np.array, np.array, np.array, np.array]:
+def curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples = 5000, model_resolution: int = None, model_axis = None, nsigma:float = 1, **kwargs) -> tuple[np.array, np.array, np.array, np.array]:
     """
     Fits a model to data and calculates confidence intervals for the fitted parameters and predictions.
 
@@ -88,7 +88,8 @@ def curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples
         ydata (numpy.ndarray): The observed data corresponding to `xdata`.
         yerror (numpy.ndarray, optional): The uncertainties in the observed data `ydata`. Default is None.
         resamples (int, optional): The number of resampling iterations for bootstrapping confidence intervals. Default is 5000.
-        confidence_resolution (int, optional): If specified the confidence interval will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+        model_resolution (int, optional): If specified the confidence interval and model will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+        model_axis (np.ndarray, optional): If specified this axis is used instead of axis generated via model_resolution
         nsigma (float): Number of standard deviation passed to confidence_interval()
         **kwargs: Additional arguments passed to SciPy's `curve_fit` function.
 
@@ -99,19 +100,25 @@ def curve_fit(model, xdata: np.array, ydata: np.array, yerror = None, resamples
             - lower_conf (numpy.ndarray): The lower bounds of the confidence intervals for each data point.
             - upper_conf (numpy.ndarray): The upper bounds of the confidence intervals for each data point.
     """
+
+    if not(np.shape(xdata) == np.shape(ydata)):
+        raise ValueError(f"x-data and y-data have different lengths and thus cannot be broadcast together.\nx: {np.shape(xdata)}, y: {np.shape(ydata)}")
+
     params, cov = sc_curve_fit(f = model, xdata = xdata, ydata = ydata, sigma = yerror, **kwargs)
-    if confidence_resolution is None:
+    if not model_axis is None:
+         resampled_points = model_axis
+    elif model_resolution is None:
         resampled_points = xdata
-    elif confidence_resolution > 0:
-        resampled_points = np.linspace(min(xdata), max(xdata), confidence_resolution) 
+    elif model_resolution > 0:
+        resampled_points = np.linspace(min(xdata), max(xdata), model_resolution) 
     else:
         raise ValueError("Unable to specify confidence points")
 
     lower_conf, upper_conf = confidence_interval(model, resampled_points, params, cov, resamples, nsigma)
 
     return params, cov, lower_conf, upper_conf
 
-def plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None, confidence_resolution: int = None, markersize = 4, capsize = 4, fit_color = "black", fit_label = "Least Squares Fit", confidence_label = "1$\\sigma$-Confidence", fig = None, ax = None, **kwargs) -> tuple[plt.figure, plt.axes]:
+def plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror = None, model_resolution: int = None, model_axis = None, markersize = 4, capsize = 4, fit_color = "black", fit_label = "Least Squares Fit", confidence_label = "1$\\sigma$-Confidence", fig = None, ax = None, **kwargs) -> tuple[plt.figure, plt.axes]:
     """
     Plots the model fit to the data along with its confidence intervals.
 
@@ -128,7 +135,8 @@ def plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror =
         upper (numpy.ndarray): The upper bounds of the confidence intervals for the model predictions.
         xerror (numpy.ndarray, optional): The uncertainties in the x-values of the data points. Default is None.
         yerror (numpy.ndarray, optional): The uncertainties in the y-values of the data points. Default is None.
-        confidence_resolution (int, optional): If specified the confidence interval will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+        model_resolution (int, optional): If specified the confidence interval and fitted model will be calculated at linearly spaced points along x-axis. Otherwise xdata is used.
+        model_axis (np.ndarray, optional): If specified this axis is used instead of axis generated via model_resolution
         fit_color (color, optional): color of the fitted function.
         markersize (int, optional): The size of the markers for the data points. Default is 4.
         capsize (int, optional): The size of the caps on the error bars. Default is 4.
@@ -150,13 +158,24 @@ def plot_fit(xdata, ydata, model, params, lower, upper, xerror = None, yerror =
         - A grid is added to the plot for improved readability.
     """
 
-    if confidence_resolution is None:
+    if not(np.shape(xdata) == np.shape(ydata)):
+        raise ValueError(f"x-data and y-data have different lengths and thus cannot be broadcast together.\nx: {np.shape(xdata)}, y: {np.shape(ydata)}")
+
+
+    if not model_axis is None:
+         resampled_points = model_axis
+    elif model_resolution is None:
         resampled_points = xdata
-    elif confidence_resolution > 0:
-        resampled_points = np.linspace(min(xdata), max(xdata), confidence_resolution) 
+    elif model_resolution > 0:
+        resampled_points = np.linspace(min(xdata), max(xdata), model_resolution) 
     else:
         raise ValueError("Unable to specify confidence points")
 
+    if not(np.shape(resampled_points) == np.shape(lower)):
+        raise ValueError(f"x-axis does not match length of lower confidence interval\nx: {np.shape(resampled_points)}, y: {np.shape(lower)}")
+    if not(np.shape(resampled_points) == np.shape(upper)):
+        raise ValueError(f"x-axis does not match length of upper confidence interval\nx: {np.shape(resampled_points)}, y: {np.shape(upper)}")
+
     if fig is None and ax is None:
         fig, ax = plt.subplots(**kwargs)
 

diff --git a/tests/__init__.py b/tests/__init__.py