diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6981f278..5521cefc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,16 @@ name: Build webpage -on: [push, pull_request] +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +on: + pull_request: + push: + tags: + - "*" + branches: + - master jobs: build-pages: @@ -8,13 +18,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" channels: conda-forge,defaults - python-version: 3.8 + python-version: 3.11 environment-file: environment.yml activate-environment: analysis-essentials diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97170582..9a66852d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files args: ['--maxkb=1000'] @@ -17,11 +17,11 @@ repos: rev: 1.7.0 hooks: - id: nbqa-isort - additional_dependencies: [ isort==5.6.4 ] + additional_dependencies: [ isort ] - id: nbqa-pyupgrade - additional_dependencies: [ pyupgrade==2.7.4 ] - args: [ --py37-plus ] + additional_dependencies: [ pyupgrade ] + args: [ --py38-plus ] # # - repo: https://github.com/ambv/black # rev: 21.9b0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1efea0aa..95a9a355 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ In exchange, we will address your issues and/or assess your change proposal as promptly as we can, and help you become a member of our community. Everyone involved in [HSF training][hsf-training] -agrees to abide by our [code of conduct](CODE_OF_CONDUCT.md). +agrees to abide by our [code of conduct](CONDUCT.md). 
## How to Contribute @@ -49,12 +49,12 @@ There are many ways to contribute, from writing new exercises and improving existing ones to updating or filling in the documentation and submitting [bug reports][issues] -about things that do not work, aren not clear, or are missing. +about things that do not work, are not clear, or are missing. If you are looking for ideas, please see the 'Issues' tab for a list of issues associated with this repository, or you may also look at all issues in [hsf-training][hsf-training-issues] -There is also [a list](hsf-training-gfis) of all issues that are particularly easy and suitable +There is also [a list][hsf-training-gfis] of all issues that are particularly easy and suitable for first contributions. Comments on issues and reviews of pull requests are just as welcome: diff --git a/LICENSE.md b/LICENSE.md index 47801218..79f18f88 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,3 +1,4 @@ + # Instructional Material All instructional material is made available under the [Creative Commons diff --git a/README.md b/README.md index 4e649e7e..38ca0a78 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Analysis essentials [![Build Status](https://api.travis-ci.org/hsf-training/analysis-essentials.svg?branch=master)](https://travis-ci.org/hsf-training/analysis-essentials) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/hsf-training/analysis-essentials/master) +# Analysis essentials [![Build Status](https://github.com/hsf-training/analysis-essentials/actions/workflows/build.yml/badge.svg)](https://github.com/hsf-training/analysis-essentials/actions/workflows/build.yml) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/hsf-training/analysis-essentials/master) This is the source material for the [analysis essentials website][website], a @@ -19,29 +19,36 @@ There are two options for 
running these lessons. Running locally should be prefe ### Local -This tutorial uses `Python 3.7` and requires some packages. -It is recommended to use [Conda](https://docs.conda.io/en/latest/) to install the correct packages. +This tutorial uses `Python 3.11` and requires some packages. +It is recommended to use [mambaforge](https://github.com/conda-forge/miniforge#mambaforge) to install the correct packages. +**Note:** `mamba` is like `conda` and can be used interchangeably. "forge" in the name refers to the [conda-forge](https://conda-forge.org/) channel, _the_ open-source maintained channel which contains a lot of packages. -To install `Conda` you will need to do the following: +To install `Conda`/`mamba` you will need to do the following: - - Install `Conda` according to the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html#installing-in-silent-mode) - - You can add `source /my/path/for/miniconda/etc/profile.d/conda.sh` to your `.bashrc` - - Add the channel: + - Install `mamba` according to the instructions [here](https://github.com/conda-forge/miniforge#install) + - To add `mamba`/`conda` to your shell, follow the instructions after the installation and execute ```bash -conda config --add channels conda-forge +mamba init +``` + - In order not to use the base environment (which you almost never should), do +```bash +conda config --set auto_activate_base false ``` -Now to use your first ```Conda``` environment: - - Create an environment with some packages already installed: + +Now to use your first ```Conda/Mamba``` environment: + + - This will install the above packages. 
In order to make sure that you install all of the packages needed in the tutorial, you can use the `environment.yml` file (make sure that the file `environment.yml` is in the current directory): ```bash -conda create -n my-analysis-env python=3.7 jupyterlab ipython matplotlib uproot numpy pandas scikit-learn scipy tensorflow xgboost hep_ml wget +mamba env create -f environment.yml ``` - - Activate your environment by doing: `conda activate my-analysis-env` - - You can install additional packages by doing: `conda install package_name` - - For the lessons to work fully you will also need to install a special helper package with pip: + - Alternatively, you can create an environment with some packages already installed in this way: ```bash -pip install git+https://github.com/hsf-training/python-lesson.git +mamba create -n analysis-essentials python=3.11 jupyterlab ipython matplotlib uproot numpy pandas scikit-learn scipy tensorflow xgboost hep_ml wget ``` + - Activate your environment by doing: `mamba activate analysis-essentials` + - You can install additional packages by doing: `mamba install package_name` + You will also need [Jupyter](https://jupyterlab.readthedocs.io/) to run the examples in this tutorial. Jupyter was already installed in the previous command and can be ran by following the instructions [here](https://jupyterlab.readthedocs.io/en/stable/getting_started/starting.html). 
@@ -86,4 +93,6 @@ If you have any problems or questions, you can [open an issue][issues] on this r snakemake/README.md git/README.md CONTRIBUTING.md + CONDUCT.md + LICENSE.md ``` diff --git a/advanced-python/10Basics.ipynb b/advanced-python/10Basics.ipynb index 9301d4bd..87d8d778 100644 --- a/advanced-python/10Basics.ipynb +++ b/advanced-python/10Basics.ipynb @@ -52,7 +52,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.162938280Z", + "start_time": "2023-11-09T18:21:54.591214759Z" + } + }, "outputs": [], "source": [ "a = ['a', 'b', 'c']\n", @@ -66,7 +71,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.164206324Z", + "start_time": "2023-11-09T18:21:54.591634409Z" + } + }, "outputs": [], "source": [ "a = {'a': '0', 'b': '1', 'c': '2'}\n", @@ -80,7 +90,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.165379338Z", + "start_time": "2023-11-09T18:21:54.592201956Z" + } + }, "outputs": [], "source": [ "a = 'foo'\n", @@ -102,7 +117,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.174086165Z", + "start_time": "2023-11-09T18:21:54.592801793Z" + } + }, "outputs": [], "source": [ "N = 10\n", @@ -123,7 +143,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.174961405Z", + "start_time": "2023-11-09T18:21:54.639862013Z" + } + }, "outputs": [], "source": [ "squares = {i: i**2 for i in range(10)}\n", @@ -133,7 +158,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.177083872Z", + "start_time": "2023-11-09T18:21:54.640208754Z" + } + }, 
"outputs": [], "source": [ "N = 5\n", @@ -202,7 +232,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.178900927Z", + "start_time": "2023-11-09T18:21:54.640684978Z" + } + }, "outputs": [], "source": [ "pt_cut = 1789.234567890987654\n", @@ -232,7 +267,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.179761160Z", + "start_time": "2023-11-09T18:21:54.683193413Z" + } + }, "outputs": [], "source": [ "\"Hello starterkitters\"" @@ -241,7 +281,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.180074403Z", + "start_time": "2023-11-09T18:21:54.683507163Z" + } + }, "outputs": [], "source": [ "None" @@ -257,7 +302,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:55.708097724Z", + "start_time": "2023-11-09T18:21:54.683700759Z" + } + }, "outputs": [], "source": [ "!ls" @@ -266,7 +316,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:57.188240198Z", + "start_time": "2023-11-09T18:21:55.712286379Z" + } + }, "outputs": [], "source": [ "!wget https://example.com/index.html" @@ -282,7 +337,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:57.209109687Z", + "start_time": "2023-11-09T18:21:57.194328587Z" + } + }, "outputs": [], "source": [ "%time sum([i**2 for i in range(10000)])" @@ -298,7 +358,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:57.268821159Z", + "start_time": "2023-11-09T18:21:57.208320012Z" + } + }, "outputs": [], "source": [ "%%time\n", @@ 
-337,7 +402,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:57.269768775Z", + "start_time": "2023-11-09T18:21:57.263045210Z" + } + }, "outputs": [], "source": [ "def my_print(my_string):\n", @@ -347,7 +417,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.320620623Z", + "start_time": "2023-11-09T18:21:57.263292639Z" + } + }, "outputs": [], "source": [ "my_print?" @@ -363,7 +438,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.324026004Z", + "start_time": "2023-11-09T18:21:57.541042260Z" + } + }, "outputs": [], "source": [ "my_print??" @@ -372,7 +452,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.357805971Z", + "start_time": "2023-11-09T18:21:57.552034854Z" + } + }, "outputs": [], "source": [ "range?" @@ -388,7 +473,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.396066090Z", + "start_time": "2023-11-09T18:21:57.647040419Z" + } + }, "outputs": [], "source": [ "{'a': 'b'}.get?" 
@@ -397,7 +487,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.398964659Z", + "start_time": "2023-11-09T18:21:57.695017046Z" + } + }, "outputs": [], "source": [ "{'a': 'b'}.get" @@ -406,7 +501,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.401053224Z", + "start_time": "2023-11-09T18:21:57.695273312Z" + } + }, "outputs": [], "source": [ "junk = {'a': 'b'}.get\n", @@ -432,7 +532,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.404008533Z", + "start_time": "2023-11-09T18:21:57.695446136Z" + } + }, "outputs": [], "source": [ "max(10, 15)" @@ -444,7 +549,11 @@ "metadata": { "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "end_time": "2023-11-09T18:21:59.040701073Z", + "start_time": "2023-11-09T18:21:57.695622001Z" + } }, "outputs": [], "source": [ @@ -463,7 +572,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.564441476Z" + } + }, "outputs": [], "source": [ "import numpy as np\n", @@ -497,7 +610,10 @@ "metadata": { "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.606952947Z" + } }, "outputs": [], "source": [ diff --git a/advanced-python/11AdvancedPython.ipynb b/advanced-python/11AdvancedPython.ipynb index 5b00cdea..ea1344cb 100644 --- a/advanced-python/11AdvancedPython.ipynb +++ b/advanced-python/11AdvancedPython.ipynb @@ -17,7 +17,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.287777986Z", + "start_time": "2023-11-09T18:21:57.475130307Z" + } + }, "outputs": [], "source": [ "import time" @@ -37,7 +42,12 @@ { "cell_type": "code", "execution_count": 
null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.288125493Z", + "start_time": "2023-11-09T18:21:57.475439183Z" + } + }, "outputs": [], "source": [ "a, c, *b = [3, 4, 4.5, 5, 6]" @@ -46,7 +56,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.288659478Z", + "start_time": "2023-11-09T18:21:57.475730863Z" + } + }, "outputs": [], "source": [ "b" @@ -62,7 +77,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.289024121Z", + "start_time": "2023-11-09T18:21:57.476020895Z" + } + }, "outputs": [], "source": [ "d1, d2, *d3, d4 = [1, 2, 3] # nothing left for d3" @@ -71,7 +91,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.289770343Z", + "start_time": "2023-11-09T18:21:57.476279441Z" + } + }, "outputs": [], "source": [ "d3" @@ -87,7 +112,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.289972173Z", + "start_time": "2023-11-09T18:21:57.476626713Z" + } + }, "outputs": [], "source": [ "a = [3, 4, 5]" @@ -103,7 +133,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.290148105Z", + "start_time": "2023-11-09T18:21:57.519751245Z" + } + }, "outputs": [], "source": [ "d, e, f, g, h, i = *a, *b" @@ -119,7 +154,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.290328424Z", + "start_time": "2023-11-09T18:21:57.519943991Z" + } + }, "outputs": [], "source": [ "def func(*args, **kwargs):\n", @@ -130,7 +170,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + 
"ExecuteTime": { + "end_time": "2023-11-09T18:21:58.291657968Z", + "start_time": "2023-11-09T18:21:57.520119691Z" + } + }, "outputs": [], "source": [ "mykwargs = {'a': 5, 'b': 3}\n", @@ -141,7 +186,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.297387283Z", + "start_time": "2023-11-09T18:21:57.520366676Z" + } + }, "outputs": [], "source": [ "func(5, a=4)" @@ -150,7 +200,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.297741024Z", + "start_time": "2023-11-09T18:21:57.520883499Z" + } + }, "outputs": [], "source": [ "# play around with it!" @@ -193,7 +248,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.324668018Z", + "start_time": "2023-11-09T18:21:57.563095740Z" + } + }, "outputs": [], "source": [ "import contextlib\n", @@ -209,7 +269,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.326374361Z", + "start_time": "2023-11-09T18:21:57.563347017Z" + } + }, "outputs": [], "source": [ "with printer(5) as number:\n", @@ -229,7 +294,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.354444565Z", + "start_time": "2023-11-09T18:21:57.563714577Z" + } + }, "outputs": [], "source": [ "with open('tmp.txt', 'w') as textfile:\n", @@ -246,7 +316,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.355126940Z", + "start_time": "2023-11-09T18:21:57.563857095Z" + } + }, "outputs": [], "source": [ "import contextlib\n", @@ -269,7 +344,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + 
"end_time": "2023-11-09T18:21:58.358277200Z", + "start_time": "2023-11-09T18:21:57.569931680Z" + } + }, "outputs": [], "source": [ "testdict = {'value': 11, 'name': 'the answer'}" @@ -291,7 +371,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.359563889Z", + "start_time": "2023-11-09T18:21:57.611213702Z" + } + }, "outputs": [], "source": [ "# SOLUTION\n", @@ -299,6 +384,7 @@ "def func(x):\n", " yield x\n", "\n", + "\n", "with func(5) as var1:\n", " print('inside')\n", "print(var1)" @@ -307,7 +393,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.359882492Z", + "start_time": "2023-11-09T18:21:57.611718752Z" + } + }, "outputs": [], "source": [ "@contextlib.contextmanager\n", @@ -330,7 +421,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.394698482Z", + "start_time": "2023-11-09T18:21:57.611948597Z" + } + }, "outputs": [], "source": [ "class MyContext:\n", @@ -351,7 +447,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.397073702Z", + "start_time": "2023-11-09T18:21:57.612144179Z" + } + }, "outputs": [], "source": [ "with MyContext(5) as x:\n", @@ -377,7 +478,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.397374445Z", + "start_time": "2023-11-09T18:21:57.659281779Z" + } + }, "outputs": [], "source": [ "def make_power_func(power):\n", @@ -389,7 +495,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.397643363Z", + "start_time": "2023-11-09T18:21:57.659526538Z" + } + }, "outputs": [], "source": [ "pow3 = make_power_func(3)" 
@@ -398,7 +509,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.398133567Z", + "start_time": "2023-11-09T18:21:57.659712907Z" + } + }, "outputs": [], "source": [ "pow3(2)" @@ -407,7 +523,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.398357818Z", + "start_time": "2023-11-09T18:21:57.659911070Z" + } + }, "outputs": [], "source": [ "def make_power_func(power):\n", @@ -420,7 +541,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.404364977Z", + "start_time": "2023-11-09T18:21:57.660055943Z" + } + }, "outputs": [], "source": [ "pow3 = make_power_func(3)" @@ -429,7 +555,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.406575801Z", + "start_time": "2023-11-09T18:21:57.707103066Z" + } + }, "outputs": [], "source": [ "pow3(2)" @@ -438,7 +569,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.419570582Z", + "start_time": "2023-11-09T18:21:57.707383203Z" + } + }, "outputs": [], "source": [ "# Exercise: test it here" @@ -474,7 +610,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.421942708Z", + "start_time": "2023-11-09T18:21:57.714163427Z" + } + }, "outputs": [], "source": [ "# SOLUTION\n", @@ -492,7 +633,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.422417608Z", + "start_time": "2023-11-09T18:21:57.755158988Z" + } + }, "outputs": [], "source": [ "def add_notime(x, y):\n", @@ -502,7 +648,12 @@ { "cell_type": "code", "execution_count": 
null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.422731709Z", + "start_time": "2023-11-09T18:21:57.755389720Z" + } + }, "outputs": [], "source": [ "add_timed = timed_func(add_notime)" @@ -511,7 +662,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.422948361Z", + "start_time": "2023-11-09T18:21:57.755554232Z" + } + }, "outputs": [], "source": [ "import time" @@ -520,7 +676,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.425073110Z", + "start_time": "2023-11-09T18:21:57.755705542Z" + } + }, "outputs": [], "source": [ "add_timed(y=4, x=5)" @@ -529,7 +690,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.425479931Z", + "start_time": "2023-11-09T18:21:57.756004166Z" + } + }, "outputs": [], "source": [ "# test it here" @@ -547,7 +713,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:21:58.425804868Z", + "start_time": "2023-11-09T18:21:57.756157832Z" + } + }, "outputs": [], "source": [ "@timed_func\n", @@ -586,7 +757,11 @@ "metadata": { "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "end_time": "2023-11-09T18:21:59.048528979Z", + "start_time": "2023-11-09T18:21:57.803005602Z" + } }, "outputs": [], "source": [ @@ -606,7 +781,10 @@ "metadata": { "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.751370683Z" + } }, "outputs": [], "source": [ @@ -631,7 +809,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795021637Z" + } + }, "outputs": [], "source": [ "class MyError(Exception):\n", @@ -644,7 +826,10 @@ "metadata": { "tags": [ 
"raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795152334Z" + } }, "outputs": [], "source": [ @@ -665,7 +850,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795308728Z" + } + }, "outputs": [], "source": [ "class NegativeValueError(ValueError):\n", @@ -692,7 +881,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795410621Z" + } + }, "outputs": [], "source": [ "try:\n", @@ -726,7 +919,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795520523Z" + } + }, "outputs": [], "source": [ "try:\n", @@ -742,7 +939,10 @@ "metadata": { "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795603420Z" + } }, "outputs": [], "source": [ @@ -770,11 +970,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795680122Z" } }, "outputs": [], @@ -805,15 +1005,15 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" }, "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.795985110Z" + } }, "outputs": [], "source": [ @@ -843,15 +1043,15 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" }, "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796186073Z" + } }, "outputs": [], "source": [ @@ -874,11 +1074,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" 
+ }, + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796374250Z" } }, "outputs": [], @@ -902,11 +1102,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796560001Z" } }, "outputs": [], @@ -932,11 +1132,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796721888Z" } }, "outputs": [], @@ -951,15 +1151,15 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" }, "tags": [ "raises-exception" - ] + ], + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796792565Z" + } }, "outputs": [], "source": [ @@ -970,11 +1170,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "start_time": "2023-11-09T18:21:58.796936035Z" } }, "outputs": [], diff --git a/advanced-python/12AdvancedClasses.ipynb b/advanced-python/12AdvancedClasses.ipynb index 7597afde..cd96c254 100644 --- a/advanced-python/12AdvancedClasses.ipynb +++ b/advanced-python/12AdvancedClasses.ipynb @@ -20,23 +20,31 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.090086512Z", + "start_time": "2023-11-09T18:22:01.447375824Z" + } + }, "outputs": [], "source": [ "class NamedValue:\n", " def __init__(self, name):\n", " self.name = name\n", "\n", + "\n", "class ValueLeft(NamedValue): \n", " def __add__(self, other):\n", " print(f\"add called on {self.name}\")\n", " return 42\n", " \n", + "\n", "class ValueRight(NamedValue):\n", " def __radd__(self, other):\n", " print(\"radd called on {self.name}\")\n", " return 24\n", " \n", + "\n", 
"class Value(ValueRight, ValueLeft):\n", " pass" ] @@ -51,7 +59,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.096491782Z", + "start_time": "2023-11-09T18:22:01.447622989Z" + } + }, "outputs": [], "source": [ "valleft = ValueLeft('val left')\n", @@ -61,7 +74,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.097446715Z", + "start_time": "2023-11-09T18:22:01.447833842Z" + } + }, "outputs": [], "source": [ "valleft + valleft2" @@ -90,21 +108,29 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.097894774Z", + "start_time": "2023-11-09T18:22:01.448096154Z" + } + }, "outputs": [], "source": [ "class Name:\n", " def __init__(self, name):\n", " self.name = name\n", " \n", + "\n", "class NameRepr(Name):\n", " def __repr__(self):\n", " return self.name\n", "\n", + "\n", "class NameStr(Name):\n", " def __str__(self):\n", " return f'I am {self.name}'\n", " \n", + "\n", "class NameStrRepr(NameStr, NameRepr):\n", " pass" ] @@ -128,13 +154,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.098105946Z", + "start_time": "2023-11-09T18:22:01.491308102Z" + } + }, "outputs": [], "source": [ "class Callable:\n", " def __call__(self, *args, **kwargs):\n", " print(f\"called with args {args} and kwargs {kwargs}\")\n", " \n", + "\n", "class NotCallable:\n", " pass" ] @@ -142,7 +174,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.098275992Z", + "start_time": "2023-11-09T18:22:01.491619526Z" + } + }, "outputs": [], "source": [ "call = Callable()\n", @@ -152,7 +189,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + 
"metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.098786414Z", + "start_time": "2023-11-09T18:22:01.491851791Z" + } + }, "outputs": [], "source": [ "call()" @@ -161,7 +203,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.099302008Z", + "start_time": "2023-11-09T18:22:01.492017010Z" + } + }, "outputs": [], "source": [ "try:\n", @@ -189,7 +236,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.099478261Z", + "start_time": "2023-11-09T18:22:01.498433654Z" + } + }, "outputs": [], "source": [ "class Storage:\n", @@ -209,7 +261,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.099727280Z", + "start_time": "2023-11-09T18:22:01.503944286Z" + } + }, "outputs": [], "source": [ "storage = Storage('one')" @@ -218,7 +275,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.100369605Z", + "start_time": "2023-11-09T18:22:01.513503176Z" + } + }, "outputs": [], "source": [ "storage[2]" @@ -227,7 +289,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.100965196Z", + "start_time": "2023-11-09T18:22:01.555278832Z" + } + }, "outputs": [], "source": [ "storage[2] = 3" @@ -251,7 +318,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.101169939Z", + "start_time": "2023-11-09T18:22:01.555519700Z" + } + }, "outputs": [], "source": [ "class A:\n", @@ -265,7 +337,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.101370435Z", + "start_time": 
"2023-11-09T18:22:01.555728122Z" + } + }, "outputs": [], "source": [ "a = A(4)\n", @@ -275,7 +352,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.101792662Z", + "start_time": "2023-11-09T18:22:01.599103966Z" + } + }, "outputs": [], "source": [ "a.add(b)" @@ -284,7 +366,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.104487019Z", + "start_time": "2023-11-09T18:22:01.599306619Z" + } + }, "outputs": [], "source": [ "A.add(a, b)" @@ -331,7 +418,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.104939450Z", + "start_time": "2023-11-09T18:22:01.599476172Z" + } + }, "outputs": [], "source": [ "a.__dict__" @@ -347,7 +439,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.106823366Z", + "start_time": "2023-11-09T18:22:01.599607114Z" + } + }, "outputs": [], "source": [ "a.__class__.__dict__" @@ -356,7 +453,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.107525732Z", + "start_time": "2023-11-09T18:22:01.643176331Z" + } + }, "outputs": [], "source": [ "A.__dict__" @@ -379,7 +481,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.107832669Z", + "start_time": "2023-11-09T18:22:01.643423445Z" + } + }, "outputs": [], "source": [ "class GetAndSet:\n", @@ -402,7 +509,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.108062078Z", + "start_time": "2023-11-09T18:22:01.643613141Z" + } + }, "outputs": [], "source": [ "get = GetAndSet()" @@ -411,7 
+523,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.108503703Z", + "start_time": "2023-11-09T18:22:01.643815676Z" + } + }, "outputs": [], "source": [ "get.add(get)" @@ -420,7 +537,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.108799427Z", + "start_time": "2023-11-09T18:22:01.644071731Z" + } + }, "outputs": [], "source": [ "get.addition(get)" @@ -429,7 +551,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.109426819Z", + "start_time": "2023-11-09T18:22:01.655190658Z" + } + }, "outputs": [], "source": [ "get.hello" @@ -445,7 +572,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.110263772Z", + "start_time": "2023-11-09T18:22:01.662211437Z" + } + }, "outputs": [], "source": [ "getattr(get, 'hello')" @@ -454,7 +586,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.110508234Z", + "start_time": "2023-11-09T18:22:01.679854902Z" + } + }, "outputs": [], "source": [ "get.hi" @@ -477,7 +614,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:02.111369769Z", + "start_time": "2023-11-09T18:22:01.685831397Z" + } + }, "outputs": [], "source": [ "import this" diff --git a/advanced-python/20DataAndPlotting.ipynb b/advanced-python/20DataAndPlotting.ipynb index 4c90ed76..f2d611df 100644 --- a/advanced-python/20DataAndPlotting.ipynb +++ b/advanced-python/20DataAndPlotting.ipynb @@ -32,7 +32,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:22:11.209901401Z", + "start_time": "2023-11-09T18:22:10.715117611Z" + } + }, "outputs": [], "source": [ "import mplhep\n", @@ -110,7 +115,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:11.213168925Z", + "start_time": "2023-11-09T18:22:10.715393509Z" + } + }, "outputs": [], "source": [ "my_file = uproot.open('https://cern.ch/starterkit/data/advanced-python-2018/real_data.root',\n", @@ -129,7 +139,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:11.577169Z", + "start_time": "2023-11-09T18:22:10.996475027Z" + } + }, "outputs": [], "source": [ "tree = my_file['DecayTree']\n", @@ -140,7 +155,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:24.452463022Z", + "start_time": "2023-11-09T18:22:11.577968658Z" + } + }, "outputs": [], "source": [ "# Load data as a pandas DataFrame\n", @@ -154,7 +174,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:24.470597938Z", + "start_time": "2023-11-09T18:22:24.447634455Z" + } + }, "outputs": [], "source": [ "data_df.columns" @@ -171,7 +196,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:24.855253849Z", + "start_time": "2023-11-09T18:22:24.453234659Z" + } + }, "outputs": [], "source": [ "# Start with a basic histogram\n", @@ -204,6 +234,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:22:24.855582435Z", + "start_time": "2023-11-09T18:22:24.819219840Z" } }, "outputs": [], @@ -219,6 +253,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:22:25.965614384Z", + "start_time": 
"2023-11-09T18:22:24.819392199Z" } }, "outputs": [], @@ -234,6 +272,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:22:26.500198095Z", + "start_time": "2023-11-09T18:22:25.966183784Z" } }, "outputs": [], @@ -249,6 +291,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:22:27.468758686Z", + "start_time": "2023-11-09T18:22:26.499064264Z" } }, "outputs": [], @@ -268,7 +314,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:29.632389925Z", + "start_time": "2023-11-09T18:22:27.444982487Z" + } + }, "outputs": [], "source": [ "def plot_mass(df):\n", @@ -278,6 +329,7 @@ " plt.xlabel('$J/\\\\psi$ mass [GeV]')\n", " plt.xlim(bins[0], bins[-1])\n", "\n", + "\n", "plot_mass(data_df)" ] }, @@ -291,7 +343,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:29.667859406Z", + "start_time": "2023-11-09T18:22:29.626043515Z" + } + }, "outputs": [], "source": [ "# When making the ROOT file we forgot to add some variables, no bother lets add them now!\n", @@ -309,7 +366,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:29.978476627Z", + "start_time": "2023-11-09T18:22:29.648794140Z" + } + }, "outputs": [], "source": [ "data_df.eval('mup_P = sqrt(mup_PX**2 + mup_PY**2 + mup_PZ**2)', inplace=True)\n", @@ -337,7 +399,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:30.368613646Z", + "start_time": "2023-11-09T18:22:29.678392856Z" + } + }, "outputs": [], "source": [ "plot_mass(data_df)\n", @@ -348,7 +415,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:22:31.531723191Z", + "start_time": "2023-11-09T18:22:30.368802764Z" + } + }, "outputs": [], "source": [ "plot_mass(data_df)\n", @@ -370,7 +442,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:32.129701102Z", + "start_time": "2023-11-09T18:22:31.166183480Z" + } + }, "outputs": [], "source": [ "def plot_mass(df, **kwargs):\n", @@ -380,6 +457,7 @@ " plt.xlabel('$J/\\\\psi$ mass [GeV]')\n", " plt.xlim(bins[0], bins[-1])\n", "\n", + "\n", "plot_mass(data_df, label='No cuts', density=1)\n", "data_with_cuts_df = data_df.query('Jpsi_PT > 4')\n", "plot_mass(data_with_cuts_df, label='$J/\\\\psi$ p$_T$ only', density=1)\n", @@ -398,7 +476,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:32.156206263Z", + "start_time": "2023-11-09T18:22:32.129165982Z" + } + }, "outputs": [], "source": [ "data_df.columns" @@ -407,7 +490,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:22:33.885426832Z", + "start_time": "2023-11-09T18:22:32.134501210Z" + } + }, "outputs": [], "source": [ "from python_lesson import check_truth\n", @@ -438,7 +526,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:21.141300103Z", + "start_time": "2023-11-09T18:22:33.874056558Z" + } + }, "outputs": [], "source": [ "with uproot.open('https://starterkit.web.cern.ch/starterkit/data/advanced-python-2018/simulated_data.root') as mc_file:\n", @@ -465,7 +558,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:21.631231066Z", + "start_time": "2023-11-09T18:23:21.182938710Z" + } + }, "outputs": [], "source": [ "bkg_df = data_df.query('~(3.0 < Jpsi_M < 3.2)')\n", @@ -491,7 
+589,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:22.134866766Z", + "start_time": "2023-11-09T18:23:21.630331279Z" + } + }, "outputs": [], "source": [ "var = 'Jpsi_PT'\n", @@ -509,7 +612,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:22.642315719Z", + "start_time": "2023-11-09T18:23:22.130434993Z" + } + }, "outputs": [], "source": [ "# Those are hard to compare!\n", @@ -532,7 +640,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:22.648142526Z", + "start_time": "2023-11-09T18:23:22.640877567Z" + } + }, "outputs": [], "source": [ "def plot_comparision(var, mc_df, bkg_df):\n", @@ -550,7 +663,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:22.657440397Z", + "start_time": "2023-11-09T18:23:22.644203876Z" + } + }, "outputs": [], "source": [] }, @@ -564,7 +682,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:39.984761182Z", + "start_time": "2023-11-09T18:23:22.649403635Z" + } + }, "outputs": [], "source": [ "for var in data_df.columns:\n", @@ -594,7 +717,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:23:39.987782824Z", + "start_time": "2023-11-09T18:23:39.981983050Z" + } + }, "outputs": [], "source": [] }, @@ -611,6 +739,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:23:40.835372688Z", + "start_time": "2023-11-09T18:23:39.986976807Z" } }, "outputs": [], @@ -626,6 +758,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": 
"2023-11-09T18:23:40.835551238Z", + "start_time": "2023-11-09T18:23:40.835014982Z" } }, "outputs": [], @@ -637,6 +773,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:23:40.835677817Z", + "start_time": "2023-11-09T18:23:40.835124429Z" } }, "outputs": [], @@ -664,4 +804,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/30Classification.ipynb b/advanced-python/30Classification.ipynb index 6ab9fe4a..7679df59 100644 --- a/advanced-python/30Classification.ipynb +++ b/advanced-python/30Classification.ipynb @@ -19,7 +19,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:09.454693438Z", + "start_time": "2023-11-09T18:25:07.925906320Z" + } + }, "outputs": [], "source": [ "%store -r bkg_df\n", @@ -30,7 +35,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:09.455020356Z", + "start_time": "2023-11-09T18:25:08.698996144Z" + } + }, "outputs": [], "source": [ "import mplhep\n", @@ -61,7 +71,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:11.748577377Z", + "start_time": "2023-11-09T18:25:09.007997107Z" + } + }, "outputs": [], "source": [ "plt.scatter(mc_df['mup_PT'], mc_df['mum_PT'], s=1, marker=',', label='Signal')\n", @@ -95,7 +110,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:11.762307655Z", + "start_time": "2023-11-09T18:25:11.747609552Z" + } + }, "outputs": [], "source": [ "training_columns = [\n", @@ -110,12 +130,17 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:11.882528359Z", + "start_time": "2023-11-09T18:25:11.752531431Z" 
+ } + }, "outputs": [], "source": [ "# We then define the classifier we want to use\n", - "bdt = GradientBoostingClassifier()\n", - "# bdt = XGBClassifier() # we could also use this one" + "# bdt = GradientBoostingClassifier() # we could also use this one\n", + "bdt = XGBClassifier(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better" ] }, { @@ -130,7 +155,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:11.882858520Z", + "start_time": "2023-11-09T18:25:11.795008544Z" + } + }, "outputs": [], "source": [ "bkg_df = bkg_df.copy()\n", @@ -141,7 +171,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:12.365576579Z", + "start_time": "2023-11-09T18:25:11.795121407Z" + } + }, "outputs": [], "source": [ "# Now merge the data together\n", @@ -153,7 +188,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:13.766805931Z", + "start_time": "2023-11-09T18:25:12.375467218Z" + } + }, "outputs": [], "source": [ "# We can now fit the BDT\n", @@ -170,7 +210,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:13.798097938Z", + "start_time": "2023-11-09T18:25:13.764223021Z" + } + }, "outputs": [], "source": [ "bdt.predict_proba(data_df[training_columns].head())" @@ -188,7 +233,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:13.866038246Z", + "start_time": "2023-11-09T18:25:13.787561597Z" + } + }, "outputs": [], "source": [ "# We can now use slicing to select column 1 in the array from for all rows\n", @@ -206,7 +256,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + 
"ExecuteTime": { + "end_time": "2023-11-09T18:25:14.147837572Z", + "start_time": "2023-11-09T18:25:13.865725416Z" + } + }, "outputs": [], "source": [ "mc_df['BDT'] = bdt.predict_proba(mc_df[training_columns])[:,1]\n", @@ -225,7 +280,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:14.352377276Z", + "start_time": "2023-11-09T18:25:14.147685274Z" + } + }, "outputs": [], "source": [ "for df in [mc_df, bkg_df, data_df, training_data]:\n", @@ -244,7 +304,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:14.364834431Z", + "start_time": "2023-11-09T18:25:14.351502685Z" + } + }, "outputs": [], "source": [ "def plot_comparision(var, mc_df, bkg_df):\n", @@ -258,6 +323,7 @@ " plt.xlim(bins[0], bins[-1])\n", " plt.legend(loc='best')\n", "\n", + "\n", "def plot_mass(df, **kwargs):\n", " h, bins = np.histogram(df['Jpsi_M'], bins=100, range=[2.75, 3.5])\n", " mplhep.histplot(h, bins, yerr=True, **kwargs) # feel free to adjust\n", @@ -269,7 +335,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:14.602892746Z", + "start_time": "2023-11-09T18:25:14.359199993Z" + } + }, "outputs": [], "source": [ "plot_comparision('BDT', mc_df, bkg_df)" @@ -285,7 +356,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:16.410750494Z", + "start_time": "2023-11-09T18:25:14.601766515Z" + } + }, "outputs": [], "source": [ "plot_mass(data_df, label='No cuts', density=1)\n", @@ -312,7 +388,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.390975490Z", + "start_time": "2023-11-09T18:25:16.409914497Z" + } + }, "outputs": [], "source": [ "# That would be too nice to use 
in analysis\n", @@ -336,7 +417,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.576076450Z", + "start_time": "2023-11-09T18:25:17.380970864Z" + } + }, "outputs": [], "source": [ "plot_comparision('BDT', mc_df, bkg_df)" @@ -356,7 +442,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.674307301Z", + "start_time": "2023-11-09T18:25:17.573465928Z" + } + }, "outputs": [], "source": [ "y_score = bdt.predict_proba(training_data[training_columns])[:,1]\n", @@ -378,7 +469,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.825716688Z", + "start_time": "2023-11-09T18:25:17.695016918Z" + } + }, "outputs": [], "source": [ "plt.plot([0, 1], [0, 1], color='grey', linestyle='--', label='Randomly guess')\n", @@ -402,7 +498,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.883519883Z", + "start_time": "2023-11-09T18:25:17.824214208Z" + } + }, "outputs": [], "source": [ "area = auc(fpr, tpr)" @@ -411,7 +512,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:17.996072891Z", + "start_time": "2023-11-09T18:25:17.871252944Z" + } + }, "outputs": [], "source": [ "plt.plot([0, 1], [0, 1], color='grey', linestyle='--')\n", @@ -437,7 +543,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.048225663Z", + "start_time": "2023-11-09T18:25:17.994427329Z" + } + }, "outputs": [], "source": [ "n_sig = 1200\n", @@ -454,7 +565,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:25:18.048735130Z", + "start_time": "2023-11-09T18:25:18.035311051Z" + } + }, "outputs": [], "source": [ "S = n_sig*tpr\n", @@ -472,7 +588,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.256753265Z", + "start_time": "2023-11-09T18:25:18.035655070Z" + } + }, "outputs": [], "source": [ "plt.plot(thresholds, metric)\n", @@ -491,7 +612,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.257317434Z", + "start_time": "2023-11-09T18:25:18.243137106Z" + } + }, "outputs": [], "source": [ "optimal_index = np.argmax(metric)\n", @@ -503,7 +629,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.635953003Z", + "start_time": "2023-11-09T18:25:18.243349315Z" + } + }, "outputs": [], "source": [ "plot_mass(data_df, label='No cuts', density=1)\n", @@ -517,7 +648,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.742133399Z", + "start_time": "2023-11-09T18:25:18.551392406Z" + } + }, "outputs": [], "source": [ "plot_mass(data_with_cuts_df, label='Using BDT only')" @@ -535,7 +671,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.813621527Z", + "start_time": "2023-11-09T18:25:18.743820685Z" + } + }, "outputs": [], "source": [ "def plot_roc(bdt, training_data, training_columns, label=None):\n", @@ -560,7 +701,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:18.813858248Z", + "start_time": "2023-11-09T18:25:18.791163354Z" + } + }, "outputs": [], "source": [ "def plot_significance(bdt, training_data, training_columns, label=None):\n", @@ 
-585,7 +731,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:30:20.710903847Z", + "start_time": "2023-11-09T18:25:18.791369479Z" + } + }, "outputs": [], "source": [ "with uproot.open('https://cern.ch/starterkit/data/advanced-python-2018/real_data.root') as datafile:\n", @@ -632,7 +783,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:30:21.685114217Z", + "start_time": "2023-11-09T18:30:20.708314393Z" + } + }, "outputs": [], "source": [ "%store bkg_df\n", @@ -643,7 +799,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:30:21.727449098Z", + "start_time": "2023-11-09T18:30:21.679809042Z" + } + }, "outputs": [], "source": [] } @@ -669,4 +830,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/31ClassificationExtension.ipynb b/advanced-python/31ClassificationExtension.ipynb index 870e7c9b..2b9421ea 100644 --- a/advanced-python/31ClassificationExtension.ipynb +++ b/advanced-python/31ClassificationExtension.ipynb @@ -25,7 +25,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:00.013914071Z", + "start_time": "2023-11-09T18:39:58.305187065Z" + } + }, "outputs": [], "source": [ "%store -r training_data\n", @@ -38,7 +43,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:00.401140920Z", + "start_time": "2023-11-09T18:40:00.001945534Z" + } + }, "outputs": [], "source": [ "import mplhep\n", @@ -52,7 +62,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:00.424165539Z", + "start_time": 
"2023-11-09T18:40:00.375142416Z" + } + }, "outputs": [], "source": [ "def plot_comparision(var, mc_df, bkg_df):\n", @@ -66,15 +81,16 @@ " plt.xlim(bins[0], bins[-1])\n", " plt.legend(loc='best')\n", "\n", + "\n", "def plot_significance(bdt, training_data, training_columns, label=None):\n", - " y_score = bdt.predict_proba(training_data[training_columns])[:,1]\n", + " y_score = bdt.predict_proba(training_data[training_columns])[:, 1]\n", " fpr, tpr, thresholds = roc_curve(training_data['catagory'], y_score)\n", "\n", " n_sig = 1200\n", " n_bkg = 23000\n", - " S = n_sig*tpr\n", - " B = n_bkg*fpr\n", - " metric = S/np.sqrt(S+B)\n", + " S = n_sig * tpr\n", + " B = n_bkg * fpr\n", + " metric = S / np.sqrt(S + B)\n", "\n", " plt.plot(thresholds, metric, label=label)\n", " plt.xlabel('BDT cut value')\n", @@ -84,8 +100,9 @@ " optimal_cut = thresholds[np.argmax(metric)]\n", " plt.axvline(optimal_cut, color='black', linestyle='--')\n", "\n", + "\n", "def plot_roc(bdt, training_data, training_columns, label=None):\n", - " y_score = bdt.predict_proba(training_data[training_columns])[:,1]\n", + " y_score = bdt.predict_proba(training_data[training_columns])[:, 1]\n", " fpr, tpr, thresholds = roc_curve(training_data['catagory'], y_score)\n", " area = auc(fpr, tpr)\n", "\n", @@ -106,46 +123,66 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:22.730604251Z", + "start_time": "2023-11-09T18:40:00.386255547Z" + } + }, "outputs": [], "source": [ "# Train the Gradient Booster\n", - "bdt_1 = GradientBoostingClassifier()\n", + "bdt_1 = GradientBoostingClassifier(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "bdt_1.fit(training_data[training_columns], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['BDT'] = bdt_1.predict_proba(df[training_columns])[:,1]" + " df['BDT'] = 
bdt_1.predict_proba(df[training_columns])[:, 1]" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:32.407167754Z", + "start_time": "2023-11-09T18:40:22.738816458Z" + } + }, "outputs": [], "source": [ "# Train the Adaptive Booster\n", - "bdt_2 = AdaBoostClassifier()\n", + "bdt_2 = AdaBoostClassifier(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "bdt_2.fit(training_data[training_columns], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['BDT_2'] = bdt_2.predict_proba(df[training_columns])[:,1]" + " df['BDT_2'] = bdt_2.predict_proba(df[training_columns])[:, 1]" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:33.896052239Z", + "start_time": "2023-11-09T18:40:32.406995877Z" + } + }, "outputs": [], "source": [ "# Train XGBoost Classifier\n", - "xgboost_bdt = XGBClassifier()\n", + "xgboost_bdt = XGBClassifier(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "xgboost_bdt.fit(training_data[training_columns], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['XGBoost_BDT'] = xgboost_bdt.predict_proba(df[training_columns])[:,1]" + " df['XGBoost_BDT'] = xgboost_bdt.predict_proba(df[training_columns])[:, 1]" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:37.959441690Z", + "start_time": "2023-11-09T18:40:33.890325623Z" + } + }, "outputs": [], "source": [ "plt.figure()\n", @@ -194,7 +231,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:38.867894240Z", + "start_time": "2023-11-09T18:40:37.963705203Z" + } + }, "outputs": [], "source": [ "for 
df in [mc_df, bkg_df, data_df, training_data]:\n", @@ -226,27 +268,32 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.395771938Z", + "start_time": "2023-11-09T18:40:38.878807747Z" + } + }, "outputs": [], "source": [ - "bdt_1 = GradientBoostingClassifier()\n", + "# bdtclass = GradientBoostingClassifier()\n", + "bdtclass = XGBClassifier # we could also use this one\n", + "bdt_1 = bdtclass(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "bdt_1.fit(training_data[training_columns], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['BDT'] = bdt_1.predict_proba(df[training_columns])[:,1]\n", + " df['BDT'] = bdt_1.predict_proba(df[training_columns])[:, 1]\n", "\n", - "bdt_2 = GradientBoostingClassifier()\n", + "bdt_2 = bdtclass(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "training_columns_2 = training_columns + ['IPmin']\n", "bdt_2.fit(training_data[training_columns_2], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['BDT_2'] = bdt_2.predict_proba(df[training_columns_2])[:,1]\n", - "\n", + " df['BDT_2'] = bdt_2.predict_proba(df[training_columns_2])[:, 1]\n", "\n", - "bdt_3 = GradientBoostingClassifier()\n", + "bdt_3 = bdtclass(n_estimators=20) # less estimator is faster, for demonstration, but 100-300 is usually better\n", "training_columns_3 = training_columns + ['IPdiff']\n", "bdt_3.fit(training_data[training_columns_3], training_data['catagory'])\n", "for df in [mc_df, bkg_df, data_df, training_data]:\n", - " df['BDT_3'] = bdt_3.predict_proba(df[training_columns_3])[:,1]\n", - "\n", + " df['BDT_3'] = bdt_3.predict_proba(df[training_columns_3])[:, 1]\n", "\n", "plt.figure()\n", "plot_comparision('BDT', mc_df, bkg_df)\n", @@ -328,7 +375,12 @@ { "cell_type": "code", "execution_count": null, - 
"metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.395898908Z", + "start_time": "2023-11-09T18:40:44.896492177Z" + } + }, "outputs": [], "source": [] } @@ -354,4 +406,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/32BoostingToUniformity.ipynb b/advanced-python/32BoostingToUniformity.ipynb old mode 100755 new mode 100644 index b04054e6..e1b81ff5 --- a/advanced-python/32BoostingToUniformity.ipynb +++ b/advanced-python/32BoostingToUniformity.ipynb @@ -25,7 +25,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:39.380065511Z", + "start_time": "2023-11-09T18:25:38.723060607Z" + } + }, "outputs": [], "source": [ "import uproot\n", @@ -48,7 +53,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:26:25.983039940Z", + "start_time": "2023-11-09T18:25:39.055897465Z" + } + }, "outputs": [], "source": [ "used_columns = [\"Y1\", \"Y2\", \"Y3\", \"M2AB\", \"M2AC\"]\n", @@ -70,7 +80,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:26:26.722476388Z", + "start_time": "2023-11-09T18:26:26.022942250Z" + } + }, "outputs": [], "source": [ "def plot_distribution(data_frame, var_name1='M2AB', var_name2='M2AC', bins=40):\n", @@ -80,6 +95,7 @@ " plt.ylabel(var_name2)\n", " plt.colorbar()\n", "\n", + "\n", "plt.figure(figsize=(12, 6))\n", "plt.subplot(1, 2, 1), plt.title(\"signal\"), plot_distribution(data[labels==1])\n", "plt.subplot(1, 2, 2), plt.title(\"background\"), plot_distribution(data[labels==0]);" @@ -95,7 +111,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:26:26.738984299Z", + "start_time": "2023-11-09T18:26:26.722774930Z" + } + }, "outputs": [], 
"source": [ "trainX, testX, trainY, testY = train_test_split(data, labels, random_state=42, test_size=0.5)" @@ -111,7 +132,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:26:26.847926700Z", + "start_time": "2023-11-09T18:26:26.737604646Z" + } + }, "outputs": [], "source": [ "uniform_features = [\"M2AB\", \"M2AC\"]\n", @@ -130,7 +156,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:30:50.832483520Z", + "start_time": "2023-11-09T18:26:26.744692640Z" + } + }, "outputs": [], "source": [ "classifiers = {}\n", @@ -171,7 +202,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:30:57.270917786Z", + "start_time": "2023-11-09T18:30:50.874988212Z" + } + }, "outputs": [], "source": [ "from sklearn.metrics import roc_auc_score\n", @@ -181,7 +217,7 @@ " output = clf.predict_proba(trainX[train_features])\n", " else:\n", " output = clf.predict_proba(trainX[train_features + uniform_features])\n", - " print('Area under curve: {}'.format(roc_auc_score(trainY, output[:,1])))" + " print(f'Area under curve: {roc_auc_score(trainY, output[:,1])}')" ] }, { @@ -194,7 +230,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:31:04.075394547Z", + "start_time": "2023-11-09T18:30:57.271060612Z" + } + }, "outputs": [], "source": [ "from sklearn.metrics import roc_curve\n", @@ -219,6 +260,7 @@ " # We can make the plot look nicer by forcing the grid to be square\n", " plt.gca().set_aspect('equal', adjustable='box')\n", "\n", + "\n", "plt.figure(figsize=(8,8))\n", "plot_roc(classifiers['AdaBoost'], testY, testX, train_features, 'AdaBoost')\n", "plot_roc(classifiers['uGB+knnAda'], testY, testX, train_features+uniform_features, 'uGB+knnAda')\n", @@ -229,7 +271,12 
@@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:31:04.079427234Z", + "start_time": "2023-11-09T18:31:04.071407036Z" + } + }, "outputs": [], "source": [] } @@ -258,4 +305,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/40Histograms.ipynb b/advanced-python/40Histograms.ipynb index b1d559e4..808cd467 100644 --- a/advanced-python/40Histograms.ipynb +++ b/advanced-python/40Histograms.ipynb @@ -34,6 +34,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.090763040Z", + "start_time": "2023-11-09T18:40:41.774097506Z" } }, "outputs": [], @@ -55,6 +59,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.092581529Z", + "start_time": "2023-11-09T18:40:42.719110990Z" } }, "outputs": [], @@ -121,14 +129,24 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.092804229Z", + "start_time": "2023-11-09T18:40:42.910621980Z" + } + }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.092969634Z", + "start_time": "2023-11-09T18:40:42.916739161Z" + } + }, "outputs": [], "source": [ "start, stop = data_df['Jpsi_M'].min(), data_df['Jpsi_M'].max()\n", @@ -148,6 +166,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.093124685Z", + "start_time": "2023-11-09T18:40:42.927046922Z" } }, "outputs": [], @@ -158,7 +180,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.094256975Z", + "start_time": "2023-11-09T18:40:42.937126378Z" + } + }, "outputs": [], "source": [ "data_h.fill(data_df['Jpsi_M'])" @@ 
-167,7 +194,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.094471841Z", + "start_time": "2023-11-09T18:40:42.949363041Z" + } + }, "outputs": [], "source": [ "mc_h = hist.Hist(axis1).fill(mc_df['Jpsi_M']) # we can also chain the commands" @@ -187,7 +219,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.095025556Z", + "start_time": "2023-11-09T18:40:42.955171365Z" + } + }, "outputs": [], "source": [ "mplhep.histplot(data_h)" @@ -206,7 +243,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.095656170Z", + "start_time": "2023-11-09T18:40:43.211187900Z" + } + }, "outputs": [], "source": [ "data_h.plot1d()" @@ -215,7 +257,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.096292049Z", + "start_time": "2023-11-09T18:40:43.415174250Z" + } + }, "outputs": [], "source": [ "data_h.plot1d()\n", @@ -225,7 +272,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.096697901Z", + "start_time": "2023-11-09T18:40:43.622364563Z" + } + }, "outputs": [], "source": [ "mc_df.columns" @@ -243,7 +295,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.097017931Z", + "start_time": "2023-11-09T18:40:43.629433139Z" + } + }, "outputs": [], "source": [ "start, stop = data_df['BDT'].min(), data_df['BDT'].max()\n", @@ -253,7 +310,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.097244707Z", + "start_time": "2023-11-09T18:40:43.671235206Z" + } + }, "outputs": [], 
"source": [ "mc_h2d = hist.Hist(axis1, axis_bdt).fill(BDT=mc_df['BDT'], mass=mc_df['Jpsi_M']) # using names" @@ -262,7 +324,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.136940250Z", + "start_time": "2023-11-09T18:40:43.671469880Z" + } + }, "outputs": [], "source": [ "data_h2d = hist.Hist(axis1, axis_bdt)\n", @@ -272,7 +339,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.138342837Z", + "start_time": "2023-11-09T18:40:43.712937233Z" + } + }, "outputs": [], "source": [ "mplhep.hist2dplot(data_h2d)" @@ -290,7 +362,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.138747800Z", + "start_time": "2023-11-09T18:40:44.022960001Z" + } + }, "outputs": [], "source": [ "# Access by bin number\n", @@ -311,7 +388,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.140841528Z", + "start_time": "2023-11-09T18:40:44.029760988Z" + } + }, "outputs": [], "source": [ "data_h2d.density()" @@ -329,7 +411,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.142148636Z", + "start_time": "2023-11-09T18:40:44.062252938Z" + } + }, "outputs": [], "source": [ "data_h2d.project(\"mass\") # we will here retain the 1D histogram" @@ -347,7 +434,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.142629942Z", + "start_time": "2023-11-09T18:40:44.071307572Z" + } + }, "outputs": [], "source": [ "data_h2d.axes" @@ -356,7 +448,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:40:45.143016256Z", + "start_time": "2023-11-09T18:40:44.076331708Z" + } + }, "outputs": [], "source": [ "data_h2d.axes['mass']" @@ -365,7 +462,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.143633118Z", + "start_time": "2023-11-09T18:40:44.087011298Z" + } + }, "outputs": [], "source": [ "data_h2d.axes['mass'].edges" @@ -374,7 +476,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.144130758Z", + "start_time": "2023-11-09T18:40:44.098073547Z" + } + }, "outputs": [], "source": [ "data_h2d.axes['mass'].centers # bin centers" @@ -383,7 +490,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.144584441Z", + "start_time": "2023-11-09T18:40:44.102908180Z" + } + }, "outputs": [], "source": [ "data_h2d.axes['mass'].widths # bin widths" @@ -401,7 +513,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.145653144Z", + "start_time": "2023-11-09T18:40:44.111345363Z" + } + }, "outputs": [], "source": [ "data_h2d.axes.edges\n", @@ -423,7 +540,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.145851826Z", + "start_time": "2023-11-09T18:40:44.159338713Z" + } + }, "outputs": [], "source": [] }, @@ -441,7 +563,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.172428378Z", + "start_time": "2023-11-09T18:40:44.159650878Z" + } + }, "outputs": [], "source": [ "data_df_bdt = data_df.query(\"BDT > 0.9\")\n", @@ -453,7 +580,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + 
"end_time": "2023-11-09T18:40:45.173168648Z", + "start_time": "2023-11-09T18:40:44.190637837Z" + } + }, "outputs": [], "source": [ "ratio = data_bdt_h2d.project(\"mass\") / data_h2d.project(\"mass\")" @@ -462,7 +594,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.200611311Z", + "start_time": "2023-11-09T18:40:44.231188307Z" + } + }, "outputs": [], "source": [ "ratio.plot1d()" @@ -471,7 +608,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.213022780Z", + "start_time": "2023-11-09T18:40:44.419136722Z" + } + }, "outputs": [], "source": [ "ratio_large = ratio * 10\n", @@ -488,7 +630,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.213326313Z", + "start_time": "2023-11-09T18:40:44.667191807Z" + } + }, "outputs": [], "source": [] }, @@ -506,7 +653,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.213535481Z", + "start_time": "2023-11-09T18:40:44.667447153Z" + } + }, "outputs": [], "source": [ "weight = np.random.normal(1., 0.1, size=mc_df.shape[0])\n", @@ -517,7 +669,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.231990095Z", + "start_time": "2023-11-09T18:40:44.667709042Z" + } + }, "outputs": [], "source": [ "mc_h2d" @@ -526,7 +683,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:45.395015033Z", + "start_time": "2023-11-09T18:40:44.698968002Z" + } + }, "outputs": [], "source": [ "mc_h2d.variances()" @@ -542,7 +704,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + 
"end_time": "2023-11-09T18:40:45.395215588Z", + "start_time": "2023-11-09T18:40:44.743359631Z" + } + }, "outputs": [], "source": [] } @@ -568,4 +735,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/advanced-python/45DemoReweighting.ipynb b/advanced-python/45DemoReweighting.ipynb index 63cb50be..abcf3a2f 100644 --- a/advanced-python/45DemoReweighting.ipynb +++ b/advanced-python/45DemoReweighting.ipynb @@ -25,7 +25,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:25:53.648661700Z", + "start_time": "2023-11-09T18:25:53.142978500Z" + } + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -47,7 +52,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:27:45.758389346Z", + "start_time": "2023-11-09T18:25:53.489875451Z" + } + }, "outputs": [], "source": [ "columns = ['hSPD', 'pt_b', 'pt_phi', 'vchi2_b', 'mu_pt_sum']\n", @@ -80,7 +90,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:27:45.959472949Z", + "start_time": "2023-11-09T18:27:45.799133696Z" + } + }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", @@ -97,13 +112,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:27:45.975481005Z", + "start_time": "2023-11-09T18:27:45.965651595Z" + } + }, "outputs": [], "source": [ "from hep_ml.metrics_utils import ks_2samp_weighted\n", "\n", "hist_settings = {'bins': 100, 'density': True, 'alpha': 0.7}\n", "\n", + "\n", "def draw_distributions(original, target, new_original_weights):\n", " plt.figure(figsize=[15, 7])\n", " for id, column in enumerate(columns, 1):\n", @@ -127,7 +148,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": 
{ + "ExecuteTime": { + "end_time": "2023-11-09T18:27:45.990266338Z", + "start_time": "2023-11-09T18:27:45.971355495Z" + } + }, "outputs": [], "source": [ "# pay attention, actually we have very few data\n", @@ -137,7 +163,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:27:52.291018726Z", + "start_time": "2023-11-09T18:27:45.983111062Z" + } + }, "outputs": [], "source": [ "draw_distributions(original, target, original_weights)" @@ -153,7 +184,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:27:56.786090353Z", + "start_time": "2023-11-09T18:27:52.282166289Z" + } + }, "outputs": [], "source": [ "draw_distributions(original_train, target_train, original_weights_train)" @@ -169,7 +205,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:28:00.210019779Z", + "start_time": "2023-11-09T18:27:56.779869734Z" + } + }, "outputs": [], "source": [ "draw_distributions(original_test, target_test, original_weights_test)" @@ -200,7 +241,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:28:21.654914654Z", + "start_time": "2023-11-09T18:28:00.206358826Z" + } + }, "outputs": [], "source": [ "bins_reweighter = reweight.BinsReweighter(n_bins=20, n_neighs=1.)\n", @@ -229,7 +275,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:22.286971428Z", + "start_time": "2023-11-09T18:28:21.649791476Z" + } + }, "outputs": [], "source": [ "reweighter = reweight.GBReweighter(n_estimators=250, learning_rate=0.1, max_depth=3, min_samples_leaf=1000,\n", @@ -253,7 +304,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + 
"end_time": "2023-11-09T18:33:22.445738489Z", + "start_time": "2023-11-09T18:33:22.284510361Z" + } + }, "outputs": [], "source": [ "def check_ks_of_expression(expression):\n", @@ -271,7 +327,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:22.447484398Z", + "start_time": "2023-11-09T18:33:22.327051319Z" + } + }, "outputs": [], "source": [ "check_ks_of_expression('hSPD')" @@ -280,7 +341,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:22.658006592Z", + "start_time": "2023-11-09T18:33:22.415016102Z" + } + }, "outputs": [], "source": [ "check_ks_of_expression('hSPD * pt_phi')" @@ -289,7 +355,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:22.903957662Z", + "start_time": "2023-11-09T18:33:22.656149470Z" + } + }, "outputs": [], "source": [ "check_ks_of_expression('hSPD * pt_phi * vchi2_b')" @@ -298,7 +369,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:23.155559842Z", + "start_time": "2023-11-09T18:33:22.899381940Z" + } + }, "outputs": [], "source": [ "check_ks_of_expression('pt_b * pt_phi / hSPD ')" @@ -307,7 +383,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:23.345847337Z", + "start_time": "2023-11-09T18:33:23.155057276Z" + } + }, "outputs": [], "source": [ "check_ks_of_expression('hSPD * pt_b * vchi2_b / pt_phi')" @@ -329,7 +410,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:42.202790691Z", + "start_time": "2023-11-09T18:33:23.345111007Z" + } + }, "outputs": [], "source": [ "from sklearn.ensemble import 
GradientBoostingClassifier\n", @@ -377,7 +463,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:43.726731060Z", + "start_time": "2023-11-09T18:33:42.207967134Z" + } + }, "outputs": [], "source": [ "plt.hist(weights['gb_weights'], bins=50)\n", @@ -388,7 +479,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:33:43.730253817Z", + "start_time": "2023-11-09T18:33:43.723035309Z" + } + }, "outputs": [], "source": [ "np.max(weights['gb_weights']), np.sum(weights['gb_weights'])" @@ -425,7 +521,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:38:48.740721947Z", + "start_time": "2023-11-09T18:33:43.728815796Z" + } + }, "outputs": [], "source": [ "# define base reweighter\n", @@ -454,7 +555,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:00.647769707Z", + "start_time": "2023-11-09T18:38:48.738767636Z" + } + }, "outputs": [], "source": [ "data = np.concatenate([original, target])\n", @@ -476,7 +582,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:40:01.473145845Z", + "start_time": "2023-11-09T18:40:00.647093226Z" + } + }, "outputs": [], "source": [ "plt.hist(weights['2-folding'], bins=50)\n", @@ -506,4 +617,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/50LikelihoodInference.ipynb b/advanced-python/50LikelihoodInference.ipynb index 669244a9..1f70ce61 100644 --- a/advanced-python/50LikelihoodInference.ipynb +++ b/advanced-python/50LikelihoodInference.ipynb @@ -41,7 +41,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:41:04.548448189Z", + "start_time": "2023-11-09T18:41:03.508890856Z" + } + }, "outputs": [], "source": [ "%store -r bkg_df\n", @@ -52,7 +57,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.555465994Z", + "start_time": "2023-11-09T18:41:04.545771470Z" + } + }, "outputs": [], "source": [ "import hepstats\n", @@ -65,7 +75,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.607549796Z", + "start_time": "2023-11-09T18:41:04.552563192Z" + } + }, "outputs": [], "source": [ "# apply cuts\n", @@ -88,7 +103,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.643172087Z", + "start_time": "2023-11-09T18:41:04.628270711Z" + } + }, "outputs": [], "source": [ "obs = zfit.Space('Jpsi_M', limits=(2.8, 3.5)) # defining the observable" @@ -97,7 +117,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.643396920Z", + "start_time": "2023-11-09T18:41:04.636406001Z" + } + }, "outputs": [], "source": [ "# bkg = zfit.Data.from_pandas(bkg_df['Jpsi_M'], obs=obs)\n", @@ -109,7 +134,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.866062850Z", + "start_time": "2023-11-09T18:41:04.643452582Z" + } + }, "outputs": [], "source": [ "mc = zfit.Data.from_pandas(mc_df['Jpsi_M'], obs=obs)\n", @@ -130,7 +160,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.959244148Z", + "start_time": "2023-11-09T18:41:04.910984590Z" + } + }, "outputs": [], "source": [ "lambd = zfit.Parameter('lambda', -0.1, -2, 2)\n", @@ -144,7 +179,12 @@ { "cell_type": "code", 
"execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.959518485Z", + "start_time": "2023-11-09T18:41:04.959061502Z" + } + }, "outputs": [], "source": [ "bkg_pdf = zfit.pdf.Exponential(lambd, obs=obs)\n", @@ -154,7 +194,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.959692598Z", + "start_time": "2023-11-09T18:41:04.959203182Z" + } + }, "outputs": [], "source": [ "sig_pdf = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma)\n", @@ -164,7 +209,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.965883067Z", + "start_time": "2023-11-09T18:41:04.959353512Z" + } + }, "outputs": [], "source": [ "model = zfit.pdf.SumPDF([bkg_pdf, sig_pdf])" @@ -182,7 +232,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:04.966167348Z", + "start_time": "2023-11-09T18:41:04.959441956Z" + } + }, "outputs": [], "source": [ "def plot_fit(model, data, nbins=30, ax=None):\n", @@ -190,7 +245,6 @@ " if ax is None:\n", " ax = plt.gca()\n", "\n", - "\n", " lower, upper = data.space.limit1d\n", "\n", " # Creates and histogram of the data and plots it with mplhep.\n", @@ -219,7 +273,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:07.019191120Z", + "start_time": "2023-11-09T18:41:04.959591758Z" + } + }, "outputs": [], "source": [ "plot_fit(model, data) # before the fit" @@ -237,7 +296,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:07.303510860Z", + "start_time": "2023-11-09T18:41:07.018115082Z" + } + }, "outputs": [], "source": [ "sig_nll = zfit.loss.UnbinnedNLL(sig_pdf, mc)" @@ -253,7 +317,12 @@ { 
"cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:07.315026018Z", + "start_time": "2023-11-09T18:41:07.282948708Z" + } + }, "outputs": [], "source": [ "minimizer = zfit.minimize.Minuit()\n", @@ -264,7 +333,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:08.077766311Z", + "start_time": "2023-11-09T18:41:07.283066840Z" + } + }, "outputs": [], "source": [ "minimizer.minimize(sig_nll)" @@ -282,7 +356,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:08.081842858Z", + "start_time": "2023-11-09T18:41:08.067454337Z" + } + }, "outputs": [], "source": [ "sigma.floating = False" @@ -291,7 +370,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:08.677325245Z", + "start_time": "2023-11-09T18:41:08.071212416Z" + } + }, "outputs": [], "source": [ "nll = zfit.loss.ExtendedUnbinnedNLL(model, data)" @@ -300,7 +384,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:09.481473179Z", + "start_time": "2023-11-09T18:41:08.679126453Z" + } + }, "outputs": [], "source": [ "result = minimizer.minimize(nll)" @@ -309,7 +398,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:09.501631838Z", + "start_time": "2023-11-09T18:41:09.483882912Z" + } + }, "outputs": [], "source": [ "result" @@ -318,7 +412,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:10.998164125Z", + "start_time": "2023-11-09T18:41:09.499282781Z" + } + }, "outputs": [], "source": [ "result.hesse() # calculate hessian error\n", 
@@ -329,7 +428,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:11.291271535Z", + "start_time": "2023-11-09T18:41:10.991252129Z" + } + }, "outputs": [], "source": [ "plot_fit(model, data)" @@ -346,8 +450,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:11.471911896Z", + "start_time": "2023-11-09T18:41:11.293166815Z" } }, "outputs": [], @@ -377,8 +482,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:13.160970054Z", + "start_time": "2023-11-09T18:41:11.474037243Z" } }, "outputs": [], diff --git a/advanced-python/60sPlot.ipynb b/advanced-python/60sPlot.ipynb index da7adf6f..2b713810 100644 --- a/advanced-python/60sPlot.ipynb +++ b/advanced-python/60sPlot.ipynb @@ -15,7 +15,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:33.690188079Z", + "start_time": "2023-11-09T18:41:33.174962617Z" + } + }, "outputs": [], "source": [ "import mplhep\n", @@ -27,7 +32,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:33.690535258Z", + "start_time": "2023-11-09T18:41:33.175105562Z" + } + }, "outputs": [], "source": [ "size = 10000\n", @@ -50,7 +60,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:35.114704154Z", + "start_time": "2023-11-09T18:41:33.175205312Z" + } + }, "outputs": [], "source": [ "plt.hist(sig_data, color='b', alpha=0.5, bins=30, label='electron')\n", @@ -77,7 +92,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-11-09T18:41:35.169658981Z", + "start_time": "2023-11-09T18:41:35.114998403Z" + } + }, "outputs": [], "source": [ "n_sig1, n_bck1 = 8000, 2000\n", @@ -89,7 +109,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:35.640708376Z", + "start_time": "2023-11-09T18:41:35.159083078Z" + } + }, "outputs": [], "source": [ "plt.figure(figsize=[15, 6])\n", @@ -140,7 +165,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:35.761702659Z", + "start_time": "2023-11-09T18:41:35.641363489Z" + } + }, "outputs": [], "source": [ "def plot_with_weights(datas, weights, **kargs):\n", @@ -153,7 +183,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:35.974742303Z", + "start_time": "2023-11-09T18:41:35.682988823Z" + } + }, "outputs": [], "source": [ "plot_with_weights([first_bin, second_bin], [n_bck2, -n_bck1], density=True, label='reconstructed electron')\n", @@ -174,7 +209,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:36.278752546Z", + "start_time": "2023-11-09T18:41:35.973225726Z" + } + }, "outputs": [], "source": [ "plot_with_weights([first_bin, second_bin], [n_bck2, -n_bck1], density=True, label='reconstructed electons', edgecolor='none')\n", @@ -194,7 +234,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:36.370668105Z", + "start_time": "2023-11-09T18:41:36.281084677Z" + } + }, "outputs": [], "source": [ "plt.bar([0, 2, 4], [3, 2, 1], width=1, color='b', alpha=0.5)\n", @@ -237,7 +282,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:36.463010940Z", + 
"start_time": "2023-11-09T18:41:36.367032133Z" + } + }, "outputs": [], "source": [ "mu = zfit.Parameter('mu', 5279, 5100, 5400)\n", @@ -249,7 +299,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:36.474956577Z", + "start_time": "2023-11-09T18:41:36.420048637Z" + } + }, "outputs": [], "source": [ "obs = zfit.Space('mass', (5000, 6000))\n", @@ -273,8 +328,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:39.707390072Z", + "start_time": "2023-11-09T18:41:36.467019213Z" } }, "outputs": [], @@ -321,8 +377,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:41.699026635Z", + "start_time": "2023-11-09T18:41:39.708082190Z" } }, "outputs": [], @@ -343,7 +400,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:41.701583440Z", + "start_time": "2023-11-09T18:41:41.690949558Z" + } + }, "outputs": [], "source": [ "def plot_fit_projection(model, data, nbins=30, ax=None):\n", @@ -379,8 +441,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:42.049462266Z", + "start_time": "2023-11-09T18:41:41.695055246Z" } }, "outputs": [], @@ -416,10 +479,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, - "scrolled": true + "scrolled": true, + "ExecuteTime": { + "end_time": "2023-11-09T18:41:42.306789925Z", + "start_time": "2023-11-09T18:41:42.047242018Z" + } }, "outputs": [], "source": [ @@ -434,8 +498,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": 
"2023-11-09T18:41:42.308477707Z", + "start_time": "2023-11-09T18:41:42.255058309Z" } }, "outputs": [], @@ -455,8 +520,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:43.917695568Z", + "start_time": "2023-11-09T18:41:42.302977717Z" } }, "outputs": [], @@ -491,8 +557,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:43.935640680Z", + "start_time": "2023-11-09T18:41:43.914957342Z" } }, "outputs": [], @@ -511,8 +578,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2023-11-09T18:41:44.217521866Z", + "start_time": "2023-11-09T18:41:43.919368369Z" } }, "outputs": [], @@ -536,7 +604,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:44.222398796Z", + "start_time": "2023-11-09T18:41:44.214787904Z" + } + }, "outputs": [], "source": [ "from scipy.stats import expon, norm" @@ -545,7 +618,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:44.615692777Z", + "start_time": "2023-11-09T18:41:44.221274727Z" + } + }, "outputs": [], "source": [ "plt.figure(figsize=[15, 6])\n", @@ -584,7 +662,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:44.950482870Z", + "start_time": "2023-11-09T18:41:44.614171146Z" + } + }, "outputs": [], "source": [ "plt.figure(figsize=[15, 6])\n", @@ -618,7 +701,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:45.182653107Z", + "start_time": "2023-11-09T18:41:44.945907133Z" + } + }, "outputs": [], "source": [ 
"x = np.linspace(0, 10)\n", @@ -643,7 +731,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:45.200613740Z", + "start_time": "2023-11-09T18:41:45.185498912Z" + } + }, "outputs": [], "source": [ "import pandas\n", @@ -655,7 +748,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:45.535033034Z", + "start_time": "2023-11-09T18:41:45.195431060Z" + } + }, "outputs": [], "source": [ "plt.plot(mass, probs.sig, label='sig probability')\n", @@ -675,7 +773,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:45.636452372Z", + "start_time": "2023-11-09T18:41:45.536975210Z" + } + }, "outputs": [], "source": [ "from hep_ml import splot\n", @@ -693,7 +796,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:46.023070838Z", + "start_time": "2023-11-09T18:41:45.649725765Z" + } + }, "outputs": [], "source": [ "plt.plot(mass, sWeights.sig, label='sig sWeight')\n", @@ -713,7 +821,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:46.636288872Z", + "start_time": "2023-11-09T18:41:46.023629125Z" + } + }, "outputs": [], "source": [ "plt.figure(figsize=[15, 7])\n", @@ -744,7 +857,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:46.670488310Z", + "start_time": "2023-11-09T18:41:46.636425810Z" + } + }, "outputs": [], "source": [ "np.corrcoef(abs(mass - 4), p) [0, 1]" @@ -760,7 +878,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:46.808545533Z", + "start_time": 
"2023-11-09T18:41:46.644686446Z" + } + }, "outputs": [], "source": [ "print(np.corrcoef(abs(sig_mass - 4), sig_p)[0, 1])\n", @@ -777,7 +900,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:47.029394075Z", + "start_time": "2023-11-09T18:41:46.669469133Z" + } + }, "outputs": [], "source": [ "hist_conf = dict(bins=30, alpha=0.5, range=[-1, 7])\n", @@ -933,4 +1061,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/advanced-python/70ScikitHEPUniverse.ipynb b/advanced-python/70ScikitHEPUniverse.ipynb index 4c515899..d311cbf3 100644 --- a/advanced-python/70ScikitHEPUniverse.ipynb +++ b/advanced-python/70ScikitHEPUniverse.ipynb @@ -28,7 +28,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.509856814Z", + "start_time": "2023-11-09T18:41:36.790957906Z" + } + }, "outputs": [], "source": [ "import formulate\n", @@ -40,7 +45,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.519217132Z", + "start_time": "2023-11-09T18:41:37.505747578Z" + } + }, "outputs": [], "source": [ "momentum.to_numexpr() # as used in Pandas eval/query" @@ -49,7 +59,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.569995559Z", + "start_time": "2023-11-09T18:41:37.510628475Z" + } + }, "outputs": [], "source": [ "momentum.to_root() # as used in ROOT" @@ -67,7 +82,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.733545504Z", + "start_time": "2023-11-09T18:41:37.522736067Z" + } + }, "outputs": [], "source": [ "# Particle\n", @@ -87,7 +107,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + 
"metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.779734080Z", + "start_time": "2023-11-09T18:41:37.779040900Z" + } + }, "outputs": [], "source": [ "piplus.mass" @@ -96,7 +121,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.780222164Z", + "start_time": "2023-11-09T18:41:37.779268397Z" + } + }, "outputs": [], "source": [ "piplus.charge" @@ -105,7 +135,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.785865315Z", + "start_time": "2023-11-09T18:41:37.779388976Z" + } + }, "outputs": [], "source": [ "piplus.width" @@ -114,7 +149,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.786318570Z", + "start_time": "2023-11-09T18:41:37.779605715Z" + } + }, "outputs": [], "source": [ "piplus.name" @@ -131,7 +171,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.903977630Z", + "start_time": "2023-11-09T18:41:37.779707095Z" + } + }, "outputs": [], "source": [ "Particle.findall(lambda p: p.pdgid.has_bottom and p.charge==0)" @@ -152,7 +197,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.905992034Z", + "start_time": "2023-11-09T18:41:37.835409659Z" + } + }, "outputs": [], "source": [ "from hepunits.constants import c_light\n", @@ -163,7 +213,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.907634350Z", + "start_time": "2023-11-09T18:41:37.841358263Z" + } + }, "outputs": [], "source": [ "import hepunits as u # u for units" @@ -172,7 +227,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + 
"metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:37.910230835Z", + "start_time": "2023-11-09T18:41:37.883077241Z" + } + }, "outputs": [], "source": [ "150 * u.MeV + 1.1 * u.GeV # result in MeV" @@ -195,7 +255,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:38.058044969Z", + "start_time": "2023-11-09T18:41:37.883233940Z" + } + }, "outputs": [], "source": [ "import vector" @@ -207,6 +272,10 @@ "metadata": { "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + "end_time": "2023-11-09T18:41:38.087743681Z", + "start_time": "2023-11-09T18:41:38.000640796Z" } }, "outputs": [], @@ -232,7 +301,7 @@ " \"phi\": [2.1, 2.2, 2.3, 2.4, 2.5],\n", " \"eta\": [3.1, 3.2, 3.3, 3.4, 3.5],\n", " \"M\": [4.1, 4.2, 4.3, 4.4, 4.5],\n", - "})\n" + "})" ] }, { @@ -247,7 +316,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:38.088074302Z", + "start_time": "2023-11-09T18:41:38.047094192Z" + } + }, "outputs": [], "source": [ "vector.obj(x=1, y=2, z=3).theta\n", @@ -257,7 +331,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:38.088361257Z", + "start_time": "2023-11-09T18:41:38.047282206Z" + } + }, "outputs": [], "source": [ "vector.obj(x=3, y=4, z=-2, t=10) # Cartesian 4D vector" @@ -266,7 +345,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-09T18:41:38.088472813Z", + "start_time": "2023-11-09T18:41:38.047400199Z" + } + }, "outputs": [], "source": [] } @@ -292,4 +376,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/conf.py b/conf.py index 2e15c5fe..5c5caef3 100644 --- a/conf.py +++ b/conf.py @@ -21,6 +21,7 @@ 'conf_py_path': '/', } + html_static_path += [ f'_static', ] @@ -42,5 +43,10 @@ def 
hsf_ci_setup(app): nbsphinx_execute = 'always' nbsphinx_timeout = 60*20 -# FIXME: This should be removed -# nbsphinx_execute = 'never' + +_PLAUSIBLE_SNIPPET = '' + +old_setup = setup # imported from conf.py +def setup(app): + app.add_js_file(None, body=_PLAUSIBLE_SNIPPET) + old_setup(app) diff --git a/environment.yml b/environment.yml index 627c4808..0e4a96ef 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - mplhep - nb_conda - nb_conda_kernels + - notebook<7.0.0 # fixes failed nb_conda install https://github.com/DeepLabCut/DeepLabCut/issues/2322 - numpy - pandas - particle @@ -22,12 +23,11 @@ dependencies: - uproot3 - vector - wget - - xgboost +# - xgboost + - zfit >=0.14.0 + - hepstats - pip: - git+https://github.com/hsf-training/python-lesson.git - - tensorflow - - zfit >=0.8,<0.10 - - keras <2.7 # temporary fix for https://github.com/keras-team/keras/issues/15579 - - hepstats - formulate - starterkit-ci + - xgboost diff --git a/python/classes.ipynb b/python/classes.ipynb index eb864a0a..4fe59ea0 100644 --- a/python/classes.ipynb +++ b/python/classes.ipynb @@ -42,6 +42,7 @@ "pi1_pz = 30\n", "pi1_E = 100\n", "\n", + "\n", "def calc_mass_simple(px, py, pz, E):\n", " return np.sqrt(E ** 2 - (px ** 2 + py ** 2 + pz ** 2))" ] @@ -176,6 +177,7 @@ " 'E': None,\n", " 'mass': calc_mass}\n", "\n", + "\n", "def initialize_particle(particle, px, py, pz, E):\n", " particle['px'] = px\n", " particle['py'] = py\n", @@ -183,6 +185,7 @@ " particle['E'] = E\n", " return particle\n", "\n", + "\n", "particle1 = initialize_particle(make_particle(), px=20, py=30, pz=20, E=50)" ] }, diff --git a/python/further_reading.md b/python/further_reading.md index 5de7032a..0b3233c6 100644 --- a/python/further_reading.md +++ b/python/further_reading.md @@ -4,8 +4,11 @@ * argsparse, datetime, fnmatch, glob, os, re, sys, subprocess ## Nice libraries for data analysis -* numpy, pandas, matplotlib +* [NumPy](https://numpy.org/) +* 
[pandas](https://pandas.pydata.org/docs/) +* [matplotlib](https://matplotlib.org/) ## Python and ROOT -* pyROOT -* root_numpy +* pyROOT: Python interface for ROOT +* [uproot](https://uproot.readthedocs.io/en/latest/index.html): A library for reading data from root files into Python NumPy arrays, Awkward arrays and Pandas dataframes +* root_numpy (deprecated)