diff --git a/.gitignore b/.gitignore index c4e70e44..a9273629 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ -create_binaries/*.app -create_binaries/build/* -create_binaries/dist/* -tests/data/* -debug_tests/*.txt +_old/create_binaries/*.app +_old/create_binaries/build/* +_old/create_binaries/dist/* *.hdf5 *.pyc @@ -32,3 +30,5 @@ tierpsy/analysis/ske_create/segWormPython/cython_files/cleanWorm_cython.c tierpsy/analysis/stage_aligment/get_mask_diff_var.c tierpsy/analysis/ske_create/segWormPython/cython_files/curvspace.c tierpsy/analysis/ske_create/segWormPython/cython_files/circCurvature.c + +tierpsy/tests/data/* diff --git a/README.md b/README.md index e4a7d680..c6ae1784 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,14 @@ # Tierpsy Tracker -### *Double-click executables are available for Windows (7 or latest) and OSX (Yosemite or latest) can be found in the [releases page](https://github.com/ver228/tierpsy-tracker/releases).* - Tierpsy Tracker is a multi-animal tracker developed in the [MRC-LMS](http://lms.mrc.ac.uk/) [Behavioural Genomics Group](http://behave.csc.mrc.ac.uk/) at Imperial College London. The project combines the throughput of multiworm tracking with the resolution of single worm tracking, which means you can extract detailed phenotypic fingerprints from more animals. A large field of view also makes it possible to analyse collective behaviours that depend on animal interactions. The code is open source and extensible and we have ongoing projects using the tracker to analyse fish, *Drosophila* larvae, and fluorescently labelled worms. +The easiest way to install Tierpsy Tracker is from conda-forge: +``` +conda install tierpsy -c conda-forge +``` +For more details, see the [installation instructions](docs/INSTALLATION.md).
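A quick way to confirm the conda-forge package installed correctly is a short sanity check (a minimal sketch; it assumes only the `tierpsy` package itself and the `__version__` attribute exposed in `tierpsy/__init__.py`, both visible later in this diff):

```python
# Post-install sanity check: the import should succeed and print the
# packaged version string (e.g. "1.5.1-beta", per recipe/meta.yaml below).
import tierpsy
print(tierpsy.__version__)
```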
+ ## [Installation Instructions](docs/INSTALLATION.md) ## [How to Use](docs/HOWTO.md) ## [Algorithm Explanation](docs/EXPLANATION.md) diff --git a/create_binaries/ProcessWorker.spec b/_old/create_binaries/ProcessWorker.spec similarity index 100% rename from create_binaries/ProcessWorker.spec rename to _old/create_binaries/ProcessWorker.spec diff --git a/create_binaries/TierpsyTracker.spec b/_old/create_binaries/TierpsyTracker.spec similarity index 100% rename from create_binaries/TierpsyTracker.spec rename to _old/create_binaries/TierpsyTracker.spec diff --git a/create_binaries/_test/MWConsole_test.spec b/_old/create_binaries/_test/MWConsole_test.spec similarity index 100% rename from create_binaries/_test/MWConsole_test.spec rename to _old/create_binaries/_test/MWConsole_test.spec diff --git a/create_binaries/_test/test.avi b/_old/create_binaries/_test/test.avi similarity index 100% rename from create_binaries/_test/test.avi rename to _old/create_binaries/_test/test.avi diff --git a/create_binaries/_test/test.bat b/_old/create_binaries/_test/test.bat similarity index 100% rename from create_binaries/_test/test.bat rename to _old/create_binaries/_test/test.bat diff --git a/create_binaries/_test/test.h5 b/_old/create_binaries/_test/test.h5 similarity index 100% rename from create_binaries/_test/test.h5 rename to _old/create_binaries/_test/test.h5 diff --git a/create_binaries/_test/test.sh b/_old/create_binaries/_test/test.sh similarity index 100% rename from create_binaries/_test/test.sh rename to _old/create_binaries/_test/test.sh diff --git a/create_binaries/_test/test_pyinstaller.py b/_old/create_binaries/_test/test_pyinstaller.py similarity index 100% rename from create_binaries/_test/test_pyinstaller.py rename to _old/create_binaries/_test/test_pyinstaller.py diff --git a/create_binaries/_test/test_pyinstaller.spec b/_old/create_binaries/_test/test_pyinstaller.spec similarity index 100% rename from create_binaries/_test/test_pyinstaller.spec rename to _old/create_binaries/_test/test_pyinstaller.spec diff --git a/create_binaries/_test/test_pyinstaller_reader.py b/_old/create_binaries/_test/test_pyinstaller_reader.py similarity index 100% rename from create_binaries/_test/test_pyinstaller_reader.py rename to _old/create_binaries/_test/test_pyinstaller_reader.py diff --git a/create_binaries/_test/test_pyinstaller_reader.spec b/_old/create_binaries/_test/test_pyinstaller_reader.spec similarity index 100% rename from create_binaries/_test/test_pyinstaller_reader.spec rename to _old/create_binaries/_test/test_pyinstaller_reader.spec diff --git a/create_binaries/_test/test_reader.spec b/_old/create_binaries/_test/test_reader.spec similarity index 100% rename from create_binaries/_test/test_reader.spec rename to _old/create_binaries/_test/test_reader.spec diff --git a/create_binaries/_test/test_reader_dir.bat b/_old/create_binaries/_test/test_reader_dir.bat similarity index 100% rename from create_binaries/_test/test_reader_dir.bat rename to _old/create_binaries/_test/test_reader_dir.bat diff --git a/create_binaries/_test/test_reader_file.bat b/_old/create_binaries/_test/test_reader_file.bat similarity index 100% rename from create_binaries/_test/test_reader_file.bat rename to _old/create_binaries/_test/test_reader_file.bat diff --git a/create_binaries/_test/test_reader_spec.bat b/_old/create_binaries/_test/test_reader_spec.bat similarity index 100% rename from create_binaries/_test/test_reader_spec.bat rename to _old/create_binaries/_test/test_reader_spec.bat diff --git 
a/create_binaries/create_binaries.bat b/_old/create_binaries/create_binaries.bat similarity index 100% rename from create_binaries/create_binaries.bat rename to _old/create_binaries/create_binaries.bat diff --git a/create_binaries/create_binaries.sh b/_old/create_binaries/create_binaries.sh similarity index 100% rename from create_binaries/create_binaries.sh rename to _old/create_binaries/create_binaries.sh diff --git a/create_binaries/issues.txt b/_old/create_binaries/issues.txt similarity index 100% rename from create_binaries/issues.txt rename to _old/create_binaries/issues.txt diff --git a/installation/installation_script.bat b/_old/installation/installation_script.bat similarity index 100% rename from installation/installation_script.bat rename to _old/installation/installation_script.bat diff --git a/installation/installation_script.sh b/_old/installation/installation_script.sh similarity index 100% rename from installation/installation_script.sh rename to _old/installation/installation_script.sh diff --git a/installation/installation_test.py b/_old/installation/installation_test.py similarity index 100% rename from installation/installation_test.py rename to _old/installation/installation_test.py diff --git a/_old/scripts/tierpsy_gui.py b/_old/scripts/tierpsy_gui.py new file mode 100755 index 00000000..865bcb69 --- /dev/null +++ b/_old/scripts/tierpsy_gui.py @@ -0,0 +1,5 @@ +from tierpsy.gui.SelectApp import tierpsy_gui +import sys + +if __name__ == '__main__': + sys.exit(tierpsy_gui()) diff --git a/_old/scripts/tierpsy_gui_simple.py b/_old/scripts/tierpsy_gui_simple.py new file mode 100755 index 00000000..5dd2da11 --- /dev/null +++ b/_old/scripts/tierpsy_gui_simple.py @@ -0,0 +1,6 @@ +import sys + +from tierpsy.gui.HDF5VideoPlayer import tierpsy_gui_simple + +if __name__ == '__main__': + sys.exit(tierpsy_gui_simple()) \ No newline at end of file diff --git a/_old/scripts/tierpsy_process.py b/_old/scripts/tierpsy_process.py new file mode 100644 index 00000000..e6fc61db --- /dev/null +++ b/_old/scripts/tierpsy_process.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Jun 9 15:12:48 2015 + +@author: ajaver +""" +from tierpsy.processing.processMultipleFilesFun import tierpsy_process + +if __name__ == '__main__': + tierpsy_process() diff --git a/_old/scripts/tierpsy_tests.py b/_old/scripts/tierpsy_tests.py new file mode 100644 index 00000000..568724d0 --- /dev/null +++ b/_old/scripts/tierpsy_tests.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Jun 9 15:12:48 2015 + +@author: ajaver +""" +from tierpsy.tests.run_tests import tierpsy_tests + +if __name__ == '__main__': + tierpsy_tests() diff --git a/cmd_scripts/HDF5VideoPlayer.py b/cmd_scripts/HDF5VideoPlayer.py deleted file mode 100755 index 26b7c57a..00000000 --- a/cmd_scripts/HDF5VideoPlayer.py +++ /dev/null @@ -1,14 +0,0 @@ -import os -import sys - - -from tierpsy.gui.HDF5VideoPlayer import HDF5VideoPlayerGUI -from PyQt5.QtWidgets import QApplication - -if __name__ == '__main__': - app = QApplication(sys.argv) - - ui = HDF5VideoPlayerGUI() - ui.show() - - sys.exit(app.exec_()) diff --git a/cmd_scripts/TierpsyTrackerConsole.py b/cmd_scripts/TierpsyTrackerConsole.py deleted file mode 100755 index 97900238..00000000 --- a/cmd_scripts/TierpsyTrackerConsole.py +++ /dev/null @@ -1,13 +0,0 @@ -import os -import sys - -from tierpsy.gui.SelectApp import SelectApp -from PyQt5.QtWidgets import QApplication - -if __name__ == '__main__': - app = QApplication(sys.argv) - - ui = SelectApp() - ui.show() - - 
sys.exit(app.exec_()) diff --git a/cmd_scripts/processMultipleFiles.py b/cmd_scripts/processMultipleFiles.py deleted file mode 100644 index 31b3aac2..00000000 --- a/cmd_scripts/processMultipleFiles.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Jun 9 15:12:48 2015 - -@author: ajaver -""" -from tierpsy.processing.processMultipleFilesFun import processMultipleFilesFun, ProcessMultipleFilesParser - -if __name__ == '__main__': - args = ProcessMultipleFilesParser().parse_args() - processMultipleFilesFun(**vars(args)) diff --git a/docs/HOWTO.md b/docs/HOWTO.md index e245fcc4..2fd48490 100755 --- a/docs/HOWTO.md +++ b/docs/HOWTO.md @@ -18,22 +18,9 @@ The processing times for in MacBook Pro (15-inch, 2017) were 04:31 minutes for t ## Getting Started -If you are using any of the [binary executables](https://github.com/ver228/tierpsy-tracker/releases) you only have to double click the file and the program should start after a few seconds. - -If you [installed tierpsy tracker from the source](https://github.com/ver228/tierpsy-tracker/blob/master/docs/INSTALLATION.md) in OSX or Windows there should be a clickable executable named `TierpsyTracker` on the Desktop. If the executable is missing you can re-create it by running: - -```bash -#(OSX/Linux) -installation/installation_script.sh --link_desktop - -#(Windows) -installation/installation_script.bat --link_desktop -``` - -Alternatively open a terminal, go to the directory where Tierpsy Tracker is installed and type: - +Follow the installation [instructions](INSTALLATION.md), then open a terminal (or an Anaconda Prompt on Windows) and type: ```bash -python3 cmd_scripts/TierpsyTrackerConsole.py +tierpsy_gui ``` The main widget should look like the one below: diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md index 7edac4db..8829ce1d 100644 --- a/docs/INSTALLATION.md +++ b/docs/INSTALLATION.md @@ -1,63 +1,52 @@ -*The instructions below are to install Tierpsy Tracker from the source code. I would recommend to do this only if you are using Linux or want to run the development version, otherwise use the double-click executables available for Windows (7 or latest) and OSX (Yosemite or latest) in the [releases page](https://github.com/ver228/tierpsy-tracker/releases).* +# Installation Instructions -# System Requirements -- Python 3.6 (I would recommend to use [miniconda](https://conda.io/miniconda.html)). -- Optional [ffmpeg](https://ffmpeg.org/download.html): ffprobe must be accessible from the command line to calculate the video timestamps. -- [C compiler compatible with cython](http://cython.readthedocs.io/en/latest/src/quickstart/install.html). In Windows, you can use [Visual C++ 2015 Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/). In OSX, if you install [homebrew](https://brew.sh/) it will setup the C compiler without the need to download XCode from the appstore. -- [Git](https://git-scm.com/). [Here](https://gist.github.com/derhuerst/1b15ff4652a867391f03) are some instructions to install it. +## Installation from precompiled packages +- Download Python >=3.6 using [anaconda](https://www.anaconda.com/download/) or [miniconda](https://conda.io/miniconda.html) if you prefer a lighter installation. +- Open a Terminal in OSX or Linux. In Windows you need to open the Anaconda Prompt. +- [Optional] I would recommend creating and activating an [environment](https://conda.io/docs/user-guide/tasks/manage-environments.html) as: -# Installation - -1. 
Install dependencies: - -Using pip: ```bash -pip install numpy matplotlib tables pandas pyqt5 h5py scipy scikit-learn scikit-image seaborn xlrd cython statsmodels numba keras==2.1.5 tensorflow opencv-python-headless -``` +#Windows +conda create -n tierpsy +conda activate tierpsy -Using anaconda: -```bash -conda install --channel conda-forge numpy matplotlib pytables pandas gitpython pyqt h5py \ -scipy scikit-learn scikit-image seaborn xlrd cython statsmodels numba keras=2.1.5 opencv tensorflow +#OSX or Linux +conda create -n tierpsy +source activate tierpsy ``` - -2. Clone this repository either using the [Github Desktop](https://desktop.github.com/) or from the command line as: - +- Finally, download the package from conda-forge: ```bash -git clone https://github.com/ver228/tierpsy-tracker +conda install tierpsy -c conda-forge ``` +- Afterwards, you can start Tierpsy Tracker by typing: +```bash +tierpsy_gui +``` +On OSX, the first time tierpsy is initialized it will create a file on the Desktop called tierpsy_gui.command. By double-clicking on this file tierpsy can be started without having to open a terminal. +Do not forget to activate the environment every time you start a new session. - -3. Install the rest of the modules. Firts move to the tierpsy-tracker root folder (the folder with the cloned repository) as: -```bash -cd tierpsy-tracker -``` -Then install the rest of the modules using the following command according to your operative system. +Note: there might be some problems with the opencv version available through `conda`. If you have problems reading video files or encounter errors related to `import cv2`, you can try installing opencv using pip: ```bash -bash installation/installation_script.sh #OSX or Linux - -installation/installation_script.bat #Windows +pip install opencv-python-headless ``` -## Possible Issues - - -- The most common problem in the installation is OpenCV (error in import cv2). Try a fresh miniconda installation (or a fresh enviroment) and make sure your are using the [conda-forge](https://conda-forge.org/) packages. It this does not work I am afraid you would have to solve the problem by yourself (Google is your friend). - -- You do not need to install manually the [Open Worm Analysis Toolbox](https://github.com/openworm/open-worm-analysis-toolbox). However if you do (and I do not recommend it), be aware that there is a bug with the pip installer: it is missing some dependencies and it will create a corrupt [.egg](https://stackoverflow.com/questions/2051192/what-is-a-python-egg) in your packages folder. Manually delete the .egg (use the error traceback to find the its location) and re-run `installation_script.sh`. The script will download the Open Worm Analysis Toolbox repository and install it using `python setup.py develop`. - -# Test Data -On Mac OSX or Linux, some test data can be downloaded by running +## Installation from source +- Download Python >=3.6 using [anaconda](https://www.anaconda.com/download/) or [miniconda](https://conda.io/miniconda.html). +- Install [git](https://git-scm.com/). [Here](https://gist.github.com/derhuerst/1b15ff4652a867391f03) are some instructions to install it. +- Install a [C compiler compatible with cython](http://cython.readthedocs.io/en/latest/src/quickstart/install.html). In Windows, you can use [Visual C++ 2015 Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/). 
In OSX, if you install [homebrew](https://brew.sh/) it will set up the C compiler without the need to download XCode from the App Store. +- Open a Terminal or Anaconda prompt and type: ```bash -installation/installation_script.sh --download_examples +git clone https://github.com/ver228/tierpsy-tracker +cd tierpsy-tracker +conda create -n tierpsy #[optional] +conda activate tierpsy #[optional] +conda install --file requirements.txt +pip install -e . ``` -The tests can also be manually downloaded using [this link](https://imperiallondon-my.sharepoint.com/personal/ajaver_ic_ac_uk/_layouts/15/guestaccess.aspx?guestaccesstoken=ldZ18fLY%2bzlu7XuO9mbKVdyiKoH4naiesqiLXWU4vGQ%3d&docid=0cec4e52f4ccf4d5b8bb3a737020fc12f&rev=1). Uncompress the data and save it in the main repository folder `tests/data` . - -You can then run the tests by running: +# Batch processing from the command line. The script `TierpsyTrackerConsole.py` was deprecated in favor of the command `tierpsy_process`. Type `tierpsy_process -h` for help. -```bash -python tests/run_tests.py -``` +# Tests +After installing, you can run the test scripts using the command `tierpsy_tests` in the terminal. Type `tierpsy_tests -h` for help. I would recommend running one test at a time, since there is currently no way to summarize the results of all the tests. diff --git a/recipe/bld.bat b/recipe/bld.bat new file mode 100644 index 00000000..cea21ed9 --- /dev/null +++ b/recipe/bld.bat @@ -0,0 +1,2 @@ +"%PYTHON%" setup.py install --single-version-externally-managed --record=record.txt +if errorlevel 1 exit 1 \ No newline at end of file diff --git a/recipe/build.sh b/recipe/build.sh new file mode 100644 index 00000000..9493ff12 --- /dev/null +++ b/recipe/build.sh @@ -0,0 +1,2 @@ +#!/bin/bash +$PYTHON setup.py install --single-version-externally-managed --record=record.txt # Python command to install the script. \ No newline at end of file diff --git a/recipe/meta.yaml b/recipe/meta.yaml new file mode 100644 index 00000000..5be24d10 --- /dev/null +++ b/recipe/meta.yaml @@ -0,0 +1,56 @@ +package: + name: tierpsy + version: "1.5.1-beta" + +source: + path: ..
+ #git_rev: "v1.5.0" + #git_url: "https://github.com/ver228/tierpsy-tracker" + +build: + osx_is_app: True + +requirements: + build: + - python + - numpy >=1.14 + - setuptools + - cython + + run: + - python + - keras=2.1.5 + - opencv + - tensorflow + - numpy >=1.14 + - matplotlib + - pytables + - pandas + - pyqt + - scipy + - scikit-learn + - scikit-image + - statsmodels + - numba + - tqdm + - requests + +app: + entry: tierpsy_gui + summary: Tierpsy Tracker Multi-Worm Tracker + type: desk + +test: + imports: + - tierpsy + - tierpsy.features.tierpsy_features + - tierpsy.features.open_worm_analysis_toolbox + commands: + - tierpsy_process -h + - tierpsy_tests -h + +about: + home: https://github.com/ver228/tierpsy-tracker + license: MIT + license_file: LICENSE + summary: Tierpsy Tracker Multi-Worm Tracker \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..10a9c967 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +cython +keras=2.1.5 +opencv +tensorflow +numpy >=1.14 +matplotlib +pytables +pandas +pyqt +scipy +scikit-learn +scikit-image +statsmodels +numba +tqdm +requests \ No newline at end of file diff --git a/setup.py b/setup.py index 41482c4d..9ad01f0b 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from setuptools import setup +from setuptools import setup, find_packages from distutils.extension import Extension from Cython.Build import cythonize from Cython.Distutils import build_ext @@ -43,18 +43,36 @@ def _get_mod_path(name): for name, files in ext_files.items()] return ext_modules +PKG_DATA = [ + 'extras/*', + 'extras/param_files/*', + 'features/tierpsy_features/extras/*', + 'features/open_worm_analysis_toolbox/features/master_eigen_worms_N2.mat', + 'features/open_worm_analysis_toolbox/features/feature_metadata/features_list.csv' +] #install setup -setup(name=MODULE_NAME, - version=VERSION, - description=DESCRIPTION, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - url=URL, - packages=['tierpsy'], - cmdclass={'build_ext': build_ext}, - ext_modules=_get_ext_modules(), - include_dirs=[numpy.get_include()], - include_package_data=True +setup(name = MODULE_NAME, + version = VERSION, + description = DESCRIPTION, + author = AUTHOR, + author_email = AUTHOR_EMAIL, + url = URL, + packages = find_packages(), + cmdclass = {'build_ext': build_ext}, + ext_modules = _get_ext_modules(), + include_dirs = [numpy.get_include()], + package_data = {'tierpsy': PKG_DATA}, + entry_points= { + 'gui_scripts': [ + 'tierpsy_gui = tierpsy.gui.SelectApp:tierpsy_gui', + 'tierpsy_gui_simple = tierpsy.gui.HDF5VideoPlayer:tierpsy_gui_simple' + ], + 'console_scripts': [ + 'tierpsy_process = tierpsy.processing.processMultipleFilesFun:tierpsy_process', + 'tierpsy_tests = tierpsy.tests.run_tests:tierpsy_tests' + ] + } ) + \ No newline at end of file diff --git a/tierpsy/__init__.py b/tierpsy/__init__.py index 1d976c6a..eda89f87 100755 --- a/tierpsy/__init__.py +++ b/tierpsy/__init__.py @@ -9,6 +9,12 @@ import warnings from .version import __version__ +# This warning comes from an Anaconda packaging issue and should be harmless:
https://github.com/ContinuumIO/anaconda-issues/issues/6678 +warnings.filterwarnings('ignore', message='numpy.dtype size changed, may indicate binary incompatibility.*') +def warning_on_one_line(message, category, filename, lineno, file=None, line=''): + return '{}:{}: {}:{}\n'.format(filename, lineno, category.__name__, message) +warnings.formatwarning = warning_on_one_line + #I want to be sure tierpsy loads tensorflow flow backend os.environ['KERAS_BACKEND']='tensorflow' diff --git a/tierpsy/analysis/compress/processVideo.py b/tierpsy/analysis/compress/processVideo.py index 9ed7be12..d275550f 100644 --- a/tierpsy/analysis/compress/processVideo.py +++ b/tierpsy/analysis/compress/processVideo.py @@ -7,12 +7,10 @@ import json import os - -import h5py import tables from tierpsy.analysis.compress.compressVideo import compressVideo, initMasksGroups -from tierpsy.analysis.compress.selectVideoReader import selectVideoReader +from tierpsy.analysis.compress.selectVideoReader import selectVideoReader from tierpsy.helper.misc import TimeCounter, print_flush #default parameters if wormencoder.ini does not exist diff --git a/tierpsy/analysis/feat_create/obtainFeatures.py b/tierpsy/analysis/feat_create/obtainFeatures.py index 6e6ecf7d..3ee0e109 100755 --- a/tierpsy/analysis/feat_create/obtainFeatures.py +++ b/tierpsy/analysis/feat_create/obtainFeatures.py @@ -9,7 +9,7 @@ from tierpsy.analysis.feat_create.obtainFeaturesHelper import WormStats, WormFromTable from tierpsy.helper.params import copy_unit_conversions, read_fps, min_num_skel_defaults -import open_worm_analysis_toolbox as mv +import tierpsy.features.open_worm_analysis_toolbox as mv import os diff --git a/tierpsy/analysis/feat_create/obtainFeaturesHelper.py b/tierpsy/analysis/feat_create/obtainFeaturesHelper.py index 08be85c9..98c7bad2 100755 --- a/tierpsy/analysis/feat_create/obtainFeaturesHelper.py +++ b/tierpsy/analysis/feat_create/obtainFeaturesHelper.py @@ -8,7 +8,7 @@ from tierpsy.analysis.stage_aligment.alignStageMotion import isGoodStageAligment, _h_get_stage_inv from tierpsy.helper.params import read_fps, read_microns_per_pixel, read_ventral_side from tierpsy import AUX_FILES_DIR -import open_worm_analysis_toolbox as mv +import tierpsy.features.open_worm_analysis_toolbox as mv import copy diff --git a/tierpsy/analysis/feat_init/smooth_skeletons_table.py b/tierpsy/analysis/feat_init/smooth_skeletons_table.py index 0ed59650..a22e5784 100644 --- a/tierpsy/analysis/feat_init/smooth_skeletons_table.py +++ b/tierpsy/analysis/feat_init/smooth_skeletons_table.py @@ -11,8 +11,8 @@ import warnings from scipy.interpolate import interp1d -from tierpsy_features import SmoothedWorm -from tierpsy_features.food import _h_smooth_cnt +from tierpsy.features.tierpsy_features import SmoothedWorm +from tierpsy.features.tierpsy_features.food import _h_smooth_cnt from tierpsy.analysis.feat_create.obtainFeaturesHelper import WormFromTable from tierpsy.analysis.stage_aligment.alignStageMotion import _h_get_stage_inv diff --git a/tierpsy/analysis/feat_tierpsy/get_tierpsy_features.py b/tierpsy/analysis/feat_tierpsy/get_tierpsy_features.py index c48539f6..81384355 100644 --- a/tierpsy/analysis/feat_tierpsy/get_tierpsy_features.py +++ b/tierpsy/analysis/feat_tierpsy/get_tierpsy_features.py @@ -9,8 +9,8 @@ import pandas as pd import tables -from tierpsy_features import get_timeseries_features, timeseries_all_columns -from tierpsy_features.summary_stats import get_summary_stats +from tierpsy.features.tierpsy_features import get_timeseries_features, 
timeseries_all_columns +from tierpsy.features.tierpsy_features.summary_stats import get_summary_stats from tierpsy.helper.misc import TimeCounter, print_flush, get_base_name, TABLE_FILTERS from tierpsy.helper.params import read_fps, read_ventral_side diff --git a/tierpsy/features/__init__.py b/tierpsy/features/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/tierpsy/features/open_worm_analysis_toolbox/__init__.py b/tierpsy/features/open_worm_analysis_toolbox/__init__.py new file mode 100755 index 00000000..9270c626 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/__init__.py @@ -0,0 +1,49 @@ +""" +open-worm-analysis-toolbox: A Python library +https://github.com/openworm/open-worm-analysis-toolbox + +Takes raw videos of C. elegans worms and processes them into features + +The purpose is to be able to compare the behavior of worms statistically, +and in particular to validate how closely the behaviour of OpenWorm's worm +simulation is to the behaviour of real worms. + +License +--------------------------------------- +https://github.com/openworm/open-worm-analysis-toolbox/LICENSE.md + +""" +from .version import __version__ + +from .prefeatures.video_info import VideoInfo +from .prefeatures.basic_worm import BasicWorm +from .prefeatures.normalized_worm import NormalizedWorm + +# This is temporary; we will eventually remove it when the code is ready +# to become WormFeatures +from .features.worm_features import WormFeatures +from .features.worm_features import get_feature_specs +from .features import feature_manipulations + +from .features.worm_features import WormFeatures +from .features.feature_processing_options import FeatureProcessingOptions + + + +try: + from . import user_config +except ImportError: + raise Exception( + "user_config.py not found. Copy the " + "user_config_example.txt in the 'open-worm-analysis-toolbox' " + "package to user_config.py in the same directory and " + "edit the values") + + +__all__ = ['__version__', + 'BasicWorm', + 'NormalizedWorm', + 'VideoInfo', + 'WormFeatures', + 'FeatureProcessingOptions' + ] diff --git a/tierpsy/features/open_worm_analysis_toolbox/config.py b/tierpsy/features/open_worm_analysis_toolbox/config.py new file mode 100755 index 00000000..3d63c710 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/config.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +""" +Package-level configuration settings. The original Schafer Lab code +contained many hardcoded values. These are tracked in a central +location here. + +""" + +from __future__ import division + +""" DEBUG MODE TO RESTORE OLD SCHAFER LAB ERRORS """ + +# TODO + +""" PRE-FEATURES CONFIGURATION SETTINGS """ + +# This is the frame rate of the test video. Generally the FPS should be +# obtained properly from the video itself; this value will not be correct +# for most videos. +DEFAULT_FPS = 25.8398 + +# Again, generally ventral_mode should be determined and specified by the +# experimenter. This default value will be wrong for most videos. +DEFAULT_VENTRAL_MODE = 0 + +N_POINTS_NORMALIZED = 49 + + +""" FEATURES CONFIGURATION SETTINGS """ + +# Note: for features-level configuration options, see +# features/feature_processing_options.py + +EIGENWORM_FILE = 'master_eigen_worms_N2.mat' + + +""" STATISTICS CONFIGURATION SETTINGS """ + +# Used in Histogram.h_computeMHists +MAX_NUM_HIST_OBJECTS = 1000 + +# Used in HistogramManager.h__computeBinInfo +# The maximum # of bins that we'll use. 
Since the data +# is somewhat random, outliers could really chew up memory. I'd prefer not +# to have some event which all of a sudden tells the computer we need to +# allocate a few hundred gigabytes of data. If this does ever end up a +# problem we'll need a better solution (or really A solution) +MAX_NUMBER_BINS = 10**6 diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/__init__.py b/tierpsy/features/open_worm_analysis_toolbox/features/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/events.py b/tierpsy/features/open_worm_analysis_toolbox/features/events.py new file mode 100755 index 00000000..e5ed1903 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/events.py @@ -0,0 +1,1096 @@ +# -*- coding: utf-8 -*- +""" +A module for finding and describing frames-spanning "events" given a +worm video. + +Use EventFinder() to get an EventList() + +Contents +--------------------------------------- +This module contains definitions for the following: + +Classes: + EventFinder + __init__ + get_events + + EventList + __init__ + num_events @property + get_event_mask + merge + + EventListWithFeatures + __init__ + from_disk + test_equality + + +Usage +----- +Places used (Not exhaustive): +- locomotion_features.MotionEvents +- posture_features.get_worm_coils +- locomotion_turns.LocomotionTurns + +One usage is in locomotion features. + +LocomotionFeatures.get_motion_codes() calculates the motion codes for the +worm, and to do so, for each of the possible motion states (forward, +backward, paused) it creates an instance of the EventFinder class, +sets up appropriate parameters, and then calls EventFinder.get_events() +to obtain an instance of EventList, the "result" class. + +Then to format the result appropriately, the EventListWithFeatures class is +instantiated with our "result" and then get_feature_dict is called. + +So the flow from within LocomotionFeatures.get_motion_codes() is: + # (approximately): + ef = EventFinder() + event_list = ef.get_events() + me = EventListWithFeatures(event_list, features_per_frame) + +EventListWithFeatures is used by not just get_motion_codes but also ... +DEBUG (add the other uses (e.g. upsilons)) + +Notes +--------------------------------------- +See https://github.com/openworm/open-worm-analysis-toolbox/blob/master/ +documentation/Yemini%20Supplemental%20Data/Locomotion.md#2-motion-states +for a plain-English description of a motion state. + +The code for this module came from several files in the +@event_finder, @event, and @event_ss folders from: +https://github.com/JimHokanson/SegwormMatlabClasses/blob/ + master/%2Bseg_worm/%2Bfeature/ + +""" + +import numpy as np +import operator +import warnings + +from itertools import groupby +from .. import utils + + +class EventFinder: + """ + To use this, create an instance, then specify the options. Default + options are initialized in __init__. + + Then call get_events() to obtain an EventList instance + containing the desired events from a given block of data. 
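As a concrete illustration of the call pattern this docstring describes, here is a hedged usage sketch. The thresholds and the speed array are hypothetical, and the import path simply follows the new package layout introduced in this diff (note the package `__init__` expects a `user_config.py` to exist, per the import guard above):

```python
import numpy as np
from tierpsy.features.open_worm_analysis_toolbox.features.events import EventFinder

ef = EventFinder()
ef.min_speed_threshold = 0.05   # hypothetical floor on per-frame speed
ef.min_frames_threshold = 10    # drop candidate events shorter than 10 frames

speed = np.random.rand(500)     # stand-in for per-frame worm speed data
events = ef.get_events(speed)   # returns an EventList of (start, stop) frame runs
print(events.starts_and_stops)
```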
+ + """ + + def __init__(self): + # Temporal thresholds + self.min_frames_threshold = None # (scalar or [1 x n_frames]) + self.include_at_frames_threshold = False + + self.max_inter_frames_threshold = None # (scalar or [1 x n_frames]) + self.include_at_inter_frames_threshold = False + + # Space (distance) and space&time (speed) thresholds + self.min_distance_threshold = None # (scalar or [1 x n_frames]) + self.max_distance_threshold = None # (scalar or [1 x n_frames]) + self.include_at_distance_threshold = True + + self.min_speed_threshold = None # (scalar or [1 x n_frames]) + self.max_speed_threshold = None # (scalar or [1 x n_frames]) + self.include_at_speed_threshold = True + + def get_events(self, speed_data, distance_data=None): + """ + Obtain the events implied by event_data, given how this instance + of EventFinder has been configured. + + Parameters + ---------- + speed_data : 1-d numpy array of length n + The per-frame instantaneous speed as % of skeleton length + distance_data : 1-d numpy array of length n (optional) + The per-frame distance travelled as % of skeleton length + If not specified, speed_data will be used to derive distance_data, + since speed = distance x time. + + Returns + ------- + EventList + + Notes: + --------------------------------------- + If the first/last event are solely preceded/followed by NaN + frames, these frames are swallowed into the respective event. + + Formerly getEvents.m. Originally it was findEvent.m. + + """ + # Override distance_data with speed_data if it was not provided + if distance_data is None: + distance_data = speed_data + + # For each frame, determine if it matches our speed threshold criteria + speed_mask = self.get_speed_threshold_mask(speed_data) + + # Convert our mask into the indices of the "runs" of True, that is + # of the data matching our above speed criteria + event_candidates = self.get_start_stop_indices(speed_data, speed_mask) + + # ERROR: start is not at 0 + #??? Starts might all be off by 1 ... + + # Possible short circuit: if we have absolutely no qualifying events + # in event_data, just exit early. + if not event_candidates.size: + return EventList() + + if self.max_inter_frames_threshold: + # Decide if we are removing gaps AT the threshold or + # just strictly smaller than the threshold. + if self.include_at_inter_frames_threshold: + inter_frames_comparison_operator = operator.le + else: + inter_frames_comparison_operator = operator.lt + + # In this function we remove time gaps between events if the gaps + # are too small (max_inter_frames_threshold) + event_candidates = self.remove_gaps( + event_candidates, + self.max_inter_frames_threshold, + inter_frames_comparison_operator) + + if self.min_frames_threshold: + # Remove events that aren't at least + # self.min_frames_threshold in length + event_candidates = self.remove_too_small_events(event_candidates) + + # For each candidate event, sum the instantaneous speed at all + # frames in the event, and decide if the worm moved enough distance + # for the event to qualify as genuine. + # i.e. Filter events based on data sums during event + event_candidates = \ + self.remove_events_by_data_sum(event_candidates, distance_data) + + return EventList(event_candidates) + + def __repr__(self): + return utils.print_object(self) + + def get_speed_threshold_mask(self, event_data): + """ + Get possible events between the speed thresholds. 
Return a mask + + Parameters + --------------------------------------- + event_data: 1-d numpy array of instantaneous worm speeds + + Returns + --------------------------------------- + A 1-d boolean numpy array masking any instantaneous speeds falling + frame-by-frame within the boundaries specified by + self.min_speed_threshold and self.max_speed_threshold, + which are themselves 1-d arrays. + + Notes + --------------------------------------- + Formerly h__getPossibleEventsByThreshold, in + seg_worm/feature/event_finder/getEvents.m + + """ + + # Start with a mask that's all True since if neither min or max thresholds + # were set there was nothing to mask. + event_mask = np.ones((len(event_data)), dtype=bool) + + if self.min_speed_threshold is not None: + # suppress runtime warning of comparison to None + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + if self.include_at_speed_threshold: + event_mask = event_data >= self.min_speed_threshold + else: + event_mask = event_data > self.min_speed_threshold + + if self.max_speed_threshold is not None: + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + if self.include_at_speed_threshold: + event_mask = event_mask & ( + event_data <= self.max_speed_threshold) + else: + event_mask = event_mask & ( + event_data < self.max_speed_threshold) + + return event_mask + + def get_start_stop_indices(self, event_data, event_mask): + """ + From a numpy event mask, get the start and stop indices. For + example: + + 0 1 2 3 4 5 <- indices + F F T T F F <- event_mask + F F F T T F F F <- bracketed_event_mask + s s <- start and stop + So in this case we'd have as an output [(2,3)], for the one run + that starts at 2 and ends at 3. + + Parameters + --------------------------------------- + event_data: 1-d float numpy array + Instantaneous worm speeds + event_mask: 1-d boolean numpy array + True if the frame is a possible event candidate for this event. + + Returns + --------------------------------------- + event_candidates: 2-d int numpy array + An array of tuples giving the start and stop, respectively, + of each run of Trues in the event_mask. IMPORTANTLY, these are indices, + NOT slice values + + Notes + --------------------------------------- + Formerly h__getStartStopIndices, in + seg_worm/feature/event_finder/getEvents.m + + """ + # NO + # Go from shape == (1, 4642) to (4642,) (where 4642 is the number of + # frames, for instance) + #event_data = event_data.flatten() + #event_mask = event_mask.flatten() + + # Make sure our parameters are of the correct type and dimension + assert(isinstance(event_data, np.ndarray)) + assert(isinstance(event_mask, np.ndarray)) + assert(len(np.shape(event_data)) == 1) + assert(np.shape(event_data) == np.shape(event_mask)) + assert(event_mask.dtype == bool) + assert(event_data.dtype == float) + + # We concatenate falses to ensure event starts and stops at the edges + # are caught + bracketed_event_mask = np.concatenate([[False], event_mask, [False]]) + + # Let's obtain the "x-coordinates" of the True entries. + # e.g. If our bracketed_event_mask is + # [False, False, False, True, False True, True, True, False], then + # we obtain the array [3, 5, 6, 7] + x = np.flatnonzero(bracketed_event_mask) - 1 + + # Group these together using a fancy trick from + # http://stackoverflow.com/questions/2154249/, since + # the lambda function x:x[0]-x[1] on an enumerated list will + # group consecutive integers together + # e.g. 
[[(0, 3)], [(1, 5), (2, 6), (3, 7)]] + # list(group) + x_grouped = [list(group) for key, group in + groupby(enumerate(x), lambda i:i[0] - i[1])] + + # We want to know the first element from each "run", and the + # last element e.g. [[3, 4], [5, 7]] + event_candidates = [(i[0][1], i[-1][1]) for i in x_grouped] + + # Early exit if we have no starts and stops at all + if not event_candidates: + return np.array(event_candidates) + + # If a run of NaNs precedes the first start index, all the way back to + # the first element, then revise our first (start, stop) entry to + # include all those NaNs. + if np.all(np.isnan(event_data[:event_candidates[0][0]])): + event_candidates[0] = (0, event_candidates[0][1]) + + # Same but with NaNs succeeding the final end index. + if np.all(np.isnan(event_data[event_candidates[-1][1] + 1:])): + event_candidates[-1] = (event_candidates[-1] + [0], event_data.size - 1) + + return np.array(event_candidates) + + def remove_gaps(self, event_candidates, threshold, + comparison_operator): + """ + Remove time gaps in the events that are smaller/larger than a given + threshold value. + + That is, apply a greedy right-concatenation to any (start, stop) duples + within threshold of each other. + + Parameters + --------------------------------------- + event_candidates: a list of (start, stop) duples + The start and stop indexes of the events + threshold: int + Number of frames to do the comparison on + comparison_operator: a comparison function + One of operator.lt, le, ge, gt + + Returns + --------------------------------------- + A new numpy array of (start, stop) duples giving the indexes + of the events with the gaps removed. + + """ + assert(comparison_operator == operator.lt or + comparison_operator == operator.le or + comparison_operator == operator.gt or + comparison_operator == operator.ge) + + new_event_candidates = [] + num_groups = np.shape(event_candidates)[0] + + i = 0 + while(i < num_groups): + # Now advance through groups to the right, + # continuing as long as they satisfy our comparison operator + ii = i + while(ii + 1 < num_groups and comparison_operator( + event_candidates[ii + 1][0] - event_candidates[ii][1] - 1, + threshold)): + ii += 1 + + # Add this largest possible start/stop duple to our NEW revised + # list + new_event_candidates.append((event_candidates[i][0], + event_candidates[ii][1])) + i = ii + 1 + + return np.array(new_event_candidates) + + def remove_too_small_events(self, event_candidates): + """ + This function filters events based on time (really sample count) + + Parameters + --------------------------------------- + event_candidates: numpy array of (start, stop) duples + + Returns + --------------------------------------- + numpy array of (start, stop) duples + + """ + if not self.min_frames_threshold: + return event_candidates + + event_num_frames = event_candidates[:, 1] - event_candidates[:, 0] + 1 + + events_to_remove = np.zeros(len(event_num_frames), dtype=bool) + + if self.include_at_frames_threshold: + events_to_remove = event_num_frames <= self.min_frames_threshold + else: + events_to_remove = event_num_frames < self.min_frames_threshold + + return event_candidates[np.flatnonzero(~events_to_remove)] + + def remove_events_by_data_sum(self, event_candidates, distance_data): + """ + This function removes events by data sum. An event is only valid + if the worm has moved a certain minimum proportion of its mean length + over the course of the event. 
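The enumerate/groupby trick used in `get_start_stop_indices` above is compact but opaque; a standalone sketch of the same idea, with a hypothetical mask, makes the mechanics visible:

```python
import numpy as np
from itertools import groupby

mask = np.array([False, False, True, True, False, True])
# Bracket with False so runs touching either edge are caught, as in the method above.
x = np.flatnonzero(np.concatenate([[False], mask, [False]])) - 1   # -> [2, 3, 5]
# Consecutive indices share the same enumerate-index minus value, so groupby
# collapses each run of consecutive integers into a single group.
runs = [list(g) for _, g in groupby(enumerate(x), lambda i: i[0] - i[1])]
events = [(r[0][1], r[-1][1]) for r in runs]
print(events)  # [(2, 3), (5, 5)] -- inclusive (start, stop) indices, not slices
```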
+ + For example, a forward motion state is only a forward event if the + worm has moved at least 5% of its mean length over the entire period. + + For a given event candidate, to calculate the worm's movement we + SUM the worm's distance travelled per frame (distance_data) over the + frames in the event. + + Parameters + --------------------------------------- + event_candidates: numpy array of (start, stop) duples + + Returns + --------------------------------------- + numpy array of (start, stop) duples + A subset of event_candidates with only the qualifying events. + + Notes + --------------------------------------- + Formerly h____RemoveEventsByDataSum + + """ + # If we've established no distance thresholds, we have nothing to + # remove from event_candidates + if self.min_distance_threshold is None and \ + self.max_distance_threshold is None: + return event_candidates + + # Sum over the event and threshold data so we know exactly how far + # the worm did travel in each candidate event, and also the min/max + # distance it MUST travel for the event to be considered valid + # -------------------------------------------------------- + + num_runs = np.shape(event_candidates)[0] + + # Sum the actual distance travelled by the worm during each candidate + # event + event_sums = np.empty(num_runs, dtype=float) + for i in range(num_runs): + event_sums[i] = np.nansum( + distance_data[ + event_candidates[i][0]:( + event_candidates[i][1] + 1)]) + + # self.min_distance_threshold contains a 1-d n-element array of + # skeleton lengths * 5% or whatever proportion we've decided the + # worm must move for our event to be valid. So to figure out the + # threshold for a given event, we must take the MEAN of this + # threshold array. + # + # Note that we test if min_distance_threshold contains any + # elements, since we may have opted to simply not include this + # threshold at all. + min_threshold_sums = np.empty(num_runs, dtype=float) + if self.min_distance_threshold is not None: + for i in range(num_runs): + min_threshold_sums[i] = np.nanmean( + self.min_distance_threshold[ + event_candidates[i][0]:( + event_candidates[i][1] + 1)]) + + # Same procedure as above, but for the maximum distance threshold. + max_threshold_sums = np.empty(num_runs, dtype=float) + if self.max_distance_threshold is not None: + for i in range(num_runs): + max_threshold_sums[i] = np.nanmean( + self.max_distance_threshold[ + event_candidates[i][0]:( + event_candidates[i][1] + 1)]) + + # Actual filtering of the candidate events + # -------------------------------------------------------- + + events_to_remove = np.zeros(num_runs, dtype=bool) + + # Remove events where the worm travelled too little + if self.min_distance_threshold is not None: + if self.include_at_distance_threshold: + events_to_remove = (event_sums <= min_threshold_sums) + else: + events_to_remove = (event_sums < min_threshold_sums) + + # Remove events where the worm travelled too much + if self.max_distance_threshold is not None: + if self.include_at_distance_threshold: + events_to_remove = events_to_remove | \ + (event_sums >= max_threshold_sums) + else: + events_to_remove = events_to_remove | \ + (event_sums > max_threshold_sums) + + return event_candidates[np.flatnonzero(~events_to_remove)] + + +class EventList(object): + """ + The EventList class is a relatively straightforward class specifying + when "events" start and stop. + + The EventListWithFeatures class, on the other hand, computes other + statistics on the data over which the event occurs. 
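The distance filter in `remove_events_by_data_sum` above keeps a candidate event only if the distance summed over its frames clears the mean of the per-frame threshold array over the same span; a small worked example with made-up numbers:

```python
import numpy as np

# One candidate event covering frames 3..7 (inclusive); all values hypothetical.
distance_per_frame = np.array([0.0, 0.1, 0.0, 1.2, np.nan, 0.9, 1.1, 0.8, 0.0])
min_distance_threshold = np.full(distance_per_frame.size, 5.0)  # e.g. 5% of length

start, stop = 3, 7
moved = np.nansum(distance_per_frame[start:stop + 1])          # 4.0
required = np.nanmean(min_distance_threshold[start:stop + 1])  # 5.0
print(moved > required)  # False -> this candidate would be removed
```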
+ + An event is a contiguous subset of frame indices. + + You can ask for a representation of the event list as + 1) a sequence of (start, stop) tuples + 2) a boolean array of length num_frames with True for all event frames + + Attributes + ---------- + start_frames : numpy.array 1-d + Frames when each event starts. + end_frames : numpy.array 1-d + Frames when each event ends (is inclusive, i.e. the last frame is + a part of the event) + starts_and_stops : 2-d numpy.array + num_events : int + num_events_for_stats : int + last_frame : int + + Methods + ------- + get_event_mask: returns 1-d boolean numpy array + merge: returns an EventList instance + + Notes + ----- + Previous name: + seg_worm.feature.event_ss ("ss" stands for "simple structure") + + + @JimHokanson: This class is the encapsulation of the raw + substructure, or output from finding the event. + + """ + + def __init__(self, event_starts_and_stops=None): + """ + + Parameters + ---------- + + """ + # self.start_frames and self.end_frames will be the internal representation + # of the events within this class. + self.start_frames = None + self.end_frames = None + + # Check if our events array exists and there is at least one event + if (event_starts_and_stops is not None and + event_starts_and_stops.size != 0): + self.start_frames = event_starts_and_stops[:, 0] + self.end_frames = event_starts_and_stops[:, 1] + + if(self.start_frames is None): + self.start_frames = np.array([], dtype=int) + + if(self.end_frames is None): + self.end_frames = np.array([], dtype=int) + + def __repr__(self): + return utils.print_object(self) + + @property + def starts_and_stops(self): + """ + Returns the start_frames and end_frames as a single numpy array + """ + s_and_s = np.array([self.start_frames, self.end_frames]) + + # check that we didn't have s_and_s = [None, None] or something + if len(np.shape(s_and_s)) == 2: + # We need the first dimension to be the events, and the second + # to be the start / end, not the other way around. + # i.e. we want it to be n x 2, not 2 x n + s_and_s = np.rollaxis(s_and_s, 1) + return s_and_s + else: + return np.array([]) + + @property + def __len__(self): + """ + Return the number of events stored by a given instance of this class. + + Notes + --------------------------------------- + Formerly n_events + + """ + # TODO: I think we are mixing lists and numpy arrays - let's remove the lists + # TypeError: object of type 'numpy.float64' has no len() + try: + return len(self.start_frames) + except TypeError: + return self.start_frames.size + + @property + def last_event_frame(self): + """ + Return the frame # of end of the final event + + Notes + --------------------------------------- + Note that the events represented by a given instance + must have come from a video of at least this many + frames. + + """ + # Check if the end_frames have any entries at all + if self.end_frames is not None and self.end_frames.size != 0: + return self.end_frames[-1] + else: + return 0 + + @property + def num_events_for_stats(self): + """ + Compute the number of events, excluding the partially recorded ones. 
+ Partially recorded ones are: + 1) An event that has already started at the first frame + 2) An event that is still going at the last frame + """ + value = self.__len__ + if value > 1: + if self.start_frames[0] == 0: + value = value - 1 + if self.end_frames[-1] == self.num_video_frames - 1: + value = value - 1 + + return value + + def get_event_mask(self, num_frames=None): + """ + + This was developed for locomotion_features.MotionEvents in which + the goal was to go from events back to the original timeline to + calculate for every frame which event that frame is a part of. + + Returns an array with True entries only between + start_frames[i] and end_frames[i], for all i such that + 0 <= end_frames[i] < num_frames + + Parameters + ---------- + num_frames: int (optional) + The number of frames to use in the mask + If num_frames is not given, a mask just large enough to accommodate + all the events is returned (i.e. of length self.last_event_frame+1) + + Returns + ------- + 1-d boolean numpy array of length num_frames + + """ + # Create empty array of all False, as large as + # it might possibly need to be + if num_frames is None: + num_frames = self.last_event_frame + 1 + + mask = np.zeros(max(self.last_event_frame + + 1, num_frames), dtype='bool') + + for i_event in range(self.__len__): + mask[self.start_frames[i_event]:self.end_frames[i_event] + 1] = True + + #??? Why are we slicing the output? + # This appears to be because last_event_frame+1 could be larger + # than num_frames + # TODO: This should be fixed, let's make this truncation more explicit +in the documentation. We should also consider embedding the # + # of frames into the event + return mask[0:num_frames] + + @classmethod + def merge(cls, obj1, obj2): + """ + Merge two EventList instances into a single, larger, EventList + + Acts as a factory, producing a third instance of the EventList class + that is the concatenation of the first two, with the start indices + blended and properly in order. + + Parameters + ---------- + cls: The static class parameter, associated with @classmethod + obj1: EventList instance + obj2: EventList instance + + Returns + --------------------------------------- + Tuple (EventList, is_from_first_object) + EventList: A new EventList instance + is_from_first_object: A mask in case you care which indices are + from the first object. + + """ + all_starts = np.concatenate((obj1.start_frames, obj2.start_frames)) + all_ends = np.concatenate((obj1.end_frames, obj2.end_frames)) + + # TODO: It would be good to check that events don't overlap + + new_starts = np.sort(all_starts) + order_I = np.argsort(all_starts) + + new_ends = all_ends[order_I] + + is_from_first_object = order_I < obj1.start_frames.size + + starts_stops = np.transpose(np.vstack((new_starts, new_ends))) + + return (EventList(starts_stops), is_from_first_object) + + +class EventListWithFeatures(EventList): + + """ + A list of events, but also with a set of features calculated for those + events. e.g. time_between_events, etc. + + The list of events can also be imbued with another dimension of data, + called "distance" (i.e. the distance the worm has travelled during the + given frame) but which can be generalized to anything that can happen + over time to the worm. + + With this extra dimension other features can be calculated, such as + distance_during_events. 
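To make the two representations named in the `EventList` docstring concrete, here is a short round-trip sketch from (start, stop) pairs to a per-frame boolean mask; the import path is again the hypothetical one implied by this diff's layout:

```python
import numpy as np
from tierpsy.features.open_worm_analysis_toolbox.features.events import EventList

starts_and_stops = np.array([[2, 4], [8, 8]])        # two events; stops are inclusive
ev = EventList(starts_and_stops)
print(ev.get_event_mask(num_frames=10).astype(int))  # [0 0 1 1 1 0 0 0 1 0]
```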
+ + Properties + --------------------------------------- + num_video_frames + start_frames + end_frames + event_durations + time_between_events + distance_during_events + distance_between_events + total_time + frequency + time_ratio + data_ratio + num_events_for_stats + + Notes + --------------------------------------- + Formerly seg_worm.feature.event + See also seg_worm.events.events2stats + + Known Uses + --------------------------------------- + posture.coils + locomotion.turns.omegas + locomotion.turns.upsilons + locomotion.motion.forward + locomotion.motion.backward + locomotion.motion.paused + + %.frames - event_stats (from event2stats) + %.frequency - + + THIS IS ALL OUTDATED + + + properties + fps + n_video_frames + + %INPUTS + %------------------------------------------------------------------ + #Old Names: start and end + #NOTE: These are the exact frames, the end is NOT setup for slicing + start_frames %[1 n_events] + end_frames %[1 n_events] + data_sum_name %[1 n_events] + inter_data_sum_name %[1 n_events], last value is NaN + + + end + + + """ + + def __init__(self, fps, event_list=None, distance_per_frame=None, + compute_distance_during_event=False, make_null=False): + """ + Initialize an instance of EventListWithFeatures + + Parameters: + ----------- + event_list : EventList (default None) + A list of all events + distance_per_frame : numpy.array (default None) + Distance moved per frame. In fact, as discussed in the class + definition for EventListWithFeatures, this parameter can be used + for any quantifiable behaviour the worm is engaged in over time, + not just distance travelled. Perhaps therefore could be renamed + to something more general. + compute_distance_during_event: boolean (default False) + Whether or not to compute the distance during the event. + make_null: boolean (default False) + Whether or not the caller wants simply a blank instance + to be returned. This is for cases in which there are no events + for a particular feature. 
+ + This is different than if the event has not been computed, in which + case the object itself should be None + + + Parameters: + ----------- + Original Code: +seg_worm/+feature/+event/event.m + + Used by: + get_motion_codes - computes data and interdata + get_coils - computes only interdata + omega and upsilon - computes only interdata + + """ + if event_list is None: + # If no event_list instance is supplied, instantiate + EventList.__init__(self, None) + else: + EventList.__init__(self, event_list.starts_and_stops) + + self.distance_per_frame = distance_per_frame + + # If a blank instance has been requested, or if a blank event_list + # has been provided, flag the self.is_null variable as such + self.is_null = make_null or (self.__len__ == 0) + + # Only calculate the extra features if this is not a "null" instance + if not self.is_null: + # Calculate the features + self.calculate_features(fps, compute_distance_during_event) + else: + # Otherwise, populate with blanks + self.event_durations = np.array([], dtype=float) + self.distance_during_events = np.array([], dtype=float) + self.time_between_events = np.array([], dtype=float) + self.distance_between_events = np.array([], dtype=float) + self.frequency = np.NaN + self.time_ratio = np.NaN + self.data_ratio = np.NaN + + def calculate_features(self, fps, compute_distance_during_event): + """ + num_video_frames + start_frames + end_frames + event_durations + time_between_events + distance_during_events + distance_between_events + total_time + frequency + time_ratio + data_ratio + num_events_for_stats + + """ + + self.num_video_frames = len(self.distance_per_frame) + + # Old Name: time + self.event_durations = (self.end_frames - self.start_frames + 1) / fps + + # Old Name: interTime + self.time_between_events = ( + self.start_frames[1:] - self.end_frames[:-1] - 1) / fps + + # Old Name: interDistance + # Distance moved during events + if compute_distance_during_event: + self.distance_during_events = np.zeros(self.__len__) + for i in range(self.__len__): + self.distance_during_events[i] = np.nansum( + self.distance_per_frame[ + self.start_frames[i]:self.end_frames[i] + 1]) + self.data_ratio = np.nansum(self.distance_during_events) \ + / np.nansum(self.distance_per_frame) + else: + self.distance_during_events = np.array([]) + self.data_ratio = np.NaN + + # Old Name: distance + # Distance moved between events + self.distance_between_events = np.zeros(self.__len__ - 1) + for i in range(self.__len__ - 1): + # Suppress "FutureWarning: In Numpy 1.9 the sum along empty + # slices will be zero." + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=FutureWarning) + self.distance_between_events[i] = \ + np.nansum( + self.distance_per_frame[self.end_frames[i] + 1: + self.start_frames[i + 1]]) + + #self.distance_between_events[-1] = np.NaN + + self.total_time = self.num_video_frames / fps + + # How frequently an event occurs - add to documentation + self.frequency = self.num_events_for_stats / self.total_time + + self.time_ratio = np.nansum(self.event_durations) / self.total_time + + def get_event_mask(self): + """ + Return a numpy boolean array corresponding to the events + + Returns + --------------------------------------- + A 1-d numpy boolean array + + Notes + --------------------------------------- + + EventListWithFeatures has overridden its superclass, EventList's, + get_event_mask with its own here, since self.distance_per_frame + gives it the precise number of frames so it no longer needs to + accept it as a parameter. 
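The per-event timing features computed in `calculate_features` above reduce to simple frame arithmetic; a worked sketch with hypothetical numbers:

```python
import numpy as np

fps = 25.0
start_frames = np.array([0, 100])
end_frames = np.array([49, 149])

# Durations include both endpoints, hence the +1.
event_durations = (end_frames - start_frames + 1) / fps               # [2.0, 2.0] s
# Gap between consecutive events, in seconds.
time_between_events = (start_frames[1:] - end_frames[:-1] - 1) / fps  # [2.0] s
print(event_durations, time_between_events)
```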
+
+        """
+        return EventList.get_event_mask(self, self.num_video_frames)
+
+    @classmethod
+    def from_disk(cls, event_ref, ref_format):
+        """
+        Class factory method to return an instance of the class, loaded
+        from disk.
+
+        Parameters
+        ---------------------------------------
+        event_ref : HDF5 group containing the saved event data
+        ref_format : {'MRC'}
+            The format used. Currently 'MRC' is the only option.
+
+        Returns
+        ---------------------------------------
+        An EventListWithFeatures instance
+
+        """
+        # Construct the class
+        self = cls.__new__(cls)
+        # Initialize the superclass
+        EventList.__init__(self, None)
+
+        """
+        Attributes populated below:
+        num_video_frames
+        start_frames
+        end_frames
+        event_durations
+        time_between_events
+        distance_during_events
+        distance_between_events
+        total_time
+        frequency
+        time_ratio
+        data_ratio
+        num_events_for_stats
+        """
+
+        if ref_format == 'MRC':
+            frames = event_ref['frames']
+
+            # In Matlab this is a structure array.
+            # Our goal is to go from an array of structures to a
+            # single "structure" with arrays of values.
+            #
+            # If only a single element is present, the data are saved
+            # differently. In this case the values are saved directly,
+            # without a reference to dereference.
+            frame_values = {}
+            file_ref = frames.file
+            for key in frames:
+                ref_array = frames[key]
+                try:
+                    # Getting the indexing right here was painful:
+                    # a reference array of shape (1, n) takes the second
+                    # branch below - seriously, Matlab :/
+                    if ref_array.shape[0] > 1:
+                        frame_values[key] = np.array(
+                            [file_ref[x[0]][0][0] for x in ref_array])
+                    else:
+                        # This is correct for omegas ...
+                        frame_values[key] = np.array(
+                            [file_ref[x][0][0] for x in ref_array[0]])
+
+                except AttributeError:
+                    # AttributeError: 'numpy.float64' object has no attribute
+                    # 'encode', i.e. the value was saved directly
+                    ref_element = ref_array
+                    frame_values[key] = [ref_element[0][0]]
+
+            self.start_frames = np.array(frame_values['start'], dtype=int)
+            self.end_frames = np.array(frame_values['end'], dtype=int)
+            self.event_durations = np.array(frame_values['time'])
+            if 'isVentral' in frame_values:
+                # For isVentral to even exist we must be at a signed event,
+                # such as where
+                # frames.name == '/worm/locomotion/turns/omegas/frames'
+                self.is_ventral = np.array(frame_values['isVentral'],
+                                           dtype=bool)
+
+            # Remove the NaN value at the end
+            n_events = self.start_frames.size
+            if n_events < 2:
+                self.time_between_events = np.zeros(0)
+                self.distance_between_events = np.zeros(0)
+            else:
+                self.time_between_events = np.array(
+                    frame_values['interTime'][:-1])
+                self.distance_between_events = np.array(
+                    frame_values['interDistance'][:-1])
+
+            # JAH: I found float was needed as the calculated frequency was
+            # also of this type. I'm not sure why we lost the numpy array
+            # entry for the calculated frequency ...
+            self.frequency = event_ref['frequency'].value[0][0]
+
+            if 'ratio' in event_ref.keys():
+                ratio = event_ref['ratio']
+                self.distance_during_events = np.array(
+                    frame_values['distance'])
+                self.time_ratio = ratio['time'][0][0]
+                self.data_ratio = ratio['distance'][0][0]
+            else:
+                self.time_ratio = event_ref['timeRatio'].value[0][0]
+                self.data_ratio = np.NaN
+                self.distance_during_events = np.zeros(0)
+        else:
+            raise Exception('Other formats not yet supported :/')
+
+        # Num_video_frames - hack :/ - @JimHokanson
+        # Look away ...
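+        # Editor's note: the next two lines assume that the
+        # 'worm/morphology/length' dataset holds exactly one value per
+        # video frame, so its length doubles as the frame count.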
+        temp_length = file_ref['worm/morphology/length']
+        self.num_video_frames = len(temp_length)
+
+        # Total_time - also a hack :/ - @JimHokanson
+        self.total_time = self.num_events_for_stats / self.frequency
+
+        return self
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    def test_equality(self, other, event_name):
+
+        try:
+            if self.is_null and other.is_null:
+                return True
+            elif self.is_null != other.is_null:
+                print('Event mismatch %s' % event_name)
+                return False
+        except Exception:
+            raise Exception("Problem while testing equality")
+
+        # TODO: Add an integer equality comparison with name printing
+        return (
+            utils.compare_is_equal(self.num_video_frames,
+                                   other.num_video_frames,
+                                   event_name + '.num_video_frames') and
+            utils.correlation(self.start_frames, other.start_frames,
+                              event_name + '.start_frames') and
+            utils.correlation(self.end_frames, other.end_frames,
+                              event_name + '.end_frames') and
+            utils.correlation(self.event_durations, other.event_durations,
+                              event_name + '.event_durations') and
+            utils.correlation(self.distance_during_events,
+                              other.distance_during_events,
+                              event_name + '.distance_during_events') and
+            utils.compare_is_equal(self.total_time, other.total_time,
+                                   event_name + '.total_time', 0.01) and
+            utils.compare_is_equal(self.frequency, other.frequency,
+                                   event_name + '.frequency', 0.01) and
+            utils.compare_is_equal(self.time_ratio, other.time_ratio,
+                                   event_name + '.time_ratio', 0.01) and
+            utils.compare_is_equal(self.data_ratio, other.data_ratio,
+                                   event_name + '.data_ratio', 0.01) and
+            utils.compare_is_equal(self.num_events_for_stats,
+                                   other.num_events_for_stats,
+                                   event_name + '.num_events_for_stats'))
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/feature_manipulations.py b/tierpsy/features/open_worm_analysis_toolbox/features/feature_manipulations.py
new file mode 100755
index 00000000..12cb6831
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/feature_manipulations.py
@@ -0,0 +1,240 @@
+# -*- coding: utf-8 -*-
+"""
+
+TODO: The processing for expand_mrc_features should go in its own module;
+just the entry function should live here.
+
+"""
+
+from .. import utils
+from . import generic_features
+
+import copy
+import warnings
+import numpy as np
+
+
+def _expand_event_features(old_features, e_feature, m_masks, num_frames):
+    """
+    Expand an event feature:
+    - at some point we need to filter events :/
+    - when not signed, only a single value ('all')
+    - if signed, 4x: we compute all, absolute, positive, negative
+    """
+
+    cur_spec = e_feature.spec
+
+    if e_feature.has_data:
+        # Removes partials and signs the data
+        cur_data = e_feature.get_value()
+        # Remove the NaN and Inf entries
+        all_data = utils.filter_non_numeric(cur_data)
+
+        data_entries = {}
+        data_entries['all'] = all_data
+        if cur_spec.is_signed:
+            data_entries['absolute'] = np.absolute(all_data)
+            data_entries['positive'] = all_data[all_data > 0]
+            data_entries['negative'] = all_data[all_data < 0]
+
+    else:
+        data_entries = {}
+        data_entries['all'] = None
+        if cur_spec.is_signed:
+            data_entries['absolute'] = None
+            data_entries['positive'] = None
+            data_entries['negative'] = None
+
+    return [
+        _create_new_event_feature(
+            e_feature,
+            data_entries[x],
+            x) for x in data_entries]
+
+
+def _create_new_event_feature(feature, data, d_type):
+
+    # TODO: Need to verify that this is correct
+
+    FEATURE_NAME_FORMAT_STR = '%s.%s_data'
+
+    temp_feature = feature.copy()
+    temp_spec = feature.spec.copy()
+    temp_spec.type = 'expanded_event'
+    temp_spec.is_time_series = False
+    temp_spec.name = FEATURE_NAME_FORMAT_STR % (temp_spec.name, d_type)
+
+    # We might want to change this to load from the spec
+    temp_feature.name = temp_spec.name
+    # display_name?
+    # short_display_name?
+    #
+    # has_zero_bin => stays the same
+    # is_signed => maybe ...
+    # TODO: Might need to change this for events
+    temp_spec.is_signed = temp_spec.is_signed and d_type == 'all'
+    temp_feature.value = data
+    temp_feature.spec = temp_spec
+    # TODO: Let's update the keep mask and signed
+
+    return temp_feature
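+
+# Editor's illustrative note: for a signed event feature named, e.g.,
+# 'locomotion.omega_turns.time_between_events', the expansion above yields
+# four new features - '<name>.all_data', '<name>.absolute_data',
+# '<name>.positive_data' and '<name>.negative_data' (see
+# FEATURE_NAME_FORMAT_STR); an unsigned feature yields only '<name>.all_data'.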
+
+
+def _expand_movement_features(m_feature, m_masks, num_frames):
+    """
+    Movement features are expanded as follows:
+    - if not signed, then we have 4x based on how the worm is moving:
+        - all
+        - forward
+        - paused
+        - backward
+    - if signed, then we have 16x based on the feature's values and
+      on how the worm is moving
+
+    *All NaN values are removed
+
+    """
+
+    motion_types = ['all', 'forward', 'paused', 'backward']
+    data_types = ['all', 'absolute', 'positive', 'negative']
+
+    cur_spec = m_feature.spec
+    cur_data = m_feature.value
+
+    good_data_mask = ~utils.get_non_numeric_mask(cur_data).flatten()
+
+    d_masks = {}
+    d_masks['all'] = good_data_mask
+    if cur_spec.is_signed:
+        d_masks["absolute"] = good_data_mask
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            d_masks["positive"] = cur_data >= 0  # bad data will be False
+            d_masks["negative"] = cur_data <= 0  # bad data will be False
+
+    # Now let's create 16 histograms, one for each element of
+    # (motion_types x data_types)
+
+    new_features = []
+    for cur_motion_type in motion_types:
+
+        # We could get rid of this if we don't care about the order,
+        # OR if we use an ordered dict, since we could then iterate
+        # directly over d_masks
+        if cur_spec.is_signed:
+            end_type_index = 4
+        else:
+            end_type_index = 1
+
+        for cur_data_type in data_types[:end_type_index]:
+            new_feature = _create_new_movement_feature(
+                m_feature, m_masks, d_masks, cur_motion_type, cur_data_type)
+            new_features.append(new_feature)
+
+    return new_features
+
+
+def _create_new_movement_feature(feature, m_masks, d_masks, m_type, d_type):
+    """
+
+    Parameters
+    ----------
+    m_type : string
+        Movement type (one of 'all', 'forward', 'paused', 'backward')
+    d_type : string
+        Data type (one of 'all', 'absolute', 'positive', 'negative')
+    """
+
+    # Spec adjustment
+    #---------------
+
+    # The resulting names look like, e.g.,
+    # '<name>.absolute_data_with_forward_movement'
+    FEATURE_NAME_FORMAT_STR = '%s.%s_data_with_%s_movement'
+
+    cur_mask = m_masks[m_type] & d_masks[d_type]
+    temp_feature = feature.copy()
+    temp_spec = temp_feature.spec
+    temp_spec.type = 'expanded_movement'
+    temp_spec.is_time_series = False
+    temp_spec.name = FEATURE_NAME_FORMAT_STR % (temp_spec.name, d_type, m_type)
+
+    # We might want to change this to load from the spec
+    temp_feature.name = temp_spec.name
+    # display_name?
+    # short_display_name?
+    #
+    # has_zero_bin => stays the same
+    # is_signed => maybe ...
+    temp_spec.is_signed = temp_spec.is_signed and d_type == 'all'
+    if d_type == 'absolute':
+        temp_feature.value = np.absolute(feature.value[cur_mask])
+    else:
+        temp_feature.value = feature.value[cur_mask]
+
+    temp_feature.spec = temp_spec
+
+    return temp_feature
+
+
+def expand_mrc_features(old_features):
+    """
+    Feature Expansion:
+    ------------------
+    simple - no expansion
+    movement
+    - if not signed, then we have 4x based on how the worm is moving:
+        - all
+        - forward
+        - paused
+        - backward
+    - if signed, then we have 16x based on the feature's values and
+      on how the worm is moving
+    event
+    - at some point we need to filter events :/
+    - when not signed, only a single value
+    - if signed, 4x: we compute all, absolute, positive, negative
+
+    Outline
+    -------
+    Return a new set of features in which the specs have been appropriately
+    modified (need to implement a deep copy)
+    """
+
+    # Motion of the worm's body
+    motion_types = ['all', 'forward', 'paused', 'backward']
+    # Values that the current feature can take on (listed for reference;
+    # the helpers above define their own copies)
+    data_types = ['all', 'absolute', 'positive', 'negative']
+
+    motion_modes = old_features.get_features('locomotion.motion_mode').value
+
+    num_frames = len(motion_modes)
+
+    move_mask = {}
+    move_mask["all"] = np.ones(num_frames, dtype=bool)
+    move_mask["forward"] = motion_modes == 1
+    move_mask["backward"] = motion_modes == -1
+    move_mask["paused"] = motion_modes == 0
+
+    all_features = []
+    for cur_feature in old_features:
+
+        cur_spec = cur_feature.spec
+
+        if cur_spec.type == 'movement':
+            all_features.extend(
+                _expand_movement_features(
+                    cur_feature, move_mask, num_frames))
+        # elif cur_spec.type == 'simple':
+        #     all_features.append(copy.deepcopy(cur_feature))
+        elif cur_spec.type == 'event':
+            all_features.extend(
+                _expand_event_features(
+                    old_features,
+                    cur_feature,
+                    move_mask,
+                    num_frames))
+        else:
+            all_features.append(cur_feature.copy())
+
+    return old_features.copy(all_features)
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list.csv b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list.csv
new file mode 100755
index 00000000..4a436c8b
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list.csv
@@ -0,0 +1,126 @@
+is_final_feature,feature_name,module,class_name,processing_flags,notes,type,category,display_name,short_display_name,units,bin_width,is_signed,has_zero_bin,signing_field,remove_partial_events,make_zero_if_empty,is_time_series,old_schafer_feature_name,old_schafer_sub_field +y,morphology.length,morphology_features,Length,,,movement,morphology,Length,Length,Microns,1,0,0,,,,1,morphology.length, +y,morphology.width.head,morphology_features,WidthSection,head,,movement,morphology,Head Width,Head,Microns,1,0,0,,,,1,morphology.width.head, +y,morphology.width.midbody,morphology_features,WidthSection,midbody,,movement,morphology,Midbody Width,Midbody,Microns,1,0,0,,,,1,morphology.width.midbody, +y,morphology.width.tail,morphology_features,WidthSection,tail,,movement,morphology,Tail Width,Tail,Microns,1,0,0,,,,1,morphology.width.tail, +y,morphology.area,morphology_features,Area,,,movement,morphology,Area,Area,Microns^2,100,0,0,,,,1,morphology.area, +y,morphology.area_per_length,morphology_features,AreaPerLength,,,movement,morphology,Area/Length,Area/Length,Microns,0.1,0,0,,,,1,morphology.areaPerLength, +y,morphology.width_per_length,morphology_features,WidthPerLength,,,movement,morphology,Width/Length,Width/Length,None,0.0001,0,0,,,,1,morphology.widthPerLength, +n,locomotion.velocity.avg_body_angle,locomotion_features,AverageBodyAngle,,,,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.velocity.head_tip,locomotion_features,LocomotionVelocitySection,head_tip,,,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.velocity.head_tip.speed,locomotion_features,VelocitySpeed,head_tip,,movement,locomotion,Head Tip Speed (+/- = Forward/Backward),Head Tip,Microns/Seconds,1,1,1,,,,1,locomotion.velocity.headTip.speed, +y,locomotion.velocity.head_tip.direction,locomotion_features,VelocityDirection,head_tip,,movement,locomotion,Head Tip Motion Direction (+/- = Toward D/V),Head Tip,Degrees/Seconds,0.01,1,1,,,,1,locomotion.velocity.headTip.direction, +n,locomotion.velocity.head,locomotion_features,LocomotionVelocitySection,head,,movement,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.velocity.head.speed,locomotion_features,VelocitySpeed,head,,movement,locomotion,Head Speed (+/- = Forward/Backward),Head,Microns/Seconds,1,1,1,,,,1,locomotion.velocity.head.speed, +y,locomotion.velocity.head.direction,locomotion_features,VelocityDirection,head,,movement,locomotion,Head Motion Direction (+/- = Toward D/V),Head,Degrees/Seconds,0.01,1,1,,,,1,locomotion.velocity.head.direction, +n,locomotion.velocity.midbody,locomotion_features,LocomotionVelocitySection,midbody,,,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.velocity.midbody.speed,locomotion_features,VelocitySpeed,midbody,,movement,locomotion,Midbody Speed (+/- = Forward/Backward),Midbody,Microns/Seconds,1,1,1,,,,1,locomotion.velocity.midbody.speed, +y,locomotion.velocity.midbody.direction,locomotion_features,VelocityDirection,midbody,,movement,locomotion,Midbody Motion Direction (+/- = Toward D/V),Midbody,Degrees/Seconds,0.01,1,1,,,,1,locomotion.velocity.midbody.direction, +n,locomotion.velocity.mibdody.distance,locomotion_features,MidbodyVelocityDistance,,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.velocity.tail,locomotion_features,LocomotionVelocitySection,tail,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.velocity.tail.speed,locomotion_features,VelocitySpeed,tail,,movement,locomotion,Tail Speed (+/- = Forward/Backward),Tail,Microns/Seconds,1,1,1,,,,1,locomotion.velocity.tail.speed, +y,locomotion.velocity.tail.direction,locomotion_features,VelocityDirection,tail,,movement,locomotion,Tail 
Motion Direction (+/- = Toward D/V),Tail,Degrees/Seconds,0.01,1,1,,,,1,locomotion.velocity.tail.direction, +n,locomotion.velocity.tail_tip,locomotion_features,LocomotionVelocitySection,tail_tip,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.velocity.tail_tip.speed,locomotion_features,VelocitySpeed,tail_tip,,movement,locomotion,Tail Tip Speed (+/- = Forward/Backward),Tail Tip,Microns/Seconds,1,1,1,,,,1,locomotion.velocity.tailTip.speed, +y,locomotion.velocity.tail_tip.direction,locomotion_features,VelocityDirection,tail_tip,,movement,locomotion,Tail Tip Motion Direction (+/- = Toward D/V),Tail Tip,Degrees/Seconds,0.01,1,1,,,,1,locomotion.velocity.tailTip.direction, +n,locomotion.motion_events.forward,locomotion_features,MotionEvent,forward,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.motion_events.backward,locomotion_features,MotionEvent,backward,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.motion_events.paused,locomotion_features,MotionEvent,paused,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.motion_mode,locomotion_features,MotionMode,,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.motion_events.forward.event_durations,generic_features,EventFeature,,,event,locomotion,Forward Time,Time,seconds,0.5,0,0,,1,0,0,locomotion.motion.forward.frames,time +y,locomotion.motion_events.forward.distance_during_events,generic_features,EventFeature,,,event,locomotion,Forward Distance,Distance,microns,10,0,0,,1,0,0,locomotion.motion.forward.frames,distance +y,locomotion.motion_events.forward.time_between_events,generic_features,EventFeature,,,event,locomotion,Inter Forward Time,Inter Time,seconds,5,0,0,,1,0,0,locomotion.motion.forward.frames,interTime +y,locomotion.motion_events.forward.distance_between_events,generic_features,EventFeature,,,event,locomotion,Inter Forward Distance,Inter Distance,microns,100,0,0,,1,0,0,locomotion.motion.forward.frames,interDistance +y,locomotion.motion_events.forward.frequency,generic_features,EventFeature,,,event,locomotion,Forward Motion Frequency,Frequency,Hz,0.001,0,0,,0,1,0,locomotion.motion.forward.frequency, +y,locomotion.motion_events.forward.time_ratio,generic_features,EventFeature,,,event,locomotion,Forward Motion Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.forward.ratio,time +y,locomotion.motion_events.forward.data_ratio,generic_features,EventFeature,,,event,locomotion,Forward Motion Distance Ratio,Distance Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.forward.ratio,distance +y,locomotion.motion_events.paused.event_durations,generic_features,EventFeature,,,event,locomotion,Paused Time,Time,seconds,0.5,0,0,,1,0,0,locomotion.motion.paused.frames,time +y,locomotion.motion_events.paused.distance_during_events,generic_features,EventFeature,,,event,locomotion,Paused Distance,Distance,microns,10,0,0,,1,0,0,locomotion.motion.paused.frames,distance +y,locomotion.motion_events.paused.time_between_events,generic_features,EventFeature,,,event,locomotion,Inter Paused Time,Inter Time,seconds,5,0,0,,1,0,0,locomotion.motion.paused.frames,interTime +y,locomotion.motion_events.paused.distance_between_events,generic_features,EventFeature,,,event,locomotion,Inter Paused Distance,Inter Distance,microns,100,0,0,,1,0,0,locomotion.motion.paused.frames,interDistance +y,locomotion.motion_events.paused.frequency,generic_features,EventFeature,,,event,locomotion,Paused Motion Frequency,Frequency,Hz,0.001,0,0,,0,1,0,locomotion.motion.paused.frequency, +y,locomotion.motion_events.paused.time_ratio,generic_features,EventFeature,,,event,locomotion,Paused Motion 
Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.paused.ratio,time +y,locomotion.motion_events.paused.data_ratio,generic_features,EventFeature,,,event,locomotion,Paused Motion Distance Ratio,Distance Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.paused.ratio,distance +y,locomotion.motion_events.backward.event_durations,generic_features,EventFeature,,,event,locomotion,Backward Time,Time,seconds,0.5,0,0,,1,0,0,locomotion.motion.backward.frames,time +y,locomotion.motion_events.backward.distance_during_events,generic_features,EventFeature,,,event,locomotion,Backward Distance,Distance,microns,10,0,0,,1,0,0,locomotion.motion.backward.frames,distance +y,locomotion.motion_events.backward.time_between_events,generic_features,EventFeature,,,event,locomotion,Inter Backward Time,Inter Time,seconds,5,0,0,,1,0,0,locomotion.motion.backward.frames,interTime +y,locomotion.motion_events.backward.distance_between_events,generic_features,EventFeature,,,event,locomotion,Inter Backward Distance,Inter Distance,microns,100,0,0,,1,0,0,locomotion.motion.backward.frames,interDistance +y,locomotion.motion_events.backward.frequency,generic_features,EventFeature,,,event,locomotion,Backward Motion Frequency,Frequency,Hz,0.001,0,0,,0,1,0,locomotion.motion.backward.frequency, +y,locomotion.motion_events.backward.time_ratio,generic_features,EventFeature,,,event,locomotion,Backward Motion Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.backward.ratio,time +y,locomotion.motion_events.backward.data_ratio,generic_features,EventFeature,,,event,locomotion,Backward Motion Distance Ratio,Distance Ratio,no units,0.001,0,0,,0,1,0,locomotion.motion.backward.ratio,distance +n,locomotion.foraging_bends,locomotion_bends,ForagingBends,,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.foraging_bends.amplitude,locomotion_bends,ForagingAmplitude,,,movement,locomotion,Foraging Amplitude (+/- = Toward D/V),Amplitude,Microns,1,1,1,,,,1,locomotion.bends.foraging.amplitude, +y,locomotion.foraging_bends.angle_speed,locomotion_bends,ForagingAngleSpeed,,,movement,locomotion,Foraging Speed (+/- = Toward D/V),Speed,Degrees/Seconds,10,1,1,,,,1,locomotion.bends.foraging.angleSpeed, +n,locomotion.motion_events.is_paused,locomotion_features,IsPaused,,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.crawling_bends.head,locomotion_bends,CrawlingBend,head,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.crawling_bends.head.amplitude,locomotion_bends,BendAmplitude,head,,movement,locomotion,Head Crawling Amplitude (+/- = D/V Inside),Head,Degrees,1,1,1,,,,1,locomotion.bends.head.amplitude, +y,locomotion.crawling_bends.head.frequency,locomotion_bends,BendFrequency,head,,movement,locomotion,Head Crawling Frequency (+/- = D/V Inside),Head,Hz,0.1,1,1,,,,1,locomotion.bends.head.frequency, +n,locomotion.crawling_bends.midbody,locomotion_bends,CrawlingBend,midbody,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.crawling_bends.midbody.amplitude,locomotion_bends,BendAmplitude,midbody,,movement,locomotion,Midbody Crawling Amplitude (+/- = D/V Inside),Midbody,Degrees,1,1,1,,,,1,locomotion.bends.midbody.amplitude, +y,locomotion.crawling_bends.midbody.frequency,locomotion_bends,BendFrequency,midbody,,movement,locomotion,Midbody Crawling Frequency (+/- = D/V Inside),Midbody,Hz,0.1,1,1,,,,1,locomotion.bends.midbody.frequency, +n,locomotion.crawling_bends.tail,locomotion_bends,CrawlingBend,tail,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.crawling_bends.tail.amplitude,locomotion_bends,BendAmplitude,tail,,movement,locomotion,Tail Crawling 
Amplitude (+/- = D/V Inside),Tail,Degrees,1,1,1,,,,1,locomotion.bends.tail.amplitude, +y,locomotion.crawling_bends.tail.frequency,locomotion_bends,BendFrequency,tail,,movement,locomotion,Tail Crawling Frequency (+/- = D/V Inside),Tail,Hz,0.1,1,1,,,,1,locomotion.bends.tail.frequency, +n,locomotion.turn_processor,locomotion_turns,TurnProcessor,,,movement,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.omega_turns,locomotion_turns,NewOmegaTurns,,,NA,locomotion,NA,NA,NA,,,,,,,,, +n,locomotion.upsilon_turns,locomotion_turns,NewUpsilonTurns,,,NA,locomotion,NA,NA,NA,,,,,,,,, +y,locomotion.omega_turns.event_durations,generic_features,EventFeature,,,event,locomotion,Omega Turn Time (+/- = D/V Inside),Time,seconds,0.1,1,0,is_ventral,1,0,0,locomotion.turns.omegas.frames,time +y,locomotion.omega_turns.time_between_events,generic_features,EventFeature,,,event,locomotion,Inter Omega Time (+/- = Previous D/V),Inter Time,seconds,5,1,0,is_ventral,1,0,0,locomotion.turns.omegas.frames,interTime +y,locomotion.omega_turns.distance_between_events,generic_features,EventFeature,,,event,locomotion,Inter Omega Distance (+/- = Previous D/V),Inter Distance,microns,100,1,0,is_ventral,1,0,0,locomotion.turns.omegas.frames,interDistance +y,locomotion.omega_turns.frequency,generic_features,EventFeature,,,event,locomotion,Omega Turns Frequency,Frequency,Hz,0.001,0,0,,0,1,0,locomotion.turns.omegas.frequency, +y,locomotion.omega_turns.time_ratio,generic_features,EventFeature,,,event,locomotion,Omega Turns Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,locomotion.turns.omegas.timeRatio, +n,locomotion.omega_turns.is_ventral,generic_features,EventFeature,,,event,locomotion,,,,,,,,,,,, +y,locomotion.upsilon_turns.event_durations,generic_features,EventFeature,,,event,locomotion,Upsilon Turn Time (+/- = D/V Inside),Time,seconds,0.1,1,0,is_ventral,1,0,0,locomotion.turns.upsilons.frames,time +y,locomotion.upsilon_turns.time_between_events,generic_features,EventFeature,,,event,locomotion,Inter Upsilon Time (+/- = Previous D/V),Inter Time,seconds,5,1,0,is_ventral,1,0,0,locomotion.turns.upsilons.frames,interTime +y,locomotion.upsilon_turns.distance_between_events,generic_features,EventFeature,,,event,locomotion,Inter Upsilon Distance (+/- = Previous D/V),Inter Distance,microns,100,1,0,is_ventral,1,0,0,locomotion.turns.upsilons.frames,interDistance +y,locomotion.upsilon_turns.frequency,generic_features,EventFeature,,,event,locomotion,Upsilon Turns Frequency,Frequency,Hz,0.001,0,0,,0,1,0,locomotion.turns.upsilons.frequency, +y,locomotion.upsilon_turns.time_ratio,generic_features,EventFeature,,,event,locomotion,Upsilon Turns Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,locomotion.turns.upsilons.timeRatio, +n,locomotion.upsilon_turns.is_ventral,generic_features,EventFeature,,,event,locomotion,,,,,,,,,,,, +y,path.range,path_features,NewRange,,,movement,path,Path Range,Range,Microns,10,0,0,,,,1,path.range, +n,path.duration,path_features,Duration,,,,path,NA,NA,NA,,,,,,,,, +y,path.duration.worm,path_features,DurationFeature,worm,,simple,path,Worm Dwelling,Worm,seconds,1,0,0,,,move into code,move towards setup,, +y,path.duration.head,path_features,DurationFeature,head,,simple,path,Head Dwelling,Head,seconds,0.5,0,0,,,,0,, +y,path.duration.midbody,path_features,DurationFeature,midbody,,simple,path,Midbody Dwelling,Midbody,seconds,1,0,0,,,,0,, +y,path.duration.tail,path_features,DurationFeature,tail,,simple,path,Tail Dwelling,Tail,seconds,0.5,0,0,,,,0,, +n,path.coordinates,path_features,Coordinates,,,,path,NA,NA,NA,,,,,,,,, 
+y,path.curvature,path_features,Curvature,,,movement,path,Path Curvature (+/- = D/V Inside),Curvature,Radians/Microns,0.005,1,1,,,,1,path.curvature, +n,posture.eccentricity_and_orientation,posture_features,EccentricityAndOrientationProcessor,,,,posture,NA,NA,NA,,,,,,,,, +y,posture.eccentricity,posture_features,Eccentricity,,,movement,posture,Eccentricity,Eccentricity,No Units,0.01,0,0,,,,1,posture.eccentricity, +n,posture.amplitude_wavelength_processor,posture_features,AmplitudeAndWavelengthProcessor,,,,posture,NA,NA,NA,,,,,,,,, +y,posture.amplitude_max,posture_features,AmplitudeMax,,,movement,posture,Max Amplitude,Amplitude,Microns,1,0,0,,,,1,posture.amplitude.max, +y,posture.amplitude_ratio,posture_features,AmplitudeRatio,,,movement,posture,Amplitude Ratio,Ratio,None,0.01,0,0,,,,1,posture.amplitude.ratio, +y,posture.primary_wavelength,posture_features,PrimaryWavelength,,,movement,posture,Primary Wavelength,Primary,Microns,1,0,0,,,,1,posture.wavelength.primary, +y,posture.secondary_wavelength,posture_features,SecondaryWavelength,,,movement,posture,Secondary Wavelength,Secondary,Microns,1,0,0,,,,1,posture.wavelength.secondary, +y,posture.track_length,posture_features,TrackLength,,,movement,posture,Track Length,Track,Microns,1,0,0,,,,1,posture.tracklength, +n,posture.coils,posture_features,Coils,,,,posture,NA,NA,NA,,,,,,,,, +y,posture.coils.event_durations,generic_features,EventFeature,,,event,posture,Coil Time,Time,seconds,0.1,0,0,,1,0,0,posture.coils.frames,time +y,posture.coils.time_between_events,generic_features,EventFeature,,,event,posture,Inter Coil Time,Inter Time,seconds,5,0,0,,1,0,0,posture.coils.frames,interTime +y,posture.coils.distance_between_events,generic_features,EventFeature,,,event,posture,Inter Coil Distance,Inter Distance,microns,100,0,0,,1,0,0,posture.coils.frames,interDistance +y,posture.coils.frequency,generic_features,EventFeature,,,event,posture,Coils Frequency,Frequency,Hz,0.001,0,0,,0,1,0,posture.coils.frequency, +y,posture.coils.time_ratio,generic_features,EventFeature,,,event,posture,Coils Time Ratio,Time Ratio,no units,0.001,0,0,,0,1,0,posture.coils.timeRatio, +y,posture.kinks,posture_features,Kinks,,,movement,posture,Bend Count,Bends,Counts,1,0,1,,,,1,posture.kinks, +n,posture.all_eigenprojections,posture_features,EigenProjectionProcessor,,,,posture,NA,NA,NA,,,,,,,,, +y,posture.eigen_projection0,posture_features,EigenProjection,,,movement,posture,Eigen Projection 1,Projection 1,No Units,1,1,1,,,,1,posture.eigenProjection, +y,posture.eigen_projection1,posture_features,EigenProjection,,,movement,posture,Eigen Projection 2,Projection 2,No Units,1,1,1,,,,1,posture.eigenProjection, +y,posture.eigen_projection2,posture_features,EigenProjection,,,movement,posture,Eigen Projection 3,Projection 3,No Units,1,1,1,,,,1,posture.eigenProjection, +y,posture.eigen_projection3,posture_features,EigenProjection,,,movement,posture,Eigen Projection 4,Projection 4,No Units,1,1,1,,,,1,posture.eigenProjection, +y,posture.eigen_projection4,posture_features,EigenProjection,,,movement,posture,Eigen Projection 5,Projection 5,No Units,1,1,1,,,,1,posture.eigenProjection, +y,posture.eigen_projection5,posture_features,EigenProjection,,,movement,posture,Eigen Projection 6,Projection 6,No Units,1,1,1,,,,1,posture.eigenProjection, +n,posture.bends.head,posture_features,Bend,head,,movement,posture,NA,NA,NA,,,,,,,,, +n,posture.bends.neck,posture_features,Bend,neck,,movement,posture,NA,NA,NA,,,,,,,,, +n,posture.bends.midbody,posture_features,Bend,midbody,,movement,posture,NA,NA,NA,,,,,,,,, 
+n,posture.bends.hips,posture_features,Bend,hips,,movement,posture,NA,NA,NA,,,,,,,,, +n,posture.bends.tail,posture_features,Bend,tail,,movement,posture,NA,NA,NA,,,,,,,,, +y,posture.bends.head.mean,posture_features,BendMean,head,,movement,posture,Head Bend Mean (+/- = D/V Inside),Head,Degrees,1,1,1,,,,1,posture.bends.head.mean, +y,posture.bends.neck.mean,posture_features,BendMean,neck,,movement,posture,Neck Bend Mean (+/- = D/V Inside),Neck,Degrees,1,1,1,,,,1,posture.bends.neck.mean, +y,posture.bends.midbody.mean,posture_features,BendMean,midbody,,movement,posture,Midbody Bend Mean (+/- = D/V Inside),Midbody,Degrees,1,1,1,,,,1,posture.bends.midbody.mean, +y,posture.bends.hips.mean,posture_features,BendMean,hips,,movement,posture,Hips Bend Mean (+/- = D/V Inside),Hips,Degrees,1,1,1,,,,1,posture.bends.hips.mean, +y,posture.bends.tail.mean,posture_features,BendMean,tail,,movement,posture,Tail Bend Mean (+/- = D/V Inside),Tail,Degrees,1,1,1,,,,1,posture.bends.tail.mean, +y,posture.bends.head.std_dev,posture_features,BendStdDev,head,,movement,posture,Head Bend S.D. (+/- = D/V Inside),Head,Degrees,0.5,1,1,,,,1,posture.bends.head.stdDev, +y,posture.bends.neck.std_dev,posture_features,BendStdDev,neck,,movement,posture,Neck Bend S.D. (+/- = D/V Inside),Neck,Degrees,0.5,1,1,,,,1,posture.bends.neck.stdDev, +y,posture.bends.midbody.std_dev,posture_features,BendStdDev,midbody,,movement,posture,Midbody Bend S.D. (+/- = D/V Inside),Midbody,Degrees,0.5,1,1,,,,1,posture.bends.midbody.stdDev, +y,posture.bends.hips.std_dev,posture_features,BendStdDev,hips,,movement,posture,Hips Bend S.D. (+/- = D/V Inside),Hips,Degrees,0.5,1,1,,,,1,posture.bends.hips.stdDev, +y,posture.bends.tail.std_dev,posture_features,BendStdDev,tail,,movement,posture,Tail Bend S.D. (+/- = D/V Inside),Tail,Degrees,0.5,1,1,,,,1,posture.bends.tail.stdDev, +y,posture.directions.tail2head,posture_features,Direction,tail2head,,movement,posture,Tail-To-Head Orientation,Tail-To-Head,Degrees,1,1,1,,,,1,posture.directions.tail2head, +y,posture.directions.tail,posture_features,Direction,tail,,movement,posture,Tail Orientation,Tail,Degrees,1,1,1,,,,1,posture.directions.tail, +y,posture.directions.head,posture_features,Direction,head,,movement,posture,Head Orientation,Head,Degrees,1,1,1,,,,1,posture.directions.head, diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list_definitions.txt b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list_definitions.txt new file mode 100755 index 00000000..54a8ddcb --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/features_list_definitions.txt @@ -0,0 +1,5 @@ +is_final_feature : + - y : Indicates that the computed feature should have a 'value' attribute which contains a value of interest. + - n : Indicates that the computed value is generally not interesting on its own. This is usually the case for features that temporarily hold multiple features that are computed together before they are broken up into their own individual features at a later processing step. +feature_name : + Hopefully this one is pretty self explanatory. 
Naming is somewhat arbitrary, although we prefer lower-case, underscore-separated spelling, with periods delineating different "sections" (e.g. locomotion.velocity.head_tip.speed).
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/old_features_comparison_notes.csv b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/old_features_comparison_notes.csv
new file mode 100755
index 00000000..477948f8
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/feature_metadata/old_features_comparison_notes.csv
@@ -0,0 +1,11 @@
+need to handle event feature comparison using new approach,,NaN mask mismatch,Looks Good,
+locomotion.crawling_bends.tail.amplitude,0.763,yes,yes,utils.separated_peaks works slightly differently so merge_nans is true here for now. The utils function works correctly and the old version works incorrectly but was convoluted enough that it was hard to replicate
+locomotion.crawling_bends.tail.frequency,0.677,yes,yes,""" """
+posture.bends.hips.mean,0.959,no,didn't look,The indices used on the worm for posture bends are very different from how they are used elsewhere in the code. This difference has been noted in the Bends class
+posture.bends.tail.mean,0.976,no,didn't look,
+posture.bends.head.std_dev,0.881,no,didn't look,
+posture.bends.neck.std_dev,0.894,no,didn't look,
+posture.bends.hips.std_dev,0.607,no,didn't look,
+posture.bends.tail.std_dev,0.693,no,didn't look,
+posture.primary_wavelength,0.988,yes,yes,There was an error in the old code relating to which feature - primary or secondary - got which peak from the frequency response. I believe it was largest index (or maybe lowest) and not largest amplitude that got the primary
+posture.secondary_wavelength,0.988,yes,yes,""" """
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/feature_processing_options.py b/tierpsy/features/open_worm_analysis_toolbox/features/feature_processing_options.py
new file mode 100755
index 00000000..6b6561d1
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/feature_processing_options.py
@@ -0,0 +1,305 @@
+# -*- coding: utf-8 -*-
+"""
+This module holds a class that is referenced when
+processing features.
+
+I'd like to move things from "config" into here ...
+- @JimHokanson
+
+"""
+
+from __future__ import division
+
+from .. import utils
+
+# Can't do this, it would be circular:
+#from .worm_features import WormFeatures
+
+
+class FeatureProcessingOptions(object):
+    """
+    """
+
+    def __init__(self):
+
+        # The idea with this attribute is that functions will check if
+        # they are in this list. If they are, then they can display some
+        # sort of popup that clarifies how they are working.
+        #
+        # No functions actually use this yet. It is just a placeholder.
+        #
+        # An example of this might be:
+        # 'morphology.length'
+        self.functions_to_explain = []
+
+        # This indicates that, where possible, the code should attempt to
+        # replicate the errors and inconsistencies present in the way that
+        # the Schafer lab computed features. This can be useful for ensuring
+        # that we are able to compute features in the same way that they did.
+        #
+        # NOTE: There are a few instances where this is not supported, such
+        # that the behavior will not match even if this value is set to True.
+        self.mimic_old_behaviour = True
+
+        self.locomotion = LocomotionOptions()
+        self.posture = PostureOptions()
+
+        # TODO: Not yet implemented. The idea is to support not
+        # computing certain features.
+        # We might also allow disabling certain groups of features.
+        self.features_to_ignore = []
+
+    def should_compute_feature(self, feature_name, worm_features):
+        """
+        Not yet implemented; currently always returns True.
+        """
+        # TODO
+        return True
+
+    def disable_contour_features(self):
+        """
+        Disable all features that require the worm contour.
+        """
+        # see self.features_to_ignore
+        contour_dependent_features = [
+            'morphology.width',
+            'morphology.area',
+            'morphology.area_per_length',
+            'morphology.width_per_length',
+            'posture.eccentricity']
+
+        self.features_to_ignore = list(set(self.features_to_ignore +
+                                           contour_dependent_features))
+
+    def disable_feature_sections(self, section_names):
+        """
+        Disable processing of features by section
+        (see the options available below).
+
+        Modifies 'features_to_ignore'.
+
+        Parameters
+        ----------
+        section_names : list[str]
+            Options are:
+            - morphology
+            - locomotion
+            - posture
+            - path
+
+        Examples
+        --------
+        fpo.disable_feature_sections(['morphology'])
+
+        fpo.disable_feature_sections(['morphology', 'locomotion'])
+
+        """
+        new_ignores = []
+        f = IgnorableFeatures()
+        for section in section_names:
+            new_ignores.extend(getattr(f, section))
+
+        self.features_to_ignore = list(set(self.features_to_ignore +
+                                           new_ignores))
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+
+class PostureOptions(object):
+
+    def __init__(self):
+        # Grid size for estimating eccentricity: this is the
+        # max # of points that will fill the wide dimension.
+        # (scalar) The # of points to place in the long dimension. More
+        # points give a more accurate estimate of the ellipse but increase
+        # the calculation time.
+        #
+        # Used by: posture_features.get_eccentricity_and_orientation
+        self.n_eccentricity_grid_points = 50
+
+        # The maximum # of usable values is 7: although technically there
+        # are generally 48 eigenvectors available, we've only precomputed
+        # 7 to use for the projections.
+        #
+        # Used by: posture.eigenprojections
+        self.n_eigenworms_use = 6
+
+        # This is the fraction of the worm length that a bend must span
+        # in order to be counted. The # of worm points
+        # (this_value * worm_length_in_samples) is rounded to an integer
+        # value. The threshold value is inclusive.
+        #
+        # Used by: posture_features.get_worm_kinks
+        self.kink_length_threshold_pct = 1 / 12
+
+        self.wavelength = PostureWavelengthOptions()
+
+    def coiling_frame_threshold(self, fps):
+        # This is the # of frames that an epoch must exceed in order for
+        # it to be truly considered a coiling event.
+        # Current value translation: 1/5 of a second
+        #
+        # Used by: posture_features.get_worm_coils
+        return int(round(1 / 5 * fps))
+
+
+class PostureWavelengthOptions(object):
+    """
+    These options are all used in:
+    get_amplitude_and_wavelength
+
+    """
+
+    def __init__(self):
+
+        self.n_points_fft = 512
+
+        # This value is in samples, not a spatial frequency. The spatial
+        # frequency sampling also varies with the worm length, so this
+        # resolution varies on a frame-by-frame basis.
+        self.min_dist_peaks = 5
+
+        self.pct_max_cutoff = 0.5
+        self.pct_cutoff = 2
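+
+# Editor's worked example (hypothetical frame rate): at fps = 25,
+# PostureOptions.coiling_frame_threshold returns int(round(25 / 5)) = 5,
+# i.e. a candidate coil must span more than 5 frames (1/5 of a second).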
+
+
+class LocomotionOptions(object):
+
+    def __init__(self):
+        # locomotion_features.LocomotionVelocity
+        #-------------------------------------
+        # Units: seconds
+        # NOTE: We could get the defaults from the class ...
+        self.velocity_tip_diff = 0.25
+        self.velocity_body_diff = 0.5
+
+        # locomotion_features.MotionEvents
+        #--------------------------------------
+        # Interpolate only NaN runs up to this length; anything longer is
+        # probably an omega turn.
+        # If set to None, interpolate runs of any length (i.e. infinity).
+        # TODO - Inf would be a better specification
+        self.motion_codes_longest_nan_run_to_interpolate = None
+        # These are a percentage of the worm's length
+        self.motion_codes_speed_threshold_pct = 0.05
+        self.motion_codes_distance_threshold_pct = 0.05
+        self.motion_codes_pause_threshold_pct = 0.025
+
+        # These are times (s)
+        self.motion_codes_min_frames_threshold = 0.5
+        self.motion_codes_max_interframes_threshold = 0.25
+
+        # locomotion_bends.LocomotionCrawlingBends
+        self.crawling_bends = LocomotionCrawlingBends()
+        self.foraging_bends = LocomotionForagingBends()
+        self.locomotion_turns = LocomotionTurns()
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+
+class LocomotionTurns(object):
+
+    def __init__(self):
+        self.max_interpolation_gap_allowed = 9  # frames
+
+    def min_omega_event_length(self, fps):
+        return int(round(fps / 4))
+
+    # TODO: There is still a lot to put into here
+
+
+class LocomotionForagingBends(object):
+
+    def __init__(self):
+        # NOTE: The nose & neck can also be thought of as the head tip
+        # and head neck
+        pass
+
+    def min_nose_window_samples(self, fps):
+        return int(round(0.1 * fps))
+
+    def max_samples_interp_nose(self, fps):
+        return 2 * self.min_nose_window_samples(fps) - 1
+
+
+class LocomotionCrawlingBends(object):
+
+    def __init__(self):
+        self.fft_n_samples = 2 ** 14
+
+        self.bends_partitions = \
+            {'head': (5, 10),
+             'midbody': (22, 27),
+             'tail': (39, 44)}
+
+        self.peak_energy_threshold = 0.5
+
+        # max_amplitude_pct_bandwidth - when determining the bandwidth,
+        # the minima that are found can't exceed this percentage of the
+        # maximum. Doing so invalidates the result.
+        self.max_amplitude_pct_bandwidth = 0.5
+
+        self.min_time_for_bend = 0.5
+        self.max_time_for_bend = 15
+
+        # TODO: What are the units on these things ????
+        # This is a spatial frequency
+
+        # The comment that went with this in the original code was:
+        # "require at least 50% of the wave"
+        self.min_frequency = 1 / (4 * self.max_time_for_bend)
+
+        # This is wrong ...
+        #self.min_frequency = 0.25 * self.max_time_for_bend
+
+        # This is a processing optimization:
+        # how far into the maximum peaks we should look.
+        # If this value is low, an expensive computation can go faster.
+        # If it is too low, we end up rerunning the calculation on the
+        # whole dataset and lose time.
+        self.initial_max_I_pct = 0.5
+
+    def max_frequency(self, fps):
+        # What is the technical max???? 0.5 * fps????
+        return 0.25 * fps
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+
+class IgnorableFeatures:
+    """
+    I'm not thrilled with where this is placed, but placing it in
+    WormFeatures creates a circular dependency.
+
+    """
+
+    def __init__(self):
+        temp = [
+            'length',
+            'width',
+            'area',
+            'area_per_length',
+            'width_per_length']
+        self.morphology = ['morphology.' + s for s in temp]
+        # None of these are implemented ...
+
+        temp = ['velocity', 'motion_events', 'motion_mode',
+                'crawling_bends', 'foraging_bends', 'turns']
+        self.locomotion = ['locomotion.' + s for s in temp]
+        # locomotion
+        #   crawling_bends: Done
+        #   turns: Done
+
+        temp = ['bends', 'eccentricity', 'amplitude_and_wavelength',
+                'kinks', 'coils', 'directions', 'eigen_projection']
+        self.posture = ['posture.' + s for s in temp]
+        # None of these are implemented ...
+
+        # NOTE: disable_feature_sections() also documents a 'path' section,
+        # but no 'path' attribute is defined here yet.
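+
+# Editor's sketch (not part of the original module): minimal intended usage
+# of the options above, with hypothetical settings.
+#
+#     fpo = FeatureProcessingOptions()
+#     fpo.mimic_old_behaviour = False       # don't replicate old quirks
+#     fpo.posture.n_eigenworms_use = 6      # project onto 6 eigenworms
+#     fpo.disable_feature_sections(['morphology', 'locomotion'])
+#     print(fpo.features_to_ignore)         # -> ['morphology.length', ...]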
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/generic_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/generic_features.py
new file mode 100755
index 00000000..49d3f93d
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/generic_features.py
@@ -0,0 +1,415 @@
+# -*- coding: utf-8 -*-
+"""
+This module handles the base (generic) Feature code that the actual
+computed features inherit from.
+
+Jim's notes on problematic features:
+- missing dependency
+- empty video (let's not deal with this for now)
+- no events
+- missing_from_disk  # i.e. not present in the loaded data
+See:
+  - locomotion_features.AverageBodyAngle
+  - locomotion_features.MidbodyVelocityDistance
+"""
+
+from .. import utils
+
+import numpy as np
+import copy
+import re
+
+# Get everything up to the last period, i.e. the period that is
+# followed by no further periods:
+#
+# e.g. from:
+#   temp.name.prop   TO
+#   temp.name
+_parent_feature_pattern = re.compile(r'(.*)\.([^\.]+)')
+
+
+class Feature(object):
+
+    """
+    This is the parent class from which all features inherit.
+
+    Unfortunately, with the current setup some of these features are
+    populated in Specs.compute_feature()
+
+    Attributes
+    ----------
+    name :
+    is_temporary :
+    spec : worm_features.FeatureProcessingSpec
+    value :
+        This is the value of interest. This generally does not exist for
+        features that are temporary.
+    dependencies : list
+    missing_dependency :
+        Unable to compute the feature due to a missing dependency.
+    missing_from_disk :
+        Unable to load the feature as it was not saved in the loaded file.
+    empty_video :
+        Indicates that there was no data in the processed video. Note, I'm
+        not sure that we'll use this one.
+    no_events :
+        Indicates that no events were observed in the video.
+
+    Temporary features may have additional attributes that are essentially
+    the values of the feature of interest. The child features grab these
+    attributes and populate them in their 'value' attributes.
+    TODO: provide an example of what I mean by this
+
+    See Also
+    --------
+    worm_features.FeatureProcessingSpec.compute_feature()
+
+    """
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    # This should be part of the features
+    def __eq__(self, other):
+        # TODO: Figure out how to pass parameters into this.
+        # We should probably just overload it ...
+        return utils.correlation(self.value, other.value, self.name)
+
+    def get_feature(self, wf, feature_name):
+        """
+        This was put in to do anything we need when getting a feature,
+        rather than calling the feature directly.
+
+        For example, right now we log dependencies.
+
+        Note, the dependencies are not recursive (i.e. we don't load in the
+        dependencies of the feature we are requesting).
+        """
+
+        # 1) Do logging - NYI
+        # What is the easiest way to initialize without forcing an init
+        # super call?
+        # NOTE: We could also support returning all dependencies, in which
+        # case we would get the dependencies of the parent and add those
+        # as well.
+        if hasattr(self, 'dependencies'):
+            self.dependencies.append(feature_name)
+        else:
+            self.dependencies = [feature_name]
+
+        # 2) Make the call to WormFeatures.
+        # Note, we call wf._get_and_log_feature rather than the spec to
+        # ensure that wf is aware of the new features that have been
+        # computed
+        return wf._get_and_log_feature(feature_name, internal_request=True)
+    @property
+    def is_valid(self):
+        """
+        Note, the properties checked here are currently assigned in the
+        spec and, depending on where we are in debugging, may not exist.
+        We might want to wrap this in a try/except.
+        """
+        return not self.missing_from_disk and not self.missing_dependency
+
+    @property
+    def has_data(self):
+        return self.is_valid and not self.no_events and not self.empty_video
+
+    def copy(self):
+        # TODO: We might want to do something special for value
+
+        new_self = self.__new__(self.__class__)
+        d = self.__dict__
+        for key in d:
+            temp = d[key]
+            if key == 'spec':
+                setattr(new_self, 'spec', temp.copy())
+            else:
+                setattr(new_self, key, copy.copy(temp))
+
+        return new_self
+        # return copy.copy(self)
+
+
+def get_parent_feature_name(feature_name):
+    """
+    Go from something like:
+        locomotion.crawling_bends.head.amplitude
+    TO
+        locomotion.crawling_bends.head
+    """
+
+    return get_feature_name_info(feature_name)[0]
+
+
+def get_feature_name_info(feature_name):
+    """
+    Returns
+    -------
+    (parent_name, specific_feature_name)
+    """
+    # We could make this more obvious by using split ...
+    # Match groups:
+    # 0 - the entire match
+    # 1 - the first parenthesized subgroup
+    result = _parent_feature_pattern.match(feature_name)
+    return result.group(1), result.group(2)
+
+# How are we going to do from disk?
+#
+# 1) Need some reference to a saved file
+# 2)
+
+#    @classmethod
+#    def from_disk(cls, width_ref):
+#
+#        self = cls.__new__(cls)
+#
+#        for partition in self.fields:
+#            widths_in_partition = utils._extract_time_from_disk(width_ref,
+#                                                                partition)
+#            setattr(self, partition, widths_in_partition)
+
+# event_durations
+# distance_during_events
+# time_between_events
+# distance_between_events
+# frequency
+# time_ratio
+# data_ratio
+
+
+def get_event_attribute(event_object, attribute_name):
+
+    # We might want to place some logic in here
+
+    if event_object.is_null:
+        return None
+    else:
+        return getattr(event_object, attribute_name)
+
+
+class EventFeature(Feature):
+    """
+    This covers features that come from events. This is NOT the temporary
+    event feature parent.
+
+    TODO: Insert example
+
+    Temporary main event lists:
+        locomotion_features.MotionEvent
+        locomotion_turns.(upsilon and omega)
+        posture_features.Coils
+
+    """
+
+    def __init__(self, wf, feature_name):
+
+        # This is a bit messy :/
+        # We might want to eventually obtain this some other way
+        cur_spec = wf.specs[feature_name]
+
+        self.name = feature_name
+        # The parent is the temporary event feature; the suffix picks out
+        # which of its attributes this feature exposes.
+        event_name, feature_type = get_feature_name_info(feature_name)
+
+        temp_parent_feature = self.get_feature(wf, event_name)
+
+        if temp_parent_feature.no_events:
+            self.no_events = True
+            self.keep_mask = None
+            self.value = None
+            return
+
+        # TODO: I'd like a better name for this
+        #---------------------------
+        # event_parent? event_main?
+        event_value = temp_parent_feature.value
+        # event_value : EventListWithFeatures
+
+        self.value = get_event_attribute(event_value, feature_type)
+        start_frames = get_event_attribute(event_value, 'start_frames')
+        end_frames = get_event_attribute(event_value, 'end_frames')
+
+        self.num_video_frames = event_value.num_video_frames
+
+        # event_durations         - filter on starts and stops
+        # distance_during_events  - "     "
+        # time_between_events     - filter on the breaks
+        # distance_between_events - filter on the breaks
+
+        # TODO: I think we should hide the 'keep_mask' => '_keep_mask'
+
+        # TODO: The filtering should maybe be identified by type:
+        #-------------------------
+        # event-main    - summary of an event itself
+        # event-inter   - summary of something between events
+        # event-summary - summary statistic over all events
+        # or something like this ...
+
+        # TODO: This is different behavior than the original.
+        # In the original, the between-events were filtered the same as the
+        # 1st event. In other words, if the 1st event was a partial, the
+        # time between the 1st and 2nd events was considered a partial.
+        #
+        # 1) Document the difference
+        # 2) Build in temporary support for the old behavior flag
+
+        # This will eventually be removed when we move to empty_features
+        if self.value is None or isinstance(
+                self.value, float) or self.value.size == 0:
+            self.keep_mask = None
+            self.signing_mask = None
+        else:
+            if cur_spec.is_signed:
+                signing_field_name = cur_spec.signing_field
+                signing_mask = get_event_attribute(
+                    event_value, signing_field_name)
+                if feature_type in [
+                        'event_durations',
+                        'distance_during_events']:
+                    self.signing_mask = signing_mask
+                else:
+                    # TODO: We should check on scalar vs inter-event here,
+                    # but only inter-events are signed.
+                    #
+                    # Signing the inter-events makes little sense and should
+                    # probably be removed; this is the old behavior.
+                    #
+                    # This signs each inter-event based on the preceding
+                    # event, since I'm 99% sure you only ever have:
+                    #   - event, interevent, event        AND NOT
+                    #   - interevent, event, interevent   OR
+                    #   - event, interevent, etc.
+                    #
+                    # i.e. we only have inter-events between events
+                    self.signing_mask = signing_mask[0:-1]
+            else:
+                self.signing_mask = None
+
+            self.keep_mask = np.ones(self.value.shape, dtype=bool)
+            if feature_type in ['event_durations', 'distance_during_events']:
+                self.keep_mask[0] = start_frames[0] != 0
+                self.keep_mask[-1] = end_frames[-1] != (
+                    self.num_video_frames - 1)
+            elif feature_type in ['time_between_events',
+                                  'distance_between_events']:
+
+                # TODO: Verify that inter-events can be partial ...
+                # i.e. if an event starts at frame 20, verify that we have
+                # an inter-event from frames 1 - 19.
+                #
+                # Document this result (either way).
+                #
+                # I think we only ever include inter-event values that are
+                # actually between events ...
+
+                # The first is partial if the main event starts after the
+                # first frame
+                self.keep_mask[0] = start_frames[0] == 0
+                # Similarly, if the last event ends before the end of the
+                # video, then anything after that is partial
+                self.keep_mask[-1] = end_frames[-1] == (
+                    self.num_video_frames - 1)
+            else:
+                # Should be a scalar value, e.g. frequency
+                self.keep_mask = np.ones(1, dtype=bool)
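+
+    # Editor's illustrative example of the masking above (hypothetical
+    # numbers): in a 100-frame video with events at frames 0-4 and 90-99,
+    # both events touch a video boundary, so keep_mask for
+    # 'event_durations' is [False, False] (both are partial), while the
+    # single 'time_between_events' value (frames 5-89) lies strictly
+    # between events and is kept.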
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        return cls(wf, feature_name)
+
+    # TODO: Figure out how to make sure that on copy
+    # we adjust the mask if only full_values are set ...
+    # def copy(self):
+
+    def __eq__(self, other):
+        # TODO: We need to implement this ...
+        # scalars - see if they are close, otherwise call super?
+        return True
+
+    def get_value(self, partials=False, signed=True):
+        """
+        Return the feature's values; optionally sign them and filter
+        entries via 'keep_mask'.
+        """
+        if not self.has_data:
+            return None
+
+        temp_values = self.value
+        if signed and (self.signing_mask is not None):
+            # Copy first so we don't mutate self.value in place
+            temp_values = temp_values.copy()
+            # TODO: Not sure if we multiply by -1 for True or False
+            temp_values[self.signing_mask] = - \
+                1 * temp_values[self.signing_mask]
+
+        if partials:
+            temp_values = temp_values[self.keep_mask]
+
+        return temp_values
+
+# We might want to make things specific again, but for now we'll use
+# a single class:
+
+# class EventDuration(Feature):
+#
+#    def __init__(self, wf, feature_name):
+#        parent_name = get_parent_feature_name(feature_name)
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'event_durations')
+#        self.name = event_name + '.event_durations'
+#
+# class DistanceDuringEvents(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'distance_during_events')
+#        self.name = event_name + '.distance_during_events'
+#
+# class TimeBetweenEvents(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'time_between_events')
+#        self.name = event_name + '.time_between_events'
+#
+# class DistanceBetweenEvents(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'distance_between_events')
+#        self.name = event_name + '.distance_between_events'
+#
+# class Frequency(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'frequency')
+#        self.name = event_name + '.frequency'
+#
+# class EventTimeRatio(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'time_ratio')
+#        self.name = event_name + '.time_ratio'
+#
+# class EventDataRatio(Feature):
+#
+#    def __init__(self, wf, event_name):
+#        temp = wf[event_name]
+#        self.value = get_event_attribute(temp.value, 'data_ratio')
+#        self.name = event_name + '.data_ratio'
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_bends.py b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_bends.py
new file mode 100755
index 00000000..6fad2b61
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_bends.py
@@ -0,0 +1,1731 @@
+# -*- coding: utf-8 -*-
+"""
+Calculate the "Bends" locomotion feature
+
+JAH: 2014-10-29 - this documentation is out of date
+
+Contains two classes:
+    LocomotionCrawlingBends, which yields properties:
+        .head
+            .amplitude
+            .frequency
+        .midbody
+            .amplitude
+            .frequency
+        .tail
+            .amplitude
+            .frequency
+
+    LocomotionForagingBends, which yields properties:
+        .amplitude
+        .angleSpeed
+
+"""
+
+import numpy as np
+import scipy.ndimage.filters as filters
+
+import warnings
+
+from . import generic_features
+from .generic_features import Feature
+from .. import utils
+
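+
+# Editor's note (assumption): the 'h__'-prefixed helper methods below keep
+# the names of the original MATLAB segworm helpers that this module ports
+# (see the 'Formerly ...' references in the docstrings).
+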
+class BendHelper(object):
+
+    def h__getBendData(self, avg_bend_angles, bound_info, options, fps):
+        """
+        Compute the bend amplitude and frequency.
+
+        Parameters
+        ----------
+        avg_bend_angles : numpy.array
+            - [1 x n_frames]
+        bound_info :
+        options : feature_processing_options.LocomotionCrawlingBends
+        fps : float
+            Frames Per Second
+
+        Returns
+        -------
+        (amps, freqs)
+
+        """
+
+        # Compute the short-time Fourier transforms (STFT).
+        #--------------------------------------------------
+        # Unpack options ...
+
+        # Make sure the frames per second is an integer
+        fps = int(fps)
+
+        max_freq = options.max_frequency(fps)
+        min_freq = options.min_frequency
+        fft_n_samples = options.fft_n_samples
+        max_amp_pct_bandwidth = options.max_amplitude_pct_bandwidth
+        peak_energy_threshold = options.peak_energy_threshold
+
+        # Maximum index to keep for frequency analysis:
+        fft_max_I = int(fft_n_samples / 2)
+        # This gets multiplied by an index to compute the frequency at that
+        # index
+        freq_scalar = (fps / 2) * 1 / (fft_max_I - 1)
+
+        n_frames = len(avg_bend_angles)
+        amps = np.full(n_frames, np.NaN)
+        freqs = np.full(n_frames, np.NaN)
+
+        left_bounds = bound_info.left_bounds
+        right_bounds = bound_info.right_bounds
+        is_bad_mask = bound_info.is_bad_mask
+
+        # This is a processing optimization that in general will speed
+        # things up
+        max_freq_I = max_freq / freq_scalar
+        INIT_MAX_I_FOR_BANDWIDTH = \
+            round(options.initial_max_I_pct * max_freq_I)
+
+        # Convert each element from float to int
+        right_bounds = right_bounds.astype(int)
+        left_bounds = left_bounds.astype(int)
+
+        for iFrame in np.flatnonzero(~is_bad_mask):
+            windowed_data = avg_bend_angles[
+                left_bounds[iFrame]:right_bounds[iFrame]]
+            data_win_length = len(windowed_data)
+
+            #
+            # FFT frequency and bandwidth
+            #
+            # Compute the real part of the STFT.
+            # These two steps take a lot of time ...
+            fft_data = abs(np.fft.rfft(windowed_data, fft_n_samples))
+
+            # Find the peak frequency.
+            maxPeakI = np.argmax(fft_data)
+            maxPeak = fft_data[maxPeakI]
+
+            # NOTE: If this is true, we'll never bound the peak on the left.
+            # We are looking for a hump with a peak, not just a decaying
+            # signal.
+            if maxPeakI == 0:
+                continue
+
+            unsigned_freq = freq_scalar * maxPeakI
+
+            if not (min_freq <= unsigned_freq <= max_freq):
+                continue
+
+            peakStartI, peakEndI = \
+                self.h__getBandwidth(data_win_length,
+                                     fft_data,
+                                     maxPeakI,
+                                     INIT_MAX_I_FOR_BANDWIDTH)
+
+            if np.isnan(peakStartI) or np.isnan(peakEndI):
+                # Invalid indices, skip this frame
+                continue
+
+            # Store the data
+            #------------------------------------------------------------------
+            fenergy = fft_data**2
+            tot_energy = np.sum(fenergy)
+            peak_energy = np.sum(fenergy[peakStartI:peakEndI])
+
+            peak_amplitude_threshold = (max_amp_pct_bandwidth * maxPeak)
+            if not (  # The minima can't be too big:
+                    fft_data[peakStartI] > peak_amplitude_threshold or
+                    fft_data[peakEndI] > peak_amplitude_threshold or
+                    # Needs to have enough energy:
+                    (peak_energy < (peak_energy_threshold * tot_energy))
+                    ):
+
+                # Convert the peak to a time frequency.
+                dataSign = np.sign(np.nanmean(windowed_data))  # sign the data
+                amps[iFrame] = (2 * fft_data[maxPeakI] /
+                                data_win_length) * dataSign
+                freqs[iFrame] = unsigned_freq * dataSign
+
+        return amps, freqs
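+
+    # Editor's worked example (hypothetical numbers): with fps = 30 and the
+    # default fft_n_samples = 2**14, fft_max_I = 8192 and freq_scalar =
+    # (30 / 2) / 8191 ~= 0.00183 Hz per FFT bin, so a peak at bin 182 maps
+    # to an unsigned frequency of ~0.33 Hz - near the ~1/3 Hz crawling
+    # frequency expected for N2.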
+
+        Called by: h__getBendData
+
+        Parameters
+        ----------
+        data_win_length
+            Length of real data (ignoring zero padding) that
+            went into computing the FFT
+
+        fft_data
+            Output of the fft function
+
+        max_peak_I
+            Location (index) of the maximum of fft_data
+
+        INIT_MAX_I_FOR_BANDWIDTH
+            See code
+
+        Returns
+        -------
+        peak_start_I: scalar
+
+        peak_end_I: scalar
+
+        Notes
+        ---------------------------------------
+        Formerly [peak_start_I,peak_end_I] = \
+            h__getBandwidth(data_win_length, fft_data,
+                            max_peak_I, INIT_MAX_I_FOR_BANDWIDTH)
+
+        See also, formerly: seg_worm.util.maxPeaksDist
+
+        """
+
+        peakWinSize = round(np.sqrt(data_win_length))
+
+        # Find the peak bandwidth.
+        if max_peak_I < INIT_MAX_I_FOR_BANDWIDTH:
+            # NOTE: It is incorrect to filter by the maximum here, as we want
+            # to allow matching a peak that will later be judged invalid. If
+            # we filter here we may find another smaller peak which will not
+            # be judged invalid later on.
+            min_peaks, min_peaks_I = utils.separated_peaks(
+                fft_data[:INIT_MAX_I_FOR_BANDWIDTH],
+                peakWinSize,
+                use_max=False,
+                value_cutoff=np.inf)
+
+            del min_peaks  # this part of max_peaks_dist's return is unused
+
+            # TODO: This is wrong; a 'find' helper should be added to utils
+            peak_start_I = min_peaks_I[utils.find(min_peaks_I < max_peak_I, 1)]
+            peak_end_I = min_peaks_I[utils.find(min_peaks_I > max_peak_I, 1)]
+        else:
+            peak_start_I = np.array([])
+            peak_end_I = np.array([])
+
+        # NOTE: Besides checking for an empty value, we also need to ensure
+        # that the minimum didn't come too close to the data border, as more
+        # data could invalidate the result we have.
+        #
+        # NOTE: In order to save time we only look at a subset of the FFT data.
+        if (peak_end_I.size == 0) | \
+                (peak_end_I + peakWinSize >= INIT_MAX_I_FOR_BANDWIDTH):
+            # If true, then rerun on the full set of data
+            [min_peaks, min_peaks_I] = utils.separated_peaks(
+                fft_data, peakWinSize, use_max=False, value_cutoff=np.inf)
+
+            del(min_peaks)  # This part of max_peaks_dist's return is unused
+
+            peak_start_I = min_peaks_I[utils.find(min_peaks_I < max_peak_I, 1)]
+            peak_end_I = min_peaks_I[utils.find(min_peaks_I > max_peak_I, 1)]
+
+        assert peak_start_I.size <= 1
+        assert peak_end_I.size <= 1
+
+        # Returning an array is problematic and can give rise to deprecation
+        # errors, so we return a tuple of scalars instead.
+        peak_start_Int = int(peak_start_I[0]) if peak_start_I.size == 1 else np.nan
+        peak_end_Int = int(peak_end_I[0]) if peak_end_I.size == 1 else np.nan
+
+        return (peak_start_Int, peak_end_Int)
+
+
+class LocomotionBend(object):
+    """
+    Element for LocomotionCrawlingBends
+
+    """
+
+    def __init__(self, amplitude, frequency, name):
+        self.amplitude = amplitude
+        self.frequency = frequency
+        self.name = name
+
+    @classmethod
+    def from_disk(cls, bend_ref, name):
+
+        self = cls.__new__(cls)
+
+        self.amplitude = utils._extract_time_from_disk(bend_ref, 'amplitude')
+        self.frequency = utils._extract_time_from_disk(bend_ref, 'frequency')
+        self.name = name
+
+        return self
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    def __eq__(self, other):
+
+        # merge_nans=True - utils.separated_peaks works slightly differently,
+        # so merge_nans is true here, for now. The utils function works
+        # correctly and the old version works incorrectly, but the old version
+        # was convoluted enough that it was hard to replicate.
+
+        return utils.correlation(
+            self.amplitude,
+            other.amplitude,
+            'locomotion.bends.' +
+ + self.name + + '.amplitude', + merge_nans=True) and utils.correlation( + self.frequency, + other.frequency, + 'locomotion.bends.' + + self.name + + '.frequency', + merge_nans=True) + + +class LocomotionCrawlingBends(BendHelper): + """ + Locomotion Crawling Bends Feature. + + Attributes + ---------- + head : LocomotionBend + midbody : LocomotionBend + tail : LocomotionBend + + Notes + --------------------------------------- + Formerly +segworm/+features/@locomotion/getLocomotionBends + + Originally, part of wormBends.m + + + Note from Ev Yemini on Setup Options + --------------------------------------- + Empirically I've found the values below achieve good signal. + + Furthermore: + The body bend frequency is much easier to see (than foraging). The N2 + signal is clearly centered around 1/3Hz in both the literature and + through visual inspection. + + I chose a high-frequency threshold of 4 frames. With 4 frames a 3-frame + tick, resulting from segmentation noise, will be diluted by the + additional frame. + + + Nature Methods Description + --------------------------------------- + + Worm crawling is expressed as both an amplitude and frequency + (Supplementary Fig. 4e). We measure these features instantaneously at + the head, midbody, and tail. The amplitude and frequency are signed + negatively whenever the worm’s ventral side is contained within the + concave portion of its instantaneous bend. + + Crawling is only measured during forward and backward motion states. + The worm bend mean angles (described in the section on “Posture”) show + a roughly periodic signal as the crawling wave travels along the worm’s + body. This wave can be asymmetric due to differences in dorsal-ventral + flexibility or simply because the worm is executing a turn. Moreover + the wave dynamics can change abruptly to speed up or slow down. + Therefore, the signal is only roughly periodic and we measure its + instantaneous properties. + + Worm bends are linearly interpolated across unsegmented frames. The + motion states criteria (described earlier in this section) guarantee + that interpolation is no more than 1/4 of a second long. For each + frame, we search both backwards and forwards for a zero crossing in the + bend angle mean – the location where the measured body part (head, + midbody, or tail) must have hit a flat posture (a supplementary bend + angle of 0°). This guarantees that we are observing half a cycle for + the waveform. Crawling is bounded between 1/30Hz (a very slow wave that + would not resemble crawling) and 1Hz (an impossibly fast wave on agar). + + If the window between zero crossings is too small, the nearest zero + crossing is assumed to be noise and we search for the next available + zero crossing in its respective direction. If the window is too big, + crawling is marked undefined at the frame. + + Once an appropriate window has been found, the window is extended in + order to center the frame and measure instantaneous crawling by + ensuring that the distance on either side to respective zero crossings + is identical. If the distances are not identical, the distance of the + larger side is used in place of the zero-crossing distance of the + smaller side in order to expand the small side and achieve a symmetric + window, centered at the frame of interest. + + We use a Fourier transform to measure the amplitude and frequency + within the window described above. The largest peak within the + transform is chosen for the crawling amplitude and frequency. 
+    If the troughs on either side of the peak exceed 1/2 its height, the
+    peak is rejected for being unclear and crawling is marked as undefined
+    at the frame. Similarly, if the integral between the troughs is less
+    than half the total integral, the peak is rejected for being weak.
+
+    """
+
+    bend_names = ['head', 'midbody', 'tail']
+
+    def __init__(
+            self,
+            features_ref,
+            bend_angles,
+            is_paused,
+            is_segmented_mask):
+        """
+        Compute the temporal bending frequency at the head, midbody, and tail.
+
+        Parameters
+        ----------
+        features_ref :
+        bend_angles : numpy.array
+            - [49 x n_frames]
+        is_paused : numpy.array
+            - [1 x n_frames]
+            Whether or not the worm is considered to be paused during the frame
+        is_segmented_mask : [1 x n_frames]
+
+        """
+
+        options = features_ref.options.locomotion.crawling_bends
+
+        if not features_ref.options.should_compute_feature(
+                'locomotion.crawling_bends', features_ref):
+            self.head = None
+            self.midbody = None
+            self.tail = None
+            return
+
+        timer = features_ref.timer
+        timer.tic()
+
+        fps = features_ref.video_info.fps
+
+        # Special Case: No worm data.
+        #------------------------------------
+        if ~np.any(is_segmented_mask):
+            nan_data = np.empty(len(is_segmented_mask)) * np.NaN
+            bend_dict = {'frequency': nan_data.copy(),
+                         'amplitude': nan_data.copy()}
+
+            # NOTE: Everything below the raise is unreachable until this
+            # branch is implemented properly again.
+            raise Exception('This is no longer implemented properly')
+            self.head = bend_dict.copy()
+            self.midbody = bend_dict.copy()
+            self.tail = bend_dict.copy()
+            return
+
+        for cur_partition_name in self.bend_names:
+            # Find the mean bend angle for the current partition, across all
+            # frames
+
+            s = slice(*options.bends_partitions[cur_partition_name])
+
+            # Suppress RuntimeWarning: Mean of empty slice
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore', category=RuntimeWarning)
+                avg_bend_angles = np.nanmean(bend_angles[s, :], axis=0)
+
+            # Ensure there are both data and gaps if we are going to
+            # interpolate - i.e.:
+            # - that not everything is segmented (missing) - i.e. data
+            # - that something is segmented - i.e. gaps
+            if not(np.all(is_segmented_mask)) and np.any(is_segmented_mask):
+                avg_bend_angles = utils.interpolate_with_threshold(
+                    avg_bend_angles)
+
+            bound_info = CrawlingBendsBoundInfo(
+                avg_bend_angles, is_paused, options, fps)
+
+            [amplitude, frequency] = self.h__getBendData(avg_bend_angles,
+                                                         bound_info,
+                                                         options,
+                                                         fps)
+
+            setattr(
+                self,
+                cur_partition_name,
+                LocomotionBend(
+                    amplitude,
+                    frequency,
+                    cur_partition_name))
+
+        timer.toc('locomotion.crawling_bends')
+
+    @classmethod
+    def from_disk(cls, bend_ref):
+
+        self = cls.__new__(cls)
+
+        self.head = LocomotionBend.from_disk(bend_ref['head'], 'head')
+        self.midbody = LocomotionBend.from_disk(bend_ref['midbody'], 'midbody')
+        self.tail = LocomotionBend.from_disk(bend_ref['tail'], 'tail')
+
+        return self
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    def __eq__(self, other):
+        return self.head == other.head and \
+            self.midbody == other.midbody and \
+            self.tail == other.tail
+
+#%%
+
+
+class LocomotionForagingBends(object):
+
+    """
+    Locomotion Foraging Bends Feature.
+ + Attributes + ---------- + amplitude + angleSpeed + + + Methods + --------------------------------------- + __init__ + h__computeNoseBends + h__computeAvgAngles + h__interpData + h__getNoseInterpolationIndices + h__foragingData + h__getAmps + + Notes + --------------------------------------- + Formerly +segworm/+features/@locomotion/getForaging + + Originally, part of wormBends.m + + """ + + def __init__(self, features_ref, is_segmented_mask, ventral_mode): + """ + Initialize an instance of LocomotionForagingBends + + Parameters + ---------- + nw: NormalizedWorm instance + is_segmented_mask: boolean numpy array [1 x n_frames] + ventral_mode: int + 0, 1, or 2 depending on the orientation of the worm. + + """ + + options = features_ref.options.locomotion.foraging_bends + + if not features_ref.options.should_compute_feature( + 'locomotion.foraging_bends', features_ref): + self.amplitude = None + self.angle_speed = None + return + + timer = features_ref.timer + timer.tic() + + # self.amplitude = None # DEBUG + # self.angleSpeed = None # DEBUG + + fps = features_ref.video_info.fps + + nose_x, nose_y = \ + features_ref.nw.get_partition('head_tip', + data_key='skeleton', + split_spatial_dimensions=True) + + neck_x, neck_y = \ + features_ref.nw.get_partition('head_base', + data_key='skeleton', + split_spatial_dimensions=True) + + # TODO: Add "reversed" and "interpolated" options to the get_partition + # function, to replace the below blocks of code! + #---------------------------------------------------------------------- + + # We need to flip the orientation (i.e. reverse the entries along the + # first, or skeleton index, axis) for angles and consistency with old + # code: + nose_x = nose_x[::-1, :] + nose_y = nose_y[::-1, :] + neck_x = neck_x[::-1, :] + neck_y = neck_y[::-1, :] + + # Step 1: Interpolation of skeleton indices + #--------------------------------------- + # TODO: ensure that we are excluding the points at the beginning + # and ending of the second dimension (the frames list) of nose_x, etc. + # from being interpolated. (this was a step in + # h__getNoseInterpolationIndices, that we no longer have since I've + # put the interpolation code into + # utils.interpolate_with_threshold_2D instead. But we + # might be okay, since the beginning and end are going to be left alone + # since I've set left=np.NaN and right=np.NaN in the underlying + # utils.interpolate_with_threshold code. + interp = utils.interpolate_with_threshold_2D + + max_samples_interp = options.max_samples_interp_nose(fps) + + nose_xi = interp(nose_x, threshold=max_samples_interp) + nose_yi = interp(nose_y, threshold=max_samples_interp) + neck_xi = interp(neck_x, threshold=max_samples_interp) + neck_yi = interp(neck_y, threshold=max_samples_interp) + #---------------------------------------------------------------------- + + # Step 2: Calculation of the bend angles + #--------------------------------------- + nose_bends = self.h__computeNoseBends( + nose_xi, nose_yi, neck_xi, neck_yi) + + # Step 3: + #--------------------------------------- + [nose_amps, nose_freqs] = \ + self.h__foragingData(fps, nose_bends, + options.min_nose_window_samples(fps)) + + if ventral_mode > 1: + nose_amps = -nose_amps + nose_freqs = -nose_freqs + + self.amplitude = nose_amps + self.angle_speed = nose_freqs + + timer.toc('locomotion.foraging_bends') + + def h__computeNoseBends(self, nose_x, nose_y, neck_x, neck_y): + """ + Compute the difference in angles between the nose and neck (really the + head tip and head base). 
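+
+        In outline (the body below does exactly this): take the average
+        angle of each part, subtract, convert to degrees, and wrap the
+        result into [-180, 180]:
+
+            nose_bends_d = (nose_angles - neck_angles) * (180 / np.pi)
+            nose_bends_d[nose_bends_d > 180] -= 360
+            nose_bends_d[nose_bends_d < -180] += 360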
+ + Parameters + ---------- + nose_x: [4 x n_frames] + nose_y: [4 x n_frames] + neck_x: [4 x n_frames] + neck_y: [4 x n_frames] + + Returns + ------- + nose_bends_d + + Notes + --------------------------------------- + Formerly nose_bends_d = h__computeNoseBends(nose_x,nose_y,neck_x,neck_y) + + """ + + nose_angles = self.h__computeAvgAngles(nose_x, nose_y) + neck_angles = self.h__computeAvgAngles(neck_x, neck_y) + + # TODO: These three should be a method, calculating the difference + # in angles and ensuring all results are within +/- 180 + nose_bends_d = (nose_angles - neck_angles) * (180 / np.pi) + + # Suppress warnings so we can compare a numpy array that may contain NaNs + # without triggering a Runtime Warning + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + nose_bends_d[nose_bends_d > 180] -= 360 + nose_bends_d[nose_bends_d < -180] += 360 + + return nose_bends_d + + def h__computeAvgAngles(self, x, y): + """ + Take average difference between successive x and y skeleton points, + then compute the arc tangent from those averages. + + Parameters + --------------------------------------- + x : m x n float numpy array + m is the number of skeleton points + n is the number of frames + y : m x n float numpy array + (Same as x) + + Returns + --------------------------------------- + 1-d float numpy array of length n + The angles + + Notes + --------------------------------------- + Simple helper for h__computeNoseBends + + """ + # Suppress RuntimeWarning: Mean of empty slice + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + avg_diff_x = np.nanmean(np.diff(x, n=1, axis=0), axis=0) + avg_diff_y = np.nanmean(np.diff(y, n=1, axis=0), axis=0) + + angles = np.arctan2(avg_diff_y, avg_diff_x) + + return angles + + def h__foragingData(self, fps, nose_bend_angle_d, min_win_size): + """ + Compute the foraging amplitude and angular speed. + + Parameters + ---------- + fps : + nose_bend_angle_d : [n_frames x 1] + min_win_size : (scalar) + + Returns + --------------------------------------- + amplitudes : [1 x n_frames] + speeds : [1 x n_frames] + + Notes + --------------------------------------- + Formerly [amps,speeds] = h__foragingData(nose_bend_angle_d, + min_win_size, fps) + + """ + if min_win_size > 0: + # Clean up the signal with a gaussian filter. + gauss_filter = utils.gausswin(2 * min_win_size + 1) / min_win_size + nose_bend_angle_d = filters.convolve1d(nose_bend_angle_d, + gauss_filter, + cval=0, + mode='constant') + + # Remove partial data frames ... + nose_bend_angle_d[:min_win_size] = np.NaN + nose_bend_angle_d[-min_win_size:] = np.NaN + + # Calculate amplitudes + amplitudes = self.h__getAmplitudes(nose_bend_angle_d) + assert(np.shape(nose_bend_angle_d) == np.shape(amplitudes)) + + # Calculate angular speed + # Compute the speed centered between the back and front foraging movements. + # + # TODO: fix the below comments to conform to 0-based indexing + # I believe I've fixed the code already. - @MichaelCurrie + # 1 2 3 + # d1 d2 d1 = 2 - 1, d2 = 3 - 2 + # x assign to x, avg of d1 and d2 + + #???? - why multiply and not divide by fps???? 
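+        # A note on the question above: np.diff() yields degrees/frame, so
+        # multiplying by fps (frames/second) converts the difference to
+        # degrees/second; dividing would be dimensionally wrong. E.g., a
+        # 0.5 degree change between consecutive frames at 30 fps is an
+        # angular speed of 0.5 * 30 = 15 degrees/second.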
+
+        d_data = np.diff(nose_bend_angle_d) * fps
+        speeds = np.empty(amplitudes.size) * np.NaN
+        # This will leave the first and last frame's speed as NaN:
+        speeds[1:-1] = (d_data[:-1] + d_data[1:]) / 2
+
+        # Propagate NaN for speeds to amplitudes
+        amplitudes[np.isnan(speeds)] = np.NaN
+
+        return amplitudes, speeds
+
+    def h__getAmplitudes(self, nose_bend_angle_d):
+        """
+        In between all sign changes, get the maximum or minimum value and
+        apply it to all indices that have the same sign within the stretch
+
+        Parameters
+        ---------------------------------------
+        nose_bend_angle_d : 1-d numpy array of length n_frames
+
+        Returns
+        ---------------------------------------
+        1-d numpy array of length n_frames
+
+        Notes
+        ---------------------------------------
+        Formerly amps = h__getAmps(nose_bend_angle_d)
+
+        NOTE: This code is very similar to wormKinks
+
+        Example
+        ---------------------------------------
+        >>> h__getAmps(np.array([1, 2, 3, 2, 1, -1, -2, -1, 1, 2, 2, 5]))
+        array([3, 3, 3, 3, 3, -2, -2, -2, 5, 5, 5, 5])
+
+        """
+        n_frames = len(nose_bend_angle_d)
+
+        # Suppress warnings related to finding the sign of a numpy array that
+        # may contain NaN values.
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            data_sign = np.sign(nose_bend_angle_d)
+        sign_change_I = np.flatnonzero(data_sign[1:] != data_sign[:-1])
+
+        start_I = np.concatenate([[0], sign_change_I + 1])
+        stop_I = np.concatenate([sign_change_I, [n_frames - 1]])
+
+        # All NaN values are considered sign changes,
+        # but we don't want them considered that way.
+        # So create a mask of items to be removed:
+        mask = np.isnan(nose_bend_angle_d[start_I])
+        # Keep only those items NOT in the mask:
+        start_I = start_I[np.flatnonzero(~mask)]
+        stop_I = stop_I[np.flatnonzero(~mask)]
+
+        # Python's array index notation requires that we specify one PAST the
+        # index of the last entry in the "run"
+        end_I = stop_I + 1
+
+        amps = np.empty(n_frames) * np.NaN
+        # For each chunk, get max or min, depending on whether the data is
+        # positive or negative ...
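+        # (Worked illustration, matching the docstring example above: the
+        # positive stretch [1, 2, 3, 2, 1] is filled with its max, 3; the
+        # negative stretch [-1, -2, -1] is filled with its min, -2.)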
+ for i_chunk in range(len(start_I)): + cur_start = start_I[i_chunk] + cur_end = end_I[i_chunk] + + if nose_bend_angle_d[cur_start] > 0: + amps[cur_start:cur_end] = max( + nose_bend_angle_d[cur_start:cur_end]) + else: + amps[cur_start:cur_end] = min( + nose_bend_angle_d[cur_start:cur_end]) + + return amps + + @classmethod + def from_disk(cls, foraging_ref): + + self = cls.__new__(cls) + + self.amplitude = utils._extract_time_from_disk( + foraging_ref, 'amplitude') + self.angle_speed = utils._extract_time_from_disk( + foraging_ref, 'angleSpeed') + + return self + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + return utils.correlation( + self.amplitude, + other.amplitude, + 'locomotion.foraging.amplitude') and utils.correlation( + self.angle_speed, + other.angle_speed, + 'locomotion.foraging.angle_speed') + +#%% + + +#============================================================================== +# New Feature Organization +#============================================================================== +# self.crawling_bends = locomotion_bends.LocomotionCrawlingBends( +# features_ref, +# nw.angles, +# self.motion_events.is_paused, +# video_info.is_segmented) +# +# self.foraging_bends = locomotion_bends.LocomotionForagingBends( +# features_ref, +# video_info.is_segmented, +# video_info.ventral_mode) + +# locomotion.foraging_bends.amplitude +# locomotion.foraging_bends.angle_speed +# locomotion.crawling_bends.head.amplitude +# locomotion.crawling_bends.midbody.amplitude +# locomotion.crawling_bends.tail.amplitude +# locomotion.crawling_bends.head.frequency +# locomotion.crawling_bends.midbody.frequency +# locomotion.crawling_bends.tail.frequency + +class ForagingBends(Feature): + + """ + temporary feature: locomotion.foraging_bends + + Attributes + ---------- + amplitude + angle_speed + + + Methods + ------- + __init__ + h__computeNoseBends + h__computeAvgAngles + h__interpData + h__getNoseInterpolationIndices + h__foragingData + h__getAmps + + Notes + --------------------------------------- + Formerly +segworm/+features/@locomotion/getForaging + + Originally, part of wormBends.m + + """ + + def __init__(self, wf, feature_name): + """ + Initialize an instance of LocomotionForagingBends + + Parameters + ---------- + nw: NormalizedWorm instance + is_segmented_mask: boolean numpy array [1 x n_frames] + ventral_mode: int + 0, 1, or 2 depending on the orientation of the worm. + + """ + + #features_ref, is_segmented_mask, ventral_mode + + self.name = feature_name + + options = wf.options.locomotion.foraging_bends + video_info = wf.video_info + fps = video_info.fps + nw = wf.nw + ventral_mode = video_info.ventral_mode + + # TODO: Why don't we use this anymore????? + is_segmented_mask = video_info.is_segmented + + timer = wf.timer + timer.tic() + + # self.amplitude = None # DEBUG + # self.angleSpeed = None # DEBUG + + nose_x, nose_y = \ + nw.get_partition('head_tip', + data_key='skeleton', + split_spatial_dimensions=True) + + neck_x, neck_y = \ + nw.get_partition('head_base', + data_key='skeleton', + split_spatial_dimensions=True) + + # TODO: Add "reversed" and "interpolated" options to the get_partition + # function, to replace the below blocks of code! + #---------------------------------------------------------------------- + + # We need to flip the orientation (i.e. 
reverse the entries along the + # first, or skeleton index, axis) for angles and consistency with old + # code: + nose_x = nose_x[::-1, :] + nose_y = nose_y[::-1, :] + neck_x = neck_x[::-1, :] + neck_y = neck_y[::-1, :] + + # Step 1: Interpolation of skeleton indices + #--------------------------------------- + # TODO: ensure that we are excluding the points at the beginning + # and ending of the second dimension (the frames list) of nose_x, etc. + # from being interpolated. (this was a step in + # h__getNoseInterpolationIndices, that we no longer have since I've + # put the interpolation code into + # utils.interpolate_with_threshold_2D instead. But we + # might be okay, since the beginning and end are going to be left alone + # since I've set left=np.NaN and right=np.NaN in the underlying + # utils.interpolate_with_threshold code. + interp = utils.interpolate_with_threshold_2D + + max_samples_interp = options.max_samples_interp_nose(fps) + + nose_xi = interp(nose_x, threshold=max_samples_interp) + nose_yi = interp(nose_y, threshold=max_samples_interp) + neck_xi = interp(neck_x, threshold=max_samples_interp) + neck_yi = interp(neck_y, threshold=max_samples_interp) + #---------------------------------------------------------------------- + + # Step 2: Calculation of the bend angles + #--------------------------------------- + nose_bends = self.h__computeNoseBends( + nose_xi, nose_yi, neck_xi, neck_yi) + + # Step 3: + #--------------------------------------- + [nose_amps, nose_freqs] = \ + self.h__foragingData(fps, nose_bends, + options.min_nose_window_samples(fps)) + + if ventral_mode == 2: + nose_amps = -nose_amps + nose_freqs = -nose_freqs + + self.amplitude = nose_amps + self.angle_speed = nose_freqs + + timer.toc('locomotion.foraging_bends') + + def h__computeNoseBends(self, nose_x, nose_y, neck_x, neck_y): + """ + Compute the difference in angles between the nose and neck (really the + head tip and head base). + + Parameters + ---------- + nose_x: [4 x n_frames] + nose_y: [4 x n_frames] + neck_x: [4 x n_frames] + neck_y: [4 x n_frames] + + Returns + ------- + nose_bends_d + + Notes + --------------------------------------- + Formerly nose_bends_d = h__computeNoseBends(nose_x,nose_y,neck_x,neck_y) + + """ + + nose_angles = self.h__computeAvgAngles(nose_x, nose_y) + neck_angles = self.h__computeAvgAngles(neck_x, neck_y) + + # TODO: These three should be a method, calculating the difference + # in angles and ensuring all results are within +/- 180 + nose_bends_d = (nose_angles - neck_angles) * (180 / np.pi) + + # Suppress warnings so we can compare a numpy array that may contain NaNs + # without triggering a Runtime Warning + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + nose_bends_d[nose_bends_d > 180] -= 360 + nose_bends_d[nose_bends_d < -180] += 360 + + return nose_bends_d + + def h__computeAvgAngles(self, x, y): + """ + Take average difference between successive x and y skeleton points, + then compute the arc tangent from those averages. 
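+
+        For example (hypothetical values), with two skeleton points per
+        frame, x = [[0, 0], [1, 0]] and y = [[0, 0], [0, 1]] give
+        avg_diff_x = [1, 0], avg_diff_y = [0, 1], and so
+        angles = arctan2([0, 1], [1, 0]) = [0, pi/2].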
+
+        Parameters
+        ----------
+        x : m x n float numpy array
+            m is the number of skeleton points
+            n is the number of frames
+        y : m x n float numpy array
+            (Same as x)
+
+        Returns
+        -------
+        1-d float numpy array of length n
+            The angles
+
+        Notes
+        ---------------------------------------
+        Simple helper for h__computeNoseBends
+
+        """
+        # Suppress RuntimeWarning: Mean of empty slice
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', category=RuntimeWarning)
+            avg_diff_x = np.nanmean(np.diff(x, n=1, axis=0), axis=0)
+            avg_diff_y = np.nanmean(np.diff(y, n=1, axis=0), axis=0)
+
+        angles = np.arctan2(avg_diff_y, avg_diff_x)
+
+        return angles
+
+    def h__foragingData(self, fps, nose_bend_angle_d, min_win_size):
+        """
+        Compute the foraging amplitude and angular speed.
+
+        Parameters
+        ----------
+        fps :
+        nose_bend_angle_d : [n_frames x 1]
+        min_win_size : (scalar)
+
+        Returns
+        ---------------------------------------
+        amplitudes : [1 x n_frames]
+        speeds : [1 x n_frames]
+
+        Notes
+        ---------------------------------------
+        Formerly [amps,speeds] = h__foragingData(nose_bend_angle_d,
+                                                 min_win_size, fps)
+
+        """
+        if min_win_size > 0:
+            # Clean up the signal with a gaussian filter.
+            gauss_filter = utils.gausswin(2 * min_win_size + 1) / min_win_size
+            nose_bend_angle_d = filters.convolve1d(nose_bend_angle_d,
+                                                   gauss_filter,
+                                                   cval=0,
+                                                   mode='constant')
+
+            # Remove partial data frames ...
+            nose_bend_angle_d[:min_win_size] = np.NaN
+            nose_bend_angle_d[-min_win_size:] = np.NaN
+
+        # Calculate amplitudes
+        amplitudes = self.h__getAmplitudes(nose_bend_angle_d)
+        assert(np.shape(nose_bend_angle_d) == np.shape(amplitudes))
+
+        # Calculate angular speed
+        # Compute the speed centered between the back and front foraging
+        # movements.
+        #
+        #  0     1    2
+        #    d1    d2     d1 = 1 - 0,   d2 = 2 - 1
+        #        x        assign to x, avg of d1 and d2
+
+        #???? - why multiply and not divide by fps????
+
+        d_data = np.diff(nose_bend_angle_d) * fps
+        speeds = np.empty(amplitudes.size) * np.NaN
+        # This will leave the first and last frame's speed as NaN:
+        speeds[1:-1] = (d_data[:-1] + d_data[1:]) / 2
+
+        # Propagate NaN for speeds to amplitudes
+        amplitudes[np.isnan(speeds)] = np.NaN
+
+        return amplitudes, speeds
+
+    def h__getAmplitudes(self, nose_bend_angle_d):
+        """
+        In between all sign changes, get the maximum or minimum value and
+        apply it to all indices that have the same sign within the stretch
+
+        Parameters
+        ---------------------------------------
+        nose_bend_angle_d : 1-d numpy array of length n_frames
+
+        Returns
+        ---------------------------------------
+        1-d numpy array of length n_frames
+
+        Notes
+        ---------------------------------------
+        Formerly amps = h__getAmps(nose_bend_angle_d)
+
+        NOTE: This code is very similar to wormKinks
+
+        Example
+        ---------------------------------------
+        >>> h__getAmps(np.array([1, 2, 3, 2, 1, -1, -2, -1, 1, 2, 2, 5]))
+        array([3, 3, 3, 3, 3, -2, -2, -2, 5, 5, 5, 5])
+
+        """
+        n_frames = len(nose_bend_angle_d)
+
+        # Suppress warnings related to finding the sign of a numpy array that
+        # may contain NaN values.
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            data_sign = np.sign(nose_bend_angle_d)
+        sign_change_I = np.flatnonzero(data_sign[1:] != data_sign[:-1])
+
+        start_I = np.concatenate([[0], sign_change_I + 1])
+        stop_I = np.concatenate([sign_change_I, [n_frames - 1]])
+
+        # All NaN values are considered sign changes,
+        # but we don't want them considered that way.
+ # So create a mask of items to be removed: + mask = np.isnan(nose_bend_angle_d[start_I]) + # Keep only those items NOT in the mask: + start_I = start_I[np.flatnonzero(~mask)] + stop_I = stop_I[np.flatnonzero(~mask)] + + # Python's array index notation requires that we specify one PAST the + # index of the last entry in the "run" + end_I = stop_I + 1 + + amps = np.empty(n_frames) * np.NaN + # For each chunk, get max or min, depending on whether the data is positive + # or negative ... + for i_chunk in range(len(start_I)): + cur_start = start_I[i_chunk] + cur_end = end_I[i_chunk] + + if nose_bend_angle_d[cur_start] > 0: + amps[cur_start:cur_end] = max( + nose_bend_angle_d[cur_start:cur_end]) + else: + amps[cur_start:cur_end] = min( + nose_bend_angle_d[cur_start:cur_end]) + + return amps + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.amplitude = utils.get_nested_h5_field( + wf.h, ['locomotion', 'bends', 'foraging', 'amplitude']) + self.angle_speed = utils.get_nested_h5_field( + wf.h, ['locomotion', 'bends', 'foraging', 'angleSpeed']) + + return self + +# def __eq__(self, other): +# return utils.correlation(self.amplitude, other.amplitude, 'locomotion.foraging.amplitude') and \ +# utils.correlation(self.angle_speed, other.angle_speed, 'locomotion.foraging.angle_speed') + + +class ForagingAmplitude(Feature): + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'locomotion.foraging_bends').amplitude + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class ForagingAngleSpeed(Feature): + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'locomotion.foraging_bends').angle_speed + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class CrawlingBend(Feature, BendHelper): + + """ + Locomotion Crawling Bends Feature. + + Notes + --------------------------------------- + Formerly +segworm/+features/@locomotion/getLocomotionBends + + Originally, part of wormBends.m + + + Note from Ev Yemini on Setup Options + --------------------------------------- + Empirically I've found the values below achieve good signal. + + Furthermore: + The body bend frequency is much easier to see (than foraging). The N2 + signal is clearly centered around 1/3Hz in both the literature and + through visual inspection. + + I chose a high-frequency threshold of 4 frames. With 4 frames a 3-frame + tick, resulting from segmentation noise, will be diluted by the + additional frame. + + TODO: Move this to a different location and reference in the code + Nature Methods Description + --------------------------------------- + + Worm crawling is expressed as both an amplitude and frequency + (Supplementary Fig. 4e). We measure these features instantaneously at + the head, midbody, and tail. The amplitude and frequency are signed + negatively whenever the worm’s ventral side is contained within the + concave portion of its instantaneous bend. + + Crawling is only measured during forward and backward motion states. + The worm bend mean angles (described in the section on “Posture”) show + a roughly periodic signal as the crawling wave travels along the worm’s + body. This wave can be asymmetric due to differences in dorsal-ventral + flexibility or simply because the worm is executing a turn. 
Moreover + the wave dynamics can change abruptly to speed up or slow down. + Therefore, the signal is only roughly periodic and we measure its + instantaneous properties. + + Worm bends are linearly interpolated across unsegmented frames. The + motion states criteria (described earlier in this section) guarantee + that interpolation is no more than 1/4 of a second long. For each + frame, we search both backwards and forwards for a zero crossing in the + bend angle mean – the location where the measured body part (head, + midbody, or tail) must have hit a flat posture (a supplementary bend + angle of 0°). This guarantees that we are observing half a cycle for + the waveform. Crawling is bounded between 1/30Hz (a very slow wave that + would not resemble crawling) and 1Hz (an impossibly fast wave on agar). + + If the window between zero crossings is too small, the nearest zero + crossing is assumed to be noise and we search for the next available + zero crossing in its respective direction. If the window is too big, + crawling is marked undefined at the frame. + + Once an appropriate window has been found, the window is extended in + order to center the frame and measure instantaneous crawling by + ensuring that the distance on either side to respective zero crossings + is identical. If the distances are not identical, the distance of the + larger side is used in place of the zero-crossing distance of the + smaller side in order to expand the small side and achieve a symmetric + window, centered at the frame of interest. + + We use a Fourier transform to measure the amplitude and frequency + within the window described above. The largest peak within the + transform is chosen for the crawling amplitude and frequency. If the + troughs on either side of the peak exceed 1/2 its height, the peak is + rejected for being unclear and crawling is marked as undefined at the + frame. Similarly, if the integral between the troughs is less than half + the total integral, the peak is rejected for being weak. + + + """ + + #bend_names = ['head', 'midbody', 'tail'] + + def __init__(self, wf, feature_name, bend_name): + """ + Compute the temporal bending frequency at the head, midbody, and tail. + + Parameters: + ----------- + features_ref : + bend_angles : numpy.array + - [49 x n_frames] + is_paused : numpy.array + - [1 x n_frames] + Whether or not the worm is considered to be paused during the frame + is_segmented_mask : [1 x n_frames] + + """ + + #features_ref, bend_angles, is_paused, is_segmented_mask + + self.name = feature_name + + options = wf.options.locomotion.crawling_bends + video_info = wf.video_info + fps = video_info.fps + is_segmented_mask = video_info.is_segmented + is_paused = self.get_feature( + wf, 'locomotion.motion_events.is_paused').value + bend_angles = wf.nw.angles + + timer = wf.timer + timer.tic() + + # Special Case: No worm data. 
+ #------------------------------------ + if ~np.any(is_segmented_mask): + self.amplitude = None + self.frequency = None + return + + # Find the mean bend angle for the current partition, across all frames + s = slice(*options.bends_partitions[bend_name]) + + # Suppress RuntimeWarning: Mean of empty slice + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + avg_bend_angles = np.nanmean(bend_angles[s, :], axis=0) + + # interpolate if: + # not all are segmented - if all present we don't need to interpolate + # some are segmented - segmented data provides basis for interpolation + if not(np.all(is_segmented_mask)) and np.any(is_segmented_mask): + avg_bend_angles = utils.interpolate_with_threshold(avg_bend_angles) + + bound_info = CrawlingBendsBoundInfo( + avg_bend_angles, is_paused, options, fps) + + [amplitude, frequency] = self.h__getBendData(avg_bend_angles, + bound_info, + options, + fps) + + self.amplitude = amplitude + self.frequency = frequency + # setattr(self,cur_partition_name,LocomotionBend(amplitude,frequency,cur_partition_name)) + + timer.toc('locomotion.crawling_bends') + +# def __eq__(self, other): +# +# #merge_nans=True - utils.separated_peaks works slightly differently +# #so merge_nans is true here, for now. The utils function works correctly +# #and the old version works incorrectly but was convoluted enough that +# #it was hard to replicate +# +# return utils.correlation(self.value, other.v, +# 'locomotion.bends.' + self.name + '.amplitude', +# merge_nans=True) and \ +# utils.correlation(self.frequency, other.frequency, +# 'locomotion.bends.' + self.name + '.frequency', +# merge_nans=True) + + + @classmethod + def from_schafer_file(cls, wf, feature_name, bend_name): + self = cls.__new__(cls) + self.name = feature_name + self.amplitude = utils.get_nested_h5_field( + wf.h, ['locomotion', 'bends', bend_name, 'amplitude']) + self.frequency = utils.get_nested_h5_field( + wf.h, ['locomotion', 'bends', bend_name, 'frequency']) + + return self + + +class CrawlingBendsBoundInfo(object): + + """ + This class is used by LocomotionCrawlingBends. + + Attributes + ---------- + back_zeros_I : + front_zeros_I : + left_bounds : + right_bounds : + half_distances : + + + """ + + def __init__(self, avg_bend_angles, is_paused, options, fps): + + # TODO: This needs to be cleaned up ... - @JimHokanson + min_number_frames_for_bend = round(options.min_time_for_bend * fps) + max_number_frames_for_bend = round(options.max_time_for_bend * fps) + + [back_zeros_I, front_zeros_I] = \ + self.h__getBoundingZeroIndices(avg_bend_angles, + min_number_frames_for_bend) + + n_frames = len(avg_bend_angles) + + # Go left and right, and get the + left_distances = np.array(range(n_frames)) - back_zeros_I + right_distances = front_zeros_I - np.array(range(n_frames)) + half_distances = np.maximum(left_distances, right_distances) + + left_bounds = np.array(range(n_frames)) - half_distances + + #+1 for slicing to be inclusive of the right bound + right_bounds = np.array(range(n_frames)) + half_distances + 1 + + self.back_zeros_I = back_zeros_I + self.front_zeros_I = front_zeros_I + self.left_bounds = left_bounds + self.right_bounds = right_bounds + self.half_distances = half_distances + + # Compute conditions by which we will ignore frames: + # ------------------------------------------------- + # 1) frame is not bounded on both sides by a sign change + #- avg_bend_angles is NaN, this will only happen on the edges because we + # interpolate over the other frames ... 
(we just don't extrapolate) + #- the sign change region is too large + #- the bounds we settle on exceed the data region + #- mode segmentation determined the frame was a paused frame + # + # + #??? - what about large NaN regions, are those paused regions??? + + # MRC code placed restriction on half distance, not on the full distance + # This is still left in place below + # Should be 2*half_distances > max_number_frames_for_bend + + self.is_bad_mask = \ + (back_zeros_I == -1) | \ + (front_zeros_I == -1) | \ + np.isnan(avg_bend_angles) | \ + (half_distances > max_number_frames_for_bend) | \ + (left_bounds < 0) | \ + (right_bounds > n_frames) | \ + is_paused + + def h__getBoundingZeroIndices( + self, + avg_bend_angles, + min_number_frames_for_bend): + """ + The goal of this function is to bound each index of avg_bend_angles by + sign changes. + + + Parameters: + ----------- + avg_bend_angles : [1 x n_frames] + min_number_frames_for_bend : int + The minimum size of the data window + + Returns + ---------------------- + back_zeros_I : [1 x n_frames] + For each frame, this specifies a preceding frame in which a + change in the bend angle occurs. Invalid entries are + indicated by -1. + front_zeros_I : [1 x n_frames] + + Notes + ---------------------- + Formerly [back_zeros_I,front_zeros_I] = \ + h__getBoundingZeroIndices(avg_bend_angles,min_number_frames_for_bend) + + """ + + # Getting sign change indices ... + # --------------------------------------- + # The old code found sign changes for every frame, even though + # the sign changes never changed. Instead we find all sign changes, + # and then for each frame know which frame to the left and right + # have sign changes. We do this in such a way so that if we need to + # look further to the left or right, it is really easy to get the + # next answer. In other words, if we are bounded by the 3rd and 4th sign + # change, and we are going towards the 3rd sign change, then if the + # 3rd sign change doesn't work, we can go to the 2nd sign change index, + # not by searching the data array, but by getting the 2nd element of + # the sign change index array. + + with np.errstate(invalid='ignore'): + sign_change_mask = np.sign(avg_bend_angles[:-1]) != \ + np.sign(avg_bend_angles[1:]) + + sign_change_I = np.flatnonzero(sign_change_mask) + n_sign_changes = len(sign_change_I) + n_frames = len(avg_bend_angles) + + if n_sign_changes == 0: + # no changes of sign return two zeros arrays + return [np.zeros(n_frames), np.zeros(n_frames)] + + """ + To get the correct frame numbers, we need to do the following + depending on whether or not the bound is the left (backward) + bound or the right (forward) bound. + + Note from @JimHokanson: I haven't really thought through why + this is, but it mimics the old code. + + for left bounds - at sign changes - don't subtract or add + for right bounds - we need to add 1 + + + Let's say we have sign changes at indices 3 6 9 + What we need ... 
+
+        1  2  3  4  5  6  7  9  10    Indices
+        Left  = 0  0  0  3  3  3  6   6   6  - at 4, the left sign change is at 3
+        Right = 4  4  4  7  7  7  10  10  0  - at 4, the right sign change is at 7
+
+        NOTE: The values above are the final indices or values, but instead we
+        want to work with the indices, so we need:
+
+        1  2  3  4  5  6  7  9  10    Indices
+        Left  = 0  0  0  1  1  1  2  2  2  - left_sign_change_I
+        Right = 1  1  1  2  2  2  3  3  3  - right_sign_change_I
+
+        we also need:
+        left_values  = [3 6 9]   # the sign change indices
+        right_values = [4 7 10]  # +1
+
+        So this says:
+        left_sign_change_I(7) => 2
+        left_values(2) => 6, our sign change is at 6
+
+        Let's say we need to expand further to the left, then we take
+        left_sign_change_I(7) - 1 => 1
+        left_values(1) => 3, our new sign change is at 3
+
+        Further:
+        left_sign_change_I(7) - 2 => 0
+        We've gone too far, nothing at index 0, set to invalid
+        """
+
+        # For each element, determine the indices to the left and right of the
+        # element at which a sign change occurs.
+
+        BAD_INDEX_VALUE = -1
+
+        # For each element in the array, these values indicate which
+        # sign change index to use ...
+        left_sign_change_I = np.zeros(n_frames)
+
+        left_sign_change_I[sign_change_I + 1] = 1
+        # We increment at values to the right of the sign changes
+        left_sign_change_I = left_sign_change_I.cumsum() - 1
+
+        # NOTE: We need to do this after the cumsum :/
+        left_sign_change_I[:sign_change_I[0]] = BAD_INDEX_VALUE
+        # The previous line is a little Matlab trick in which
+        # something like:
+        # 0  1  0  1  0  0  1  0  0   <= sign change indices
+        # 0  1  2  3  4  5  6  7  8   <= indices
+        # becomes:
+        # -1  0  0  1  1  1  2  2  2  <= -1 is off limits, values are inclusive
+        #
+        # so now at each frame, we get the index of the value that
+        # is to the left.
+        #
+        # From above; sign_change_I = [0 3 6]
+        #
+        # So at index 5, the next sign change is at sign_change_I[left_change_I[5]]
+        # or sign_change_I[1] => 3
+
+        # This does:
+        # 0  1  0  1  0  0  1  0  0   <= sign change indices
+        # 0  1  2  3  4  5  6  7  8   <= indices
+        # 0  0  1  1  2  2  2  -1 -1  <= indices of sign change to right
+        right_sign_change_I = np.zeros(n_frames)
+        right_sign_change_I[sign_change_I[:-1] + 1] = 1
+        right_sign_change_I[0] = 1
+        right_sign_change_I = right_sign_change_I.cumsum() - 1
+        # We must have nothing to the right of the last change:
+        right_sign_change_I[sign_change_I[-1] + 1:] = BAD_INDEX_VALUE
+
+        # Indices that each left_sign_change_I or right_sign_change_I points to
+        left_values = sign_change_I
+        right_values = sign_change_I + 1  # By definition
+        #----------------------------------------------------------------
+
+        back_zeros_I = np.zeros(n_frames)
+        back_zeros_I[:] = BAD_INDEX_VALUE
+        front_zeros_I = np.zeros(n_frames)
+        front_zeros_I[:] = BAD_INDEX_VALUE
+
+        for iFrame in range(n_frames):
+            cur_left_index = left_sign_change_I[iFrame]
+            cur_right_index = right_sign_change_I[iFrame]
+
+            if cur_left_index == BAD_INDEX_VALUE or cur_right_index == BAD_INDEX_VALUE:
+                continue
+
+            # Convert from float to int
+            cur_left_index = int(cur_left_index)
+            cur_right_index = int(cur_right_index)
+
+            back_zero_I = left_values[cur_left_index]
+            front_zero_I = right_values[cur_right_index]
+
+            use_values = True
+
+            # Expand the zero-crossing window.
+            #----------------------------------
+            # Note from @JimHokanson:
+            #
+            # TODO: Fix and move this code to old config
+            #
+            # General problem, we specify a minimum acceptable window size,
+            # and the old code needlessly expands the window past this point
+            # by doing the following comparison:
+            #
+            # - distance from right to left > min_window_size?
+            #
+            # The following code centers on 2x the larger of the following
+            # gaps:
+            #
+            # - distance from left to center
+            # - distance from right to center
+            #
+            # So we should check if either of these is half of the
+            # required width.
+            #
+            # half-window sizes:
+            # left_window_size  = iFrame - back_zero_I
+            # right_window_size = front_zero_I - iFrame
+            #
+            # so in reality we should use:
+            #
+            # front_zero_I - iFrame < min_number_frames_for_bend/2 and
+            # iFrame - back_zero_I < min_number_frames_for_bend/2
+            #
+            # By not doing this, we overshoot the minimum window size that
+            # we need to use. Consider window sizes that are in terms of
+            # the minimum window size.
+            #
+            # i.e. 0.5w means the left or right window is half
+            # min_number_frames_for_bend
+            #
+            # Consider we have:
+            # 0.5w left
+            # 0.3w right
+            #
+            # total 0.8w => not at 1w, thus old code should expand
+            #
+            # But in reality, if we stopped now we would be at twice 0.5w
+
+            while (
+                    front_zero_I -
+                    back_zero_I +
+                    1) < min_number_frames_for_bend:
+                # Expand the smaller of the two windows
+                # -------------------------------------
+                #    left_window_size       right_window_size
+                if (iFrame - back_zero_I) < (front_zero_I - iFrame):
+                    # Expand to the left:
+                    cur_left_index = cur_left_index - 1
+                    if cur_left_index == BAD_INDEX_VALUE:
+                        use_values = False
+                        break
+                    back_zero_I = left_values[cur_left_index]
+                else:
+                    # Expand to the right:
+                    cur_right_index = cur_right_index + 1
+                    if cur_right_index >= n_sign_changes:
+                        use_values = False
+                        break
+                    front_zero_I = right_values[cur_right_index]
+
+            if use_values:
+                back_zeros_I[iFrame] = back_zero_I
+                front_zeros_I[iFrame] = front_zero_I
+
+        return [back_zeros_I, front_zeros_I]
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+
+class BendAmplitude(Feature):
+    """
+    Feature: locomotion.crawling_bends.[bend_name].amplitude
+    """
+
+    def __init__(self, wf, feature_name, bend_name):
+        parent_name = generic_features.get_parent_feature_name(feature_name)
+        self.name = feature_name
+        self.value = self.get_feature(wf, parent_name).amplitude
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name, bend_name):
+        return cls(wf, feature_name, bend_name)
+
+    def __eq__(self, other):
+
+        return utils.correlation(self.value, other.value,
+                                 self.name,
+                                 merge_nans=True)
+
+
+class BendFrequency(Feature):
+    """
+    Feature: locomotion.crawling_bends.[bend_name].frequency
+    """
+
+    def __init__(self, wf, feature_name, bend_name):
+        parent_name = generic_features.get_parent_feature_name(feature_name)
+        self.name = feature_name
+        self.value = self.get_feature(wf, parent_name).frequency
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name, bend_name):
+        return cls(wf, feature_name, bend_name)
+
+    def __eq__(self, other):
+
+        return utils.correlation(self.value, other.value,
+                                 self.name,
+                                 merge_nans=True)
+
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_features.py
new file mode 100755
index 00000000..84eac052
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_features.py
@@ -0,0 +1,490 @@
+# -*- coding: utf-8 -*-
+"""
+Locomotion features
+
+Contains Processing code for:
+-----------------------------
+locomotion.velocity
+locomotion.motion_events
+
+"""
+
+import numpy as np
+
+from .. import utils
+
+from .generic_features import Feature, get_parent_feature_name
+from . import events
+# To avoid conflicting with variables named 'velocity', we
+# import this as 'velocity_module':
+from . import velocity as velocity_module
+
+
+class LocomotionVelocityElement(object):
+    """
+
+    This can be deleted when we move over to the new feature organization.
+
+    This class is a simple container class for a velocity element.
+
+    Attributes
+    ----------
+    name : string
+    speed : numpy array
+    direction : numpy array
+
+    See Also
+    --------
+    LocomotionVelocity
+
+    """
+
+    def __init__(self, name, speed, direction):
+        self.name = name
+        self.speed = speed
+        self.direction = direction
+
+    def __eq__(self, other):
+        return utils.correlation(
+            self.speed,
+            other.speed,
+            'locomotion.velocity.' +
+            self.name +
+            '.speed') and utils.correlation(
+            self.direction,
+            other.direction,
+            'locomotion.velocity.' +
+            self.name +
+            '.direction')
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    @classmethod
+    def from_disk(cls, parent_ref, name):
+
+        self = cls.__new__(cls)
+
+        self.name = name
+        self.speed = utils._extract_time_from_disk(parent_ref, 'speed')
+        self.direction = utils._extract_time_from_disk(parent_ref, 'direction')
+
+        return self
+
+
+#==============================================================================
+# NEW CODE
+#==============================================================================
+
+
+class AverageBodyAngle(Feature):
+
+    """
+    Temporary Feature: locomotion.velocity.avg_body_angle
+
+    This is a temporary feature that is needed for
+    locomotion.velocity features
+
+    Description
+    -----------
+    For the "body" partition, compute angles between each point
+    on the skeleton and average them together to compute a single value for
+    each frame.
+
+    See Also
+    --------
+    LocomotionVelocitySection
+    velocity_module.get_partition_angles
+
+    """
+
+    def __init__(self, wf, feature_name):
+        nw = wf.nw
+        self.name = feature_name
+        self.value = velocity_module.get_partition_angles(nw,
+                                                          partition_key='body',
+                                                          data_key='skeleton',
+                                                          head_to_tail=False)
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        # TODO: Move this into a function in generic_features
+        self = cls.__new__(cls)
+        self.name = feature_name
+        self.value = None
+        self.missing_from_disk = True
+        return self
+
+
+class LocomotionVelocitySection(Feature):
+
+    """
+    Temporary Feature:
+
+    This is a generic temporary class that implements:
+    locomotion.velocity.head_tip,
+    locomotion.velocity.head, etc.
+
+    This is the parent feature which computes and then temporarily holds
+    attributes for more specific child features.
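+
+    A child feature such as locomotion.velocity.midbody.speed then just
+    reads the attribute it needs from this parent, e.g. (sketch of the
+    pattern used by VelocitySpeed below):
+
+        self.value = self.get_feature(wf, parent_feature_name).speed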
+ + Attributes + ---------- + speed : VelocitySpeed + direction : VelocityDirection + + + """ + + def __init__(self, wf, feature_name, segment): + """ + Parameters + ---------- + segment : string + Options include: + - head_tip + - head + - midbody + - tail + - tail_tip + + Feature Dependencies + -------------------- + - locomotion.velocity.avg_body_angle + + See Also + -------- + - velocity_module.compute_speed # This is the function that + # does all the work + + """ + + self.name = feature_name + + # Unpacking + #------------------------- + nw = wf.nw + ventral_mode = nw.video_info.ventral_mode + fps = nw.video_info.fps + + # TODO: I'd like this to be inside the class + locomotion_options = wf.options.locomotion + + avg_body_angle = self.get_feature( + wf, 'locomotion.velocity.avg_body_angle').value + + # Options by segment + #-------------------------------------------------- + if segment == 'head_tip' or segment == 'tail_tip': + sample_time = locomotion_options.velocity_tip_diff + else: + sample_time = locomotion_options.velocity_body_diff + + data_key = segment + if segment == 'midbody' and wf.options.mimic_old_behaviour: + data_key = 'old_midbody_velocity' + + # The actual computation + #---------------------- + # If we ever move nw features into the self.get_feature approach, this + # would be tougher to replicate + #i.e. x = self.get_feature(nw,'skeleton_x') + x, y = nw.get_partition(data_key, 'skeleton', True) + # The real work ... + speed, direction = velocity_module.compute_speed(fps, x, y, + avg_body_angle, + sample_time, + ventral_mode)[0:2] + + self.speed = speed + self.direction = direction + + @classmethod + def from_schafer_file(cls, wf, feature_name, segment): + self = cls.__new__(cls) + self.name = feature_name + + # These particular segments were renamed internally to follow naming + # conventions. The other ones were fine + if segment == 'head_tip': + old_key = 'headTip' + elif segment == 'tail_tip': + old_key = 'tailTip' + else: + old_key = segment + + temp = utils.get_nested_h5_field( + wf.h, ['locomotion', 'velocity', old_key], resolve_value=False) + + self.speed = utils.get_nested_h5_field(temp, 'speed') + self.direction = utils.get_nested_h5_field(temp, 'direction') + + return self + + def __eq__(self, other): + # I'm not sure what we want to do for these temporary features ... 
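+        # Returning True unconditionally means this temporary parent feature
+        # never fails an equality check; meaningful comparisons happen in the
+        # derived features instead (see e.g. BendAmplitude.__eq__, which
+        # compares values via utils.correlation).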
+        return True
+
+
+class VelocitySpeed(Feature):
+    """
+    Feature: locomotion.velocity.[segment].speed
+
+    This feature is actually calculated via LocomotionVelocitySection
+    """
+
+    def __init__(self, wf, feature_name, segment):
+        self.name = feature_name
+        parent_feature_name = get_parent_feature_name(feature_name)
+        self.value = self.get_feature(wf, parent_feature_name).speed
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name, segment):
+        return cls(wf, feature_name, segment)
+
+
+class VelocityDirection(Feature):
+    """
+    Feature: locomotion.velocity.[segment].direction
+    """
+
+    def __init__(self, wf, feature_name, segment):
+        self.name = feature_name
+        parent_feature_name = get_parent_feature_name(feature_name)
+        self.value = self.get_feature(wf, parent_feature_name).direction
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name, segment):
+        return cls(wf, feature_name, segment)
+
+
+# New motion events code
+#=====================================================
+
+"""
+    forward : open-worm-analysis-toolbox.features.events.EventListWithFeatures
+    paused : open-worm-analysis-toolbox.features.events.EventListWithFeatures
+    backward : open-worm-analysis-toolbox.features.events.EventListWithFeatures
+    mode : numpy.array
+        - shape num_frames
+        - Values are:
+            -1, backward locomotion
+             0, no locomotion (the worm is paused)
+             1, forward locomotion
+"""
+
+
+class MidbodyVelocityDistance(Feature):
+    """
+    Temporary Feature: 'locomotion.velocity.midbody.distance'
+
+    Used for turns
+    """
+
+    def __init__(self, wf, feature_name):
+        self.name = feature_name
+
+        fps = wf.video_info.fps
+
+        midbody_speed = self.get_feature(
+            wf, 'locomotion.velocity.midbody.speed').value
+        self.value = abs(midbody_speed / fps)
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        # We could calculate this, but the features that need it are already
+        # calculated
+        self = cls.__new__(cls)
+        self.name = feature_name
+        self.value = None
+        self.missing_from_disk = True
+        return self
+
+
+class MotionEvent(Feature):
+
+    """
+    Implements:
+    locomotion.motion_events.forward
+    locomotion.motion_events.backward
+    locomotion.motion_events.paused
+    """
+
+    def __init__(self, wf, feature_name, motion_type):
+
+        self.name = feature_name
+
+        fps = wf.video_info.fps
+        locomotion_options = wf.options.locomotion
+
+        midbody_speed = self.get_feature(
+            wf, 'locomotion.velocity.midbody.speed').value
+
+        skeleton_lengths = self.get_feature(wf, 'morphology.length').value
+
+        # TODO: Move this to the video_info object
+        num_frames = len(midbody_speed)
+
+        # Compute the midbody's "instantaneous" distance travelled at each
+        # frame: distance per second / (frames per second) = distance per frame
+        distance_per_frame = abs(midbody_speed / fps)
+
+        # Interpolate the missing lengths.
+        #------------------------------------
+        # TODO: This process should be saved as an intermediate feature
+
+        skeleton_lengths = utils.interpolate_with_threshold(
+            skeleton_lengths,
+            locomotion_options.motion_codes_longest_nan_run_to_interpolate)
+
+        # Set Event filter parameters
+        #--------------------------------
+        # Make the speed and distance thresholds a fixed proportion of the
+        # worm's length at the given frame:
+        worm_speed_threshold = skeleton_lengths * \
+            locomotion_options.motion_codes_speed_threshold_pct
+        worm_distance_threshold = skeleton_lengths * \
+            locomotion_options.motion_codes_distance_threshold_pct
+        worm_pause_threshold = skeleton_lengths * \
+            locomotion_options.motion_codes_pause_threshold_pct
+
+        # Event Constraints -------
+        # The minimum number of frames an event had to be taking place for
+        # it to be considered a legitimate event
+        min_frames_threshold = \
+            fps * locomotion_options.motion_codes_min_frames_threshold
+        # Maximum number of contiguous contradicting frames within the event
+        # before the event is considered to be over.
+        max_interframes_threshold = \
+            fps * locomotion_options.motion_codes_max_interframes_threshold
+
+        if motion_type == 'forward':
+            min_speed_threshold = worm_speed_threshold
+            max_speed_threshold = None
+            min_distance_threshold = worm_distance_threshold
+        elif motion_type == 'backward':
+            min_speed_threshold = None
+            max_speed_threshold = -worm_speed_threshold
+            min_distance_threshold = worm_distance_threshold
+        else:  # paused
+            min_speed_threshold = -worm_pause_threshold
+            max_speed_threshold = worm_pause_threshold
+            min_distance_threshold = None
+
+        # We will use EventFinder to determine when the
+        # event type "motion_type" occurred
+        ef = events.EventFinder()
+
+        # "Space and time" constraints
+        ef.min_distance_threshold = min_distance_threshold
+        ef.max_distance_threshold = None  # we are not constraining max dist
+        ef.min_speed_threshold = min_speed_threshold
+        ef.max_speed_threshold = max_speed_threshold
+
+        # "Time" constraints
+        ef.min_frames_threshold = min_frames_threshold
+        ef.max_inter_frames_threshold = max_interframes_threshold
+
+        event_list = ef.get_events(midbody_speed, distance_per_frame)
+
+        # Take the start and stop indices and convert them to the structure
+        # used in the feature files
+        m_event = events.EventListWithFeatures(
+            fps, event_list, distance_per_frame,
+            compute_distance_during_event=True)
+
+        # This is temporary until a bug is fixed, at which point in time
+        # it will likely need to move into the method directly above
+        m_event.num_video_frames = num_frames
+
+        self.value = m_event
+
+        # I think this is an equivalence
+        self.no_events = m_event.is_null
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name, motion_type):
+
+        self = cls.__new__(cls)
+        self.name = feature_name
+
+        ref = utils.get_nested_h5_field(
+            wf.h, ['locomotion', 'motion', motion_type], resolve_value=False)
+
+        self.value = events.EventListWithFeatures.from_disk(ref, 'MRC')
+        self.no_events = self.value.is_null
+
+        return self
+
+    def __eq__(self, other):
+        # temp feature ...
+        return True
+
+
+class MotionMode(Feature):
+
+    """
+    Temporary Feature:
+
+    This is a temporary feature. For each frame it indicates whether that
+    frame is part of a forward, backward, or paused event. Some frames
+    may not be part of any event type, as is indicated by a NaN value.
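+
+    For example (hypothetical): three forward frames, two paused frames,
+    then two backward frames would appear as
+    value == [1, 1, 1, 0, 0, -1, -1], with NaN for any frame that falls
+    outside all three event types. The per-frame codes are: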
+ + forward: 1 + backward: -1 + paused: 0 + """ + + frame_values = {'forward': 1, 'backward': -1, 'paused': 0} + + def __init__(self, wf, feature_name): + + self.name = feature_name + + # Hack to get num_frames + skeleton_lengths = self.get_feature(wf, 'morphology.length').value + + # TODO: Get this from video_info + num_frames = len(skeleton_lengths) + + # TODO: Can't we initialize NaN directly? + self.value = np.empty(num_frames, dtype='float') * np.NaN + + for key, value in self.frame_values.items(): + motion_type = 'locomotion.motion_events.' + key + event_feature = self.get_feature(wf, motion_type).value + + event_mask = event_feature.get_event_mask() + self.value[event_mask] = value + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + + self.name = feature_name + + self.value = utils.get_nested_h5_field( + wf.h, ['locomotion', 'motion', 'mode']) + + return self + + +class IsPaused(Feature): + + """ + Temporary Feature: locomotion.motion_events.is_paused + """ + + def __init__(self, wf, feature_name): + + # TODO: We could eventually only compute the paused event + # rather than checking the mode. We would need to add on support for + # checking if a feature had been computed + self.name = feature_name + mode = self.get_feature(wf, 'locomotion.motion_mode').value + self.value = mode == 0 + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_turns.py b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_turns.py new file mode 100755 index 00000000..5b5fcc73 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/locomotion_turns.py @@ -0,0 +1,1364 @@ +# -*- coding: utf-8 -*- +""" +Calculate the "Turns" locomotion feature + +There are two kinds of turns: + - omega + - upsilon. + +The only external-facing item is LocomotionTurns. The rest are internal +to this module. + + +Classes +--------------------------------------- +LocomotionTurns +UpsilonTurns +OmegaTurns + + +Standalone Functions +--------------------------------------- +getTurnEventsFromSignedFrames + + +Notes +--------------------------------------- +For the Nature Methods description see +/documentation/Yemini Supplemental Data/Locomotion.md#5-turns + + +Formerly this code was contained in four Matlab files: + seg_worm.feature_calculator.getOmegaAndUpsilonTurns, which called these 3: + seg_worm.feature_helpers.locomotion.getOmegaEvents + seg_worm.feature_helpers.locomotion.getUpsilonEvents + seg_worm.feature_helpers.locomotion.getTurnEventsFromSignedFrames + + +TODO: OmegaTurns and UpsilonTurns should inherit from LocomotionTurns or something + + +IMPORTANT: My events use 1 based indexing, the old code used 0 based +indexing - @JimHokanson + +""" + +import numpy as np + +import collections +import warnings +import operator +import re + +from .generic_features import Feature + +from .. import utils + +from . 
import events
+
+#%%
+
+
+class LocomotionTurns(object):
+
+    """
+
+    LocomotionTurns
+
+    Attributes
+    ----------
+    omegas : OmegaTurns
+    upsilons : UpsilonTurns
+
+    Methods
+    -------
+    __init__
+
+    """
+
+    def __init__(
+            self,
+            features_ref,
+            bend_angles,
+            is_stage_movement,
+            midbody_distance,
+            sx,
+            sy):
+        """
+        Initialiser for the LocomotionTurns class
+
+        Parameters
+        ----------
+        features_ref :
+        bend_angles :
+        is_stage_movement :
+        midbody_distance :
+        sx :
+        sy :
+
+        Notes
+        ---------------------------------------
+        Formerly getOmegaAndUpsilonTurns
+
+        Old Name:
+        - featureProcess.m
+        - omegaUpsilonDetectCurvature.m
+
+        """
+
+        nw = features_ref.nw
+
+        if not features_ref.options.should_compute_feature(
+                'locomotion.turns', features_ref):
+            self.omegas = None
+            self.upsilons = None
+            return
+
+        options = features_ref.options.locomotion.locomotion_turns
+
+        fps = features_ref.video_info.fps
+
+        timer = features_ref.timer
+        timer.tic()
+
+        n_frames = bend_angles.shape[1]
+
+        angles = collections.namedtuple('angles',
+                                        ['head_angles',
+                                         'body_angles',
+                                         'tail_angles',
+                                         'body_angles_with_long_nans',
+                                         'is_stage_movement'])
+
+        first_third = nw.get_subset_partition_mask('first_third')
+        second_third = nw.get_subset_partition_mask('second_third')
+        last_third = nw.get_subset_partition_mask('last_third')
+
+        # NOTE: For some reason the first and last few angles are NaN, so we
+        # use nanmean instead of mean. We could probably avoid this for the
+        # body. Suppress RuntimeWarning: Mean of empty slice for those frames
+        # that are ALL NaN.
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', category=RuntimeWarning)
+            angles.head_angles = np.nanmean(
+                bend_angles[first_third, :], axis=0)
+            angles.body_angles = np.nanmean(
+                bend_angles[second_third, :], axis=0)
+            angles.tail_angles = np.nanmean(bend_angles[last_third, :], axis=0)
+        angles.is_stage_movement = is_stage_movement
+
+        # Deep copy.
+        # To @JimHokanson from @MichaelCurrie: what does "ht" stand for?
+        # consider expanding this variable name so it's clear
+        body_angles_for_ht_change = np.copy(angles.body_angles)
+
+        n_head = np.sum(~np.isnan(angles.head_angles))
+        n_body = np.sum(~np.isnan(angles.body_angles))
+        n_tail = np.sum(~np.isnan(angles.tail_angles))
+
+        # Only proceed if there are at least two non-NaN
+        # values in each angle vector
+        if n_head < 2 or n_body < 2 or n_tail < 2:
+            # Make omegas and upsilons into blank events lists and return
+            self.omegas = events.EventListWithFeatures(fps, make_null=True)
+            self.upsilons = events.EventListWithFeatures(fps, make_null=True)
+            return
+
+        # Interpolate the angles. angles is modified.
+        self.h__interpolateAngles(
+            angles, options.max_interpolation_gap_allowed)
+
+        # Get frames for each turn type
+        #----------------------------------------------------------------------
+        # This doesn't match what is written in the supplemental material ...
+        # Am I working off of old code??????
+
+        # TODO: Move this all to options ...
+        consts = collections.namedtuple('consts',
+                                        ['head_angle_start_const',
+                                         'tail_angle_start_const',
+                                         'head_angle_end_const',
+                                         'tail_angle_end_const',
+                                         'body_angle_const'])
+
+        yuck = [[20, -20, 15, -15],
+                [30, 30, 30, 30],
+                [40, 40, 30, 30],
+                [20, -20, 15, -15],
+                [20, -20, 15, -15]]
+        """
+        OLD Matlab CODE:
+
+        consts = struct(...
+            'head_angle_start_const',{20 -20 15 -15}, ...
+            'tail_angle_start_const',{30 30 30 30}, ...
+            'head_angle_end_const',  {40 40 30 30}, ...
+            'tail_angle_end_const',  {20 -20 15 -15}, ...
+ 'body_angle_const' , {20 -20 15 -15}) + """ + + # NOTE: We need to run omegas first (false values) since upsilons are + # more inclusive, but can not occur if an omega event occurs + is_upsilon = [False, False, True, True] + + # NOTE: We assign different values based on the sign of the angles + values_to_assign = [1, -1, 1, -1] + + frames = collections.namedtuple('frames', + ['omega_frames', 'upsilon_frames']) + + frames.omega_frames = np.zeros(n_frames) + frames.upsilon_frames = np.zeros(n_frames) + + for i in range(4): + consts.head_angle_start_const = yuck[0][i] + consts.tail_angle_start_const = yuck[1][i] + consts.head_angle_end_const = yuck[2][i] + consts.tail_angle_end_const = yuck[3][i] + consts.body_angle_const = yuck[4][i] + condition_indices = self.h__getConditionIndices(angles, consts) + self.h__populateFrames(angles, + condition_indices, + frames, + is_upsilon[i], + values_to_assign[i]) + + # Calculate the events from the frame values + self.omegas = OmegaTurns.create(options, + frames.omega_frames, + nw, + body_angles_for_ht_change, + midbody_distance, + fps) + + self.upsilons = UpsilonTurns.create(frames.upsilon_frames, + midbody_distance, + fps) + + timer.toc('locomotion.turns') + + def __repr__(self): + return utils.print_object(self) + + @classmethod + def from_disk(cls, turns_ref): + + self = cls.__new__(cls) + + self.omegas = OmegaTurns.from_disk(turns_ref) + self.upsilons = UpsilonTurns.from_disk(turns_ref) + + return self + + def __eq__(self, other): + return self.upsilons.test_equality( + other.upsilons, 'locomotion.turns.upsilons') and self.omegas.test_equality( + other.omegas, 'locomotion.turns.omegas') + + def h__interpolateAngles(self, angles, MAX_INTERPOLATION_GAP_ALLOWED): + """ + Interpolate the angles in the head, body, and tail. + For the body, also interpolate with a threshold, and assign this + to body_angles_with_long_nans + + Parameters + --------------------------------------- + angles: a named tuple + angles = collections.namedtuple('angles', + ['head_angles', + 'body_angles', + 'tail_angles', + 'body_angles_with_long_nans', + 'is_stage_movement']) + + + Returns + --------------------------------------- + None; instead the angles parameter has been modified in place + + Notes + --------------------------------------- + Formerly a = h__interpolateAngles(a, MAX_INTERPOLATION_GAP_ALLOWED) + + TODO: Incorporate into the former + seg_worm.feature_helpers.interpolateNanData + + """ + # Let's use a shorter expression for clarity + interp = utils.interpolate_with_threshold + + # This might not actually have been applied - SEGWORM_MC used BodyAngles + # - @JimHokanson + angles.body_angles_with_long_nans = interp( + angles.body_angles, MAX_INTERPOLATION_GAP_ALLOWED + 1, make_copy=True) + + interp(angles.head_angles, make_copy=False) + interp(angles.body_angles, make_copy=False) + interp(angles.tail_angles, make_copy=False) + + def h__getConditionIndices(self, a, c): + """ + This function implements a filter on the frames for the different + conditions that we are looking for in order to get a particular turn. + + It does not however provide any logic on their relative order, i.e. + that one condition occurs before another. This is done in a later + function, h__populateFrames. 
+ + Parameters + --------------------------------------- + a: + + c: + + + Notes + --------------------------------------- + Formerly s = h__getConditionIndices(a, c) + + """ + + # Determine comparison function + #---------------------------------------------------------- + is_positive = c.head_angle_start_const > 0 + if is_positive: + fh = operator.gt + else: + fh = operator.lt + + # start: when the head exceeds its angle but the tail does not + # end : when the tail exceeds its angle but the head does not + + # TODO: Rename to convention ... + s = collections.namedtuple('stuffs', + ['startCond', + 'startInds', + 'midCond', + 'midStarts', + 'midEnds', + 'endCond', + 'endInds']) + + def find_diff(array, value): + # diff on logical array doesn't work the same as it does in Matlab + return np.flatnonzero(np.diff(array.astype(int)) == value) + + with np.errstate(invalid='ignore'): + s.startCond = fh(a.head_angles, c.head_angle_start_const) & \ + (np.abs(a.tail_angles) < c.tail_angle_start_const) + + # add 1 for shift due to diff + s.startInds = find_diff(s.startCond, 1) + 1 + + # NOTE: This is NaN check is a bit suspicious, as it implies that the + # head and tail are parsed, but the body is not. The original code puts + # NaN back in for long gaps in the body angle, so it is possible that + # the body angle is NaN but the others are not. + with np.errstate(invalid='ignore'): + s.midCond = fh(a.body_angles, c.body_angle_const) | \ + np.isnan(a.body_angles_with_long_nans) + + # add 1 for shift due to diff + s.midStarts = find_diff(s.midCond, 1) + 1 + s.midEnds = find_diff(s.midCond, -1) + + with np.errstate(invalid='ignore'): + s.endCond = np.logical_and(fh(a.tail_angles, c.tail_angle_end_const), + np.abs(a.head_angles) < + c.head_angle_end_const) + + s.endInds = find_diff(s.endCond, -1) + + return s + + def h__populateFrames(self, a, s, f, get_upsilon_flag, value_to_assign): + """ + + Algorithm + --------------------------------------- + - For the middle angle range, ensure one frame is valid and that + the frame proceeding the start and following the end are valid + - Find start indices and end indices that bound this range + - For upsilons, exclude if they overlap with an omega bend ... + + + Parameters + --------------------------------------- + a: named tuple + head_angles: [1x4642 double] + body_angles: [1x4642 double] + tail_angles: [1x4642 double] + is_stage_movement: [1x4642 logical] + bodyAngle: [1x4642 double] + + s: named tuple + startCond: [1x4642 logical] + startInds: [1x81 double] + midCond: [1x4642 logical] + midStarts: [268 649 881 996 1101 1148 1202 1963 3190 3241 4144 4189 4246 4346 4390 4457 4572 4626] + midEnds: [301 657 925 1009 1103 1158 1209 1964 3196 3266 4148 4200 4258 4350 4399 4461 4579] + endCond: [1x4642 logical] + endInds: [1x47 double] + + f: named tuple + omegaFrames: [4642x1 double] + upsilonFrames: [4642x1 double] + + get_upsilon_flag: bool + Toggled based on whether or not we are getting upsilon events + or omega events + + value_to_assign: + + + Returns + --------------------------------------- + None; modifies parameters in place. + + + Notes + --------------------------------------- + Formerly f = h__populateFrames(a,s,f,get_upsilon_flag,value_to_assign) + + """ + + for cur_mid_start_I in s.midStarts: + + # JAH NOTE: This type of searching is inefficient in Matlab since + # the data is already sorted. It could be improved ... 
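+            # One possible improvement (an untested sketch, not part of the
+            # original code): since s.midEnds is sorted, a binary search would
+            # avoid scanning the whole array, e.g.
+            #     idx = np.searchsorted(s.midEnds, cur_mid_start_I, side='right')
+            #     cur_mid_end_I = s.midEnds[idx] if idx < s.midEnds.size else None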
+            temp = np.flatnonzero(s.midEnds > cur_mid_start_I)
+
+            # cur_mid_end_I = s.midEnds[find(s.midEnds > cur_mid_start_I, 1))
+
+            if temp.size != 0:
+                cur_mid_end_I = s.midEnds[temp[0]]
+
+                if (~np.all(a.is_stage_movement[
+                        cur_mid_start_I:cur_mid_end_I + 1]) and
+                        s.startCond[cur_mid_start_I - 1] and
+                        s.endCond[cur_mid_end_I + 1]):
+
+                    temp2 = np.flatnonzero(s.startInds < cur_mid_start_I)
+                    temp3 = np.flatnonzero(s.endInds > cur_mid_end_I)
+
+                    if temp2.size != 0 and temp3.size != 0:
+                        cur_start_I = s.startInds[temp2[-1]]
+                        cur_end_I = s.endInds[temp3[0]]
+
+                        if get_upsilon_flag:
+                            # Don't populate upsilon if the data spans an omega
+                            if ~np.any(np.abs(
+                                    f.omega_frames[cur_start_I:cur_end_I + 1])):
+                                f.upsilon_frames[
+                                    cur_start_I:cur_end_I + 1] = value_to_assign
+                        else:
+                            f.omega_frames[
+                                cur_start_I:cur_end_I + 1] = value_to_assign
+
+        # Nothing needs to be returned since we have modified our parameters
+        # in place
+        return None
+
+
+"""
+===============================================================================
+===============================================================================
+"""
+
+#%%
+
+
+class UpsilonTurns(object):
+
+    """
+    Represents the Upsilon turn events
+
+
+    Notes
+    ---------------------------------------
+    Formerly this was not implemented as a class.
+
+    """
+
+    def __init__(self, upsilon_frames, midbody_distance, fps):
+        """
+        Initialiser for the UpsilonTurns class.
+
+        Parameters
+        ----------
+        upsilon_frames :
+        midbody_distance :
+        fps :
+
+        Notes
+        ---------------------------------------
+        Formerly, in the SegWormMatlabClasses repo, this was not the
+        constructor of a class, but a locomotion method called
+        getUpsilonEvents(obj,upsilon_frames,midbody_distance,FPS)
+
+        """
+
+        self.value = getTurnEventsFromSignedFrames(upsilon_frames,
+                                                   midbody_distance,
+                                                   fps)
+
+        self.no_events = self.value.is_null
+
+    @staticmethod
+    def create(upsilon_frames, midbody_distance, fps):
+
+        temp = UpsilonTurns(upsilon_frames, midbody_distance, fps)
+
+        return temp.value
+
+    @classmethod
+    def from_disk(cls, turns_ref):
+
+        return events.EventListWithFeatures.from_disk(
+            turns_ref['upsilons'], 'MRC')
+
+
+"""
+===============================================================================
+===============================================================================
+"""
+
+#%%
+
+
+class OmegaTurns(object):
+
+    """
+    Represents the Omega turn events
+
+    Properties
+    ---------------------------------------
+    omegas
+
+    Methods
+    ---------------------------------------
+    __init__
+    h_getHeadTailDirectionChange
+    h__filterAndSignFrames
+
+    """
+
+    def __init__(self, options, omega_frames_from_angles, nw, body_angles,
+                 midbody_distance, fps):
+        """
+        Initialiser for the OmegaTurns class.
+
+        Parameters
+        ----------
+        omega_frames_from_angles: [1 x n_frames]
+            Each frame has the value 0, 1, or -1.
+
+        nw: NormalizedWorm instance
+            We only use it for its skeleton.
+
+        body_angles
+            average bend angle of the middle third of the worm
+
+        midbody_distance:
+
+        fps: float
+            Frames per second
+
+
+        Returns
+        ---------------------------------------
+        None
+
+
+        Notes
+        ---------------------------------------
+        Formerly, in the SegWormMatlabClasses repo, this was not the
+        initialiser of a class, but a locomotion method called
+        omega_events = getOmegaEvents(obj,omega_frames_from_angles,sx,sy,
+                                      body_angles,midbody_distance,fps)
+
+        omega_events was an event structure; now self.omegas just contains
+        the omega turns.
+ + See also: + seg_worm.features.locomotion.getOmegaAndUpsilonTurns + seg_worm.features.locomotion.getTurnEventsFromSignedFrames + + """ + + body_angles_i = \ + utils.interpolate_with_threshold(body_angles, extrapolate=True) + + self.omegas = None # DEBUG: remove once the below code is ready + + omega_frames_from_th_change = self.h_getHeadTailDirectionChange( + nw, fps) + + # Filter: + # This is to be consistent with the old code. We filter then merge, + # then filter again :/ + omega_frames_from_th_change = \ + self.h__filterAndSignFrames(body_angles_i, + omega_frames_from_th_change, + options.min_omega_event_length(fps)) + + is_omega_frame = (omega_frames_from_angles != 0) | \ + (omega_frames_from_th_change != 0) + + # Refilter and sign + signed_omega_frames = \ + self.h__filterAndSignFrames(body_angles_i, + is_omega_frame, + options.min_omega_event_length(fps)) + + # Convert frames to events ... + self.value = getTurnEventsFromSignedFrames(signed_omega_frames, + midbody_distance, + fps) + + self.no_events = self.value.is_null + + @staticmethod + def create( + options, + omega_frames_from_angles, + nw, + body_angles, + midbody_distance, + fps): + + temp = OmegaTurns(options, omega_frames_from_angles, nw, body_angles, + midbody_distance, fps) + + return temp.value + + @classmethod + def from_disk(cls, turns_ref): + + return events.EventListWithFeatures.from_disk( + turns_ref['omegas'], 'MRC') + + def h_getHeadTailDirectionChange(self, nw, FPS): + """ + + + Parameters + --------------------------------------- + nw: A NormalizedWorm instance + + FPS: int + Frames Per Second + + + Returns + --------------------------------------- + A boolean numpy array indicating in each frame whether or not + it's an omega angle change + + + Notes + --------------------------------------- + Formerly is_omega_angle_change = h_getHeadTailDirectionChange(FPS,sx,sy) + + NOTE: This change in direction of the head and tail indicates that + either a turn occurred OR that an error in the parsing occurred. + Basically we look for the angle from the head to the tail to all of a + sudden change by 180 degrees. + + """ + MAX_FRAME_JUMP_FOR_ANGLE_DIFF = round(FPS / 2) + + # We compute a smoothed estimate of the angle change by using angles at + # indices that are +/- this value ... + HALF_WINDOW_SIZE = round(FPS / 4) + + # NOTE: It would be better to have this be based on time, not samples + MAX_INTERP_GAP_SIZE = 119 + + #????!!!!?? - why is this a per frame value instead of an average angular + # velocity ???? + PER_FRAME_DEGREE_CHANGE_CUTOFF = 3 + + # Compute tail direction + #---------------------------------------------------- + head_x, head_y = nw.get_partition('head', + data_key='skeleton', + split_spatial_dimensions=True) + tail_x, tail_y = nw.get_partition('tail', + data_key='skeleton', + split_spatial_dimensions=True) + + # Take the mean across the partition, so that we are left with a single + # value for each frame (i.e. 1-d an array of length n_frames) + # Suppress RuntimeWarning: Mean of empty slice for those frames + # that are ALL NaN. 
+ with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + head_x = np.nanmean(head_x, axis=0) + head_y = np.nanmean(head_y, axis=0) + tail_x = np.nanmean(tail_x, axis=0) + tail_y = np.nanmean(tail_y, axis=0) + + th_angle = np.arctan2(head_y - tail_y, head_x - tail_x) * (180 / np.pi) + + n_frames = len(th_angle) + + # Changed angles to being relative to the previous frame + #---------------------------------------------------- + # Compute the angle change between subsequent frames. If a frame is not + # valid, we'll use the last valid frame to define the difference, unless the + # gap is too large. + + is_good_th_direction_value = ~np.isnan(th_angle) + + lastAngle = th_angle[0] + gapCounter = 0 + + th_angle_diff_temp = np.empty(th_angle.size) * np.NAN + for iFrame in range(n_frames)[1:]: # formerly 2:n_frames + if is_good_th_direction_value[iFrame]: + th_angle_diff_temp[iFrame] = th_angle[iFrame] - lastAngle + gapCounter = 0 + lastAngle = th_angle[iFrame] + else: + gapCounter += 1 + + if gapCounter > MAX_FRAME_JUMP_FOR_ANGLE_DIFF: + lastAngle = np.NaN + + #???? - what does this really mean ?????? + # I think this basically says, instead of looking for gaps in the original + # th_angle, we need to take into account how much time has passed between + # successive differences + # + # i.e. instead of doing a difference in angles between all valid frames, we + # only do a difference if the gap is short enough + + # We go through some heroics to avoid the "RuntimeWarning: invalid + # value encountered" warning + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + + positiveJumps = np.flatnonzero(th_angle_diff_temp > 180) + negativeJumps = np.flatnonzero(th_angle_diff_temp < -180) + + # For example data, these are the indices I get ... 
+        #P - 4625
+        #N - 3634, 4521
+
+        # Fix the th_angles by unwrapping
+        #----------------------------------------------------
+        # NOTE: We are using the identified jumps from the fixed angles to
+        # unwrap the original angle vector.
+        # Subtract 360 degrees from the remaining data after positive jumps
+        # (the angles are in degrees, so 2 * 180 plays the role of 2*pi)
+        for j in range(len(positiveJumps)):
+            th_angle[positiveJumps[j]:] = th_angle[positiveJumps[j]:] - 2 * 180
+
+        # Add 360 degrees to the remaining data after negative jumps
+        for j in range(len(negativeJumps)):
+            th_angle[negativeJumps[j]:] = th_angle[negativeJumps[j]:] + 2 * 180
+
+        # Fix the th_angles through interpolation
+        #----------------------------------------------------
+        th_angle = \
+            utils.interpolate_with_threshold(th_angle,
+                                             MAX_INTERP_GAP_SIZE + 1,
+                                             make_copy=False,
+                                             extrapolate=False)
+
+        # Determine frames that might be omega events (we'll filter later
+        # based on length)
+        #----------------------------------------------------
+        # Compute angle difference
+        th_angle_diff = np.empty(len(th_angle)) * np.NaN
+
+        left_indices = np.array(range(n_frames)) - HALF_WINDOW_SIZE
+        right_indices = np.array(range(n_frames)) + HALF_WINDOW_SIZE
+
+        mask = (left_indices > 1) & (right_indices < n_frames)
+
+        th_angle_diff[mask] = th_angle[right_indices[mask].astype(int)] - \
+            th_angle[left_indices[mask].astype(int)]
+
+        avg_angle_change_per_frame = abs(
+            th_angle_diff / (HALF_WINDOW_SIZE * 2))
+
+        # Now return whether or not it's an omega angle change
+        # Again we go through some heroics to avoid the "RuntimeWarning:
+        # invalid value encountered" warning
+
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            return avg_angle_change_per_frame > PER_FRAME_DEGREE_CHANGE_CUTOFF
+
+    def h__filterAndSignFrames(self, body_angles_i, is_omega_frame,
+                               min_omega_event_length):
+        """
+        Filter and sign frames.
+
+        Notes
+        ---------------------------------------
+        Formerly signed_omega_frames =
+            h__filterAndSignFrames(body_angles_i, is_omega_frame,
+                                   MIN_OMEGA_EVENT_LENGTH)
+
+        """
+        # Let's take a boolean numpy array and change it to a string where
+        # A is false and B is true: e.g.
+        # [True, True, False] turns into 'BBA'
+        # (Note: this is all a translation of this Matlab line:
+        # [start1, end1] = \
+        #     regexp(is_omega_frame_as_string, gap_str, 'start', 'end')
+        is_omega_frame_as_ascii_codes = is_omega_frame.astype(int) + ord('A')
+        is_omega_frame_as_list = [chr(x)
+                                  for x in is_omega_frame_as_ascii_codes]
+        is_omega_frame_as_string = ''.join(is_omega_frame_as_list)
+        gap_re = re.compile(r'B{%d,}' % min_omega_event_length)
+        # Obtain an iterator of the results that match our regex, gap_re.
+        re_result = list(gap_re.finditer(is_omega_frame_as_string))
+        start1 = [m.start(0) for m in re_result]
+        end1 = [m.end(0) for m in re_result]
+
+        signed_omega_frames = np.zeros(is_omega_frame.size)
+
+        # Note: Here we keep the long gaps instead of removing them
+        for iEvent in range(len(start1)):
+            if np.mean(body_angles_i[start1[iEvent]:end1[iEvent]]) > 0:
+                signed_omega_frames[start1[iEvent]:end1[iEvent]] = 1
+            else:
+                signed_omega_frames[start1[iEvent]:end1[iEvent]] = -1
+
+        return signed_omega_frames
+
+
+"""
+===============================================================================
+===============================================================================
+"""
+
+#%%
+
+
+def getTurnEventsFromSignedFrames(signed_frames, midbody_distance, FPS):
+    """
+    Get turn events from signed frames
+
+    Parameters
+    ---------------------------------------
+    signed_frames:
+        ???
- I believe the values are -1 or 1, based on + whether something is dorsal or ventral .... + + + Notes + --------------------------------------- + This code is common to omega and upsilon turns. + + Formerly function turn_events = \ + seg_worm.features.locomotion.getTurnEventsFromSignedFrames( + obj,signed_frames,midbody_distance,FPS) + + Called by: + seg_worm.features.locomotion.getUpsilonEvents + seg_worm.features.locomotion.getOmegaEvents + + """ + + ef = events.EventFinder() + ef.include_at_frames_threshold = True + + # get_events(self, speed_data, distance_data=None): + + # JAH: This interface doesn't make as much sense anymore ... + + # seg_worm.feature.event_finder.getEvents + ef.min_speed_threshold = 1 + + frames_dorsal = ef.get_events(signed_frames) + + ef = events.EventFinder() + ef.include_at_frames_threshold = True + ef.min_speed_threshold = None + ef.max_speed_threshold = -1 + frames_ventral = ef.get_events(signed_frames) + + # Unify the ventral and dorsal turns. + [frames_merged, is_ventral] = events.EventList.merge(frames_ventral, + frames_dorsal) + + turn_event_output = events.EventListWithFeatures(FPS, + frames_merged, + midbody_distance) + + turn_event_output.is_ventral = is_ventral + + """ + Note that in the past, the former (Matlab) code for this function + added an is_ventral to each FRAME. EventListForOutput does not have a + frames variable, so instead we simply have an is_ventral numpy array. + - @MichaelCurrie + + Here is the former code, using correct variable names and Python syntax: + + # Add extra field, isVentral ... + for iEvent = range(len(turn_event_output.frames)): + turn_event_output.frames[iEvent].isVentral = is_ventral[iEvent] + + """ + + return turn_event_output + + +class TurnProcessor(Feature): + + """ + Feature: 'locomotion.turn_processor' + + LocomotionTurns + + Attributes + ---------- + omegas : OmegaTurns + upsilons : UpsilonTurns + + Methods + ------- + __init__ + + """ + + def __init__(self, wf, feature_name): + """ + Initialiser for the LocomotionTurns class + + Parameters + ---------- + features_ref : + bend_angles : + is_stage_movement : + midbody_distance : + sx : + sy : + + Notes + --------------------------------------- + Formerly getOmegaAndUpsilonTurns + + Old Name: + - featureProcess.m + - omegaUpsilonDetectCurvature.m + + """ + + # features_ref, , , midbody_distance, sx, sy + + self.name = feature_name + + options = wf.options.locomotion.locomotion_turns + + video_info = wf.video_info + fps = video_info.fps + is_stage_movement = video_info.is_stage_movement + + nw = wf.nw + bend_angles = nw.angles + + #sx = nw.skeleton_x + #sy = nw.skeleton_y + + midbody_distance = self.get_feature( + wf, 'locomotion.velocity.mibdody.distance').value + + timer = wf.timer + timer.tic() + + n_frames = bend_angles.shape[1] + + angles = collections.namedtuple('angles', + ['head_angles', + 'body_angles', + 'tail_angles', + 'body_angles_with_long_nans', + 'is_stage_movement']) + + first_third = nw.get_subset_partition_mask('first_third') + second_third = nw.get_subset_partition_mask('second_third') + last_third = nw.get_subset_partition_mask('last_third') + + # NOTE: For some reason the first and last few angles are NaN, so we use + # nanmean instead of mean. We could probably avoid this for the body. + # Suppress RuntimeWarning: Mean of empty slice for those frames + # that are ALL NaN. 
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', category=RuntimeWarning)
+            angles.head_angles = np.nanmean(
+                bend_angles[first_third, :], axis=0)
+            angles.body_angles = np.nanmean(
+                bend_angles[second_third, :], axis=0)
+            angles.tail_angles = np.nanmean(bend_angles[last_third, :], axis=0)
+        angles.is_stage_movement = is_stage_movement
+
+        n_head = np.sum(~np.isnan(angles.head_angles))
+        n_body = np.sum(~np.isnan(angles.body_angles))
+        n_tail = np.sum(~np.isnan(angles.tail_angles))
+
+        # Only proceed if there are at least two non-NaN
+        # values in each angle vector
+        if n_head < 2 or n_body < 2 or n_tail < 2:
+            # Make omegas and upsilons into blank events lists and return
+            self.omegas = events.EventListWithFeatures(fps, make_null=True)
+            self.upsilons = events.EventListWithFeatures(fps, make_null=True)
+            return
+
+        # Deep copy.
+        body_angles_for_head_tail_change = np.copy(angles.body_angles)
+
+        # Interpolate the angles. angles is modified.
+        self.h__interpolateAngles(
+            angles, options.max_interpolation_gap_allowed)
+
+        # Get frames for each turn type
+        #----------------------------------------------------------------------
+        # This doesn't match what is written in the supplemental material ...
+        # Am I working off of old code??????
+
+        # TODO: Move this all to options ...
+        consts = collections.namedtuple('consts',
+                                        ['head_angle_start_const',
+                                         'tail_angle_start_const',
+                                         'head_angle_end_const',
+                                         'tail_angle_end_const',
+                                         'body_angle_const'])
+
+        yuck = [[20, -20, 15, -15],
+                [30, 30, 30, 30],
+                [40, 40, 30, 30],
+                [20, -20, 15, -15],
+                [20, -20, 15, -15]]
+        """
+        OLD Matlab CODE:
+
+        consts = struct(...
+            'head_angle_start_const',{20 -20 15 -15}, ...
+            'tail_angle_start_const',{30 30 30 30}, ...
+            'head_angle_end_const',  {40 40 30 30}, ...
+            'tail_angle_end_const',  {20 -20 15 -15}, ...
+            'body_angle_const'      , {20 -20 15 -15})
+        """
+
+        # NOTE: We need to run omegas first (false values) since upsilons are
+        # more inclusive, but cannot occur if an omega event occurs
+        is_upsilon = [False, False, True, True]
+
+        # NOTE: We assign different values based on the sign of the angles
+        values_to_assign = [1, -1, 1, -1]
+
+        frames = collections.namedtuple('frames',
+                                        ['omega_frames', 'upsilon_frames'])
+
+        frames.omega_frames = np.zeros(n_frames)
+        frames.upsilon_frames = np.zeros(n_frames)
+
+        for i in range(4):
+            consts.head_angle_start_const = yuck[0][i]
+            consts.tail_angle_start_const = yuck[1][i]
+            consts.head_angle_end_const = yuck[2][i]
+            consts.tail_angle_end_const = yuck[3][i]
+            consts.body_angle_const = yuck[4][i]
+            condition_indices = self.h__getConditionIndices(angles, consts)
+            self.h__populateFrames(angles,
+                                   condition_indices,
+                                   frames,
+                                   is_upsilon[i],
+                                   values_to_assign[i])
+
+        # Calculate the events from the frame values
+        self.omegas = OmegaTurns.create(options,
+                                        frames.omega_frames,
+                                        nw,
+                                        body_angles_for_head_tail_change,
+                                        midbody_distance,
+                                        fps)
+
+        self.upsilons = UpsilonTurns.create(frames.upsilon_frames,
+                                            midbody_distance,
+                                            fps)
+
+        timer.toc('locomotion.turns')
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        self = cls.__new__(cls)
+        self.name = feature_name
+
+        # TODO: This should be a method, somewhere ...
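+        # Read the precomputed omega and upsilon event structures from the
+        # Schafer-lab HDF5 layout; from_disk(..., 'MRC') converts each group
+        # into an EventListWithFeatures.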
+ ref = utils.get_nested_h5_field( + wf.h, ['locomotion', 'turns', 'omegas'], resolve_value=False) + self.omegas = events.EventListWithFeatures.from_disk(ref, 'MRC') + + ref = utils.get_nested_h5_field( + wf.h, ['locomotion', 'turns', 'upsilons'], resolve_value=False) + self.upsilons = events.EventListWithFeatures.from_disk(ref, 'MRC') + + return self + +# @classmethod +# def from_disk(cls, turns_ref): +# +# self = cls.__new__(cls) +# +# self.omegas = OmegaTurns.from_disk(turns_ref) +# self.upsilons = UpsilonTurns.from_disk(turns_ref) +# +# return self +# +# def __eq__(self, other): +# return \ +# self.upsilons.test_equality(other.upsilons,'locomotion.turns.upsilons') and \ +# self.omegas.test_equality(other.omegas,'locomotion.turns.omegas') + + def h__interpolateAngles(self, angles, MAX_INTERPOLATION_GAP_ALLOWED): + """ + Interpolate the angles in the head, body, and tail. + For the body, also interpolate with a threshold, and assign this + to body_angles_with_long_nans + + Parameters + --------------------------------------- + angles: a named tuple + angles = collections.namedtuple('angles', + ['head_angles', + 'body_angles', + 'tail_angles', + 'body_angles_with_long_nans', + 'is_stage_movement']) + + + Returns + --------------------------------------- + None; instead the angles parameter has been modified in place + + Notes + --------------------------------------- + Formerly a = h__interpolateAngles(a, MAX_INTERPOLATION_GAP_ALLOWED) + + TODO: Incorporate into the former + seg_worm.feature_helpers.interpolateNanData + + """ + # Let's use a shorter expression for clarity + interp = utils.interpolate_with_threshold + + # This might not actually have been applied - SEGWORM_MC used BodyAngles + # - @JimHokanson + angles.body_angles_with_long_nans = interp( + angles.body_angles, MAX_INTERPOLATION_GAP_ALLOWED + 1, make_copy=True) + + interp(angles.head_angles, make_copy=False) + interp(angles.body_angles, make_copy=False) + interp(angles.tail_angles, make_copy=False) + + def h__getConditionIndices(self, a, c): + """ + This function implements a filter on the frames for the different + conditions that we are looking for in order to get a particular turn. + + It does not however provide any logic on their relative order, i.e. + that one condition occurs before another. This is done in a later + function, h__populateFrames. + + Parameters + --------------------------------------- + a: + + c: + + + Notes + --------------------------------------- + Formerly s = h__getConditionIndices(a, c) + + """ + + # Determine comparison function + #---------------------------------------------------------- + is_positive = c.head_angle_start_const > 0 + if is_positive: + fh = operator.gt + else: + fh = operator.lt + + # start: when the head exceeds its angle but the tail does not + # end : when the tail exceeds its angle but the head does not + + # TODO: Rename to convention ... 
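+        # Note: the namedtuple below is never instantiated; attributes are
+        # assigned directly on the class object, so it effectively serves as
+        # a plain namespace (types.SimpleNamespace would arguably be clearer).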
+ s = collections.namedtuple('stuffs', + ['startCond', + 'startInds', + 'midCond', + 'midStarts', + 'midEnds', + 'endCond', + 'endInds']) + + def find_diff(array, value): + # diff on logical array doesn't work the same as it does in Matlab + return np.flatnonzero(np.diff(array.astype(int)) == value) + + with np.errstate(invalid='ignore'): + s.startCond = fh(a.head_angles, c.head_angle_start_const) & \ + (np.abs(a.tail_angles) < c.tail_angle_start_const) + + # add 1 for shift due to diff + s.startInds = find_diff(s.startCond, 1) + 1 + + # NOTE: This is NaN check is a bit suspicious, as it implies that the + # head and tail are parsed, but the body is not. The original code puts + # NaN back in for long gaps in the body angle, so it is possible that + # the body angle is NaN but the others are not. + with np.errstate(invalid='ignore'): + s.midCond = fh(a.body_angles, c.body_angle_const) | \ + np.isnan(a.body_angles_with_long_nans) + + # add 1 for shift due to diff + s.midStarts = find_diff(s.midCond, 1) + 1 + s.midEnds = find_diff(s.midCond, -1) + + with np.errstate(invalid='ignore'): + s.endCond = np.logical_and(fh(a.tail_angles, c.tail_angle_end_const), + np.abs(a.head_angles) < + c.head_angle_end_const) + + s.endInds = find_diff(s.endCond, -1) + + return s + + def h__populateFrames(self, a, s, f, get_upsilon_flag, value_to_assign): + """ + + Algorithm + --------------------------------------- + - For the middle angle range, ensure one frame is valid and that + the frame proceeding the start and following the end are valid + - Find start indices and end indices that bound this range + - For upsilons, exclude if they overlap with an omega bend ... + + + Parameters + --------------------------------------- + a: named tuple + head_angles: [1x4642 double] + body_angles: [1x4642 double] + tail_angles: [1x4642 double] + is_stage_movement: [1x4642 logical] + bodyAngle: [1x4642 double] + + s: named tuple + startCond: [1x4642 logical] + startInds: [1x81 double] + midCond: [1x4642 logical] + midStarts: [268 649 881 996 1101 1148 1202 1963 3190 3241 4144 4189 4246 4346 4390 4457 4572 4626] + midEnds: [301 657 925 1009 1103 1158 1209 1964 3196 3266 4148 4200 4258 4350 4399 4461 4579] + endCond: [1x4642 logical] + endInds: [1x47 double] + + f: named tuple + omegaFrames: [4642x1 double] + upsilonFrames: [4642x1 double] + + get_upsilon_flag: bool + Toggled based on whether or not we are getting upsilon events + or omega events + + value_to_assign: + + + Returns + --------------------------------------- + None; modifies parameters in place. 
+ + + Notes + --------------------------------------- + Formerly f = h__populateFrames(a,s,f,get_upsilon_flag,value_to_assign) + + """ + + for cur_mid_start_I in s.midStarts: + + # JAH NOTE: This type of searching is inefficient since the data + # are sorted + temp = np.flatnonzero(s.midEnds > cur_mid_start_I) + + if temp.size != 0: + cur_mid_end_I = s.midEnds[temp[0]] + + if ~np.all( + a.is_stage_movement[ + cur_mid_start_I:cur_mid_end_I + + 1]) and s.startCond[ + cur_mid_start_I - + 1] and s.endCond[ + cur_mid_end_I + + 1]: + + temp2 = np.flatnonzero(s.startInds < cur_mid_start_I) + temp3 = np.flatnonzero(s.endInds > cur_mid_end_I) + + if temp2.size != 0 and temp3.size != 0: + cur_start_I = s.startInds[temp2[-1]] + cur_end_I = s.endInds[temp3[0]] + + if get_upsilon_flag: + # Don't populate upsilon if the data spans an omega + if ~np.any( + np.abs( + f.omega_frames[ + cur_start_I:cur_end_I + + 1])): + f.upsilon_frames[ + cur_start_I:cur_end_I + 1] = value_to_assign + else: + f.omega_frames[ + cur_start_I:cur_end_I + 1] = value_to_assign + + # Nothing needs to be returned since we have modified our parameters + # in place + return None + + +class NewUpsilonTurns(Feature): + """ + Feature: locomotion.upsilon_turns + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature(wf, 'locomotion.turn_processor').upsilons + self.no_events = self.value.is_null + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class NewOmegaTurns(Feature): + """ + Feature: 'locomotion.omega_turns' + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature(wf, 'locomotion.turn_processor').omegas + self.no_events = self.value.is_null + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/master_eigen_worms_N2.mat b/tierpsy/features/open_worm_analysis_toolbox/features/master_eigen_worms_N2.mat new file mode 100755 index 00000000..b02482d5 Binary files /dev/null and b/tierpsy/features/open_worm_analysis_toolbox/features/master_eigen_worms_N2.mat differ diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/morphology_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/morphology_features.py new file mode 100755 index 00000000..c96faa1f --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/morphology_features.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +""" +morphology_features.py + +""" +import numpy as np + +from .generic_features import Feature +from .. import utils + + +class Widths(object): + """ + This is no longer used in the new code and can be deleted when ready + + Attributes + ---------- + head : + midbody : + tail : + """ + + fields = ('head', 'midbody', 'tail') + + def __init__(self, features_ref): + """ + Parameters + ---------- + features_ref : WormFeatures instance + + Note the current approach just computes the mean of the + different body section widths. Eventually this should be + computed in this class. 
+ + """ + nw = features_ref.nw + + for partition in self.fields: + widths_in_partition = nw.get_partition(partition, 'widths') + setattr(self, partition, np.mean(widths_in_partition, 0)) + + @classmethod + def from_disk(cls, width_ref): + + self = cls.__new__(cls) + + for partition in self.fields: + widths_in_partition = utils._extract_time_from_disk(width_ref, + partition) + setattr(self, partition, widths_in_partition) + + return self + + def __eq__(self, other): + return ( + utils.correlation(self.head, other.head, + 'morph.width.head') and + utils.correlation(self.midbody, other.midbody, + 'morph.width.midbody') and + utils.correlation(self.tail, other.tail, + 'morph.width.tail')) + + def __repr__(self): + return utils.print_object(self) + +#====================================================================== +# NEW CODE +#====================================================================== +# +# +# Still need to handle equality comparison and loading from disk + + +class Length(Feature): + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = wf.nw.length + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.value = utils.get_nested_h5_field(wf.h, ['morphology', 'length']) + return self + + +class WidthSection(Feature): + """ + This should only be called by the subclasses + + Attributes + ---------- + partition_name + """ + + def __init__(self, wf, feature_name, partition_name): + """ + Parameters + ---------- + wf : WormFeatures instance + """ + + self.name = feature_name + self.partition_name = partition_name + # I'm not thrilled with the name of this method + widths_in_partition = wf.nw.get_partition(partition_name, 'widths') + self.value = np.mean(widths_in_partition, 0) + + @classmethod + def from_schafer_file(cls, wf, feature_name, partition_name): + self = cls.__new__(cls) + self.name = feature_name + self.value = utils.get_nested_h5_field( + wf.h, ['morphology', 'width', partition_name]) + return self + + +class Area(Feature): + """ + Feature: morphology.area + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = wf.nw.area + # We should instead be using this, NYI + #self.value = self.get_feature(wf,'nw.area').value + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.value = utils.get_nested_h5_field(wf.h, ['morphology', 'area']) + return self + + +class AreaPerLength(Feature): + """ + Feature: morphology.area_per_length + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + area = self.get_feature(wf, 'morphology.area').value + length = self.get_feature(wf, 'morphology.length').value + self.value = area / length + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class WidthPerLength(Feature): + """ + Feature: morphology.width_per_length + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + width = self.get_feature(wf, 'morphology.width.midbody').value + length = self.get_feature(wf, 'morphology.length').value + self.value = width / length + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/path_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/path_features.py new file mode 100755 index 00000000..037b58fb --- /dev/null +++ 
b/tierpsy/features/open_worm_analysis_toolbox/features/path_features.py @@ -0,0 +1,552 @@ +# -*- coding: utf-8 -*- +""" +Path Features Include: +---------------------- +TODO: Create basic list + +""" + +import numpy as np + +from .. import config, utils + +# To avoid conflicting with variables named 'velocity', we +# import this as 'velocity_module': +from . import velocity as velocity_module +from . import generic_features +from .generic_features import Feature + + +class Coordinates(Feature): + + """ + Feature: path.coordinates + + Attributes + ---------- + x : + y : + """ + + def __init__(self, wf, feature_name): + + self.name = feature_name + nw = wf.nw + self.x = nw.contour_x.mean(axis=0) + self.y = nw.contour_y.mean(axis=0) + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.x = utils.get_nested_h5_field(wf.h, ['path', 'coordinates', 'x']) + self.y = utils.get_nested_h5_field(wf.h, ['path', 'coordinates', 'y']) + return self + + @classmethod + def from_disk(cls, c_data): + + self = cls.__new__(cls) + + # Use utils loader + self.x = c_data['x'].value[:, 0] + self.y = c_data['y'].value[:, 0] + + return self + + +class Range(object): + + """ + + This is the old class. + + Delete when ready and change "NewRange" to Range + both in the code and in the features list. + + Attributes + ------------------ + value : + + """ + + def __init__(self, contour_x, contour_y): + + # Get average per frame + #------------------------------------------------ + mean_cx = contour_x.mean(axis=0) + mean_cy = contour_y.mean(axis=0) + + # Average over all frames for subtracting + #------------------------------------------------- + x_centroid_cx = np.nanmean(mean_cx) + y_centroid_cy = np.nanmean(mean_cy) + + self.value = np.sqrt( + (mean_cx - x_centroid_cx) ** 2 + (mean_cy - y_centroid_cy) ** 2) + + @classmethod + def from_disk(cls, path_var): + + self = cls.__new__(cls) + + # NOTE: This is of size nx1 for Matlab versions, might want to fix on + # loading + self.value = np.squeeze(path_var['range'].value) + + return self + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + + return utils.correlation(self.value, other.value, 'path.range', 0.99) + + +class Duration(Feature): + + """ + + Temporary Feature: path.duration + + Attributes + ---------- + arena : Arena + worm : DurationElement + head : DurationElement + midbody : DurationElement + tail : DurationElement + + """ + + def __init__(self, wf, feature_name): + + self.name = feature_name + + # Inputs + #------ + options = wf.options + nw = wf.nw + sx = nw.skeleton_x + sy = nw.skeleton_y + widths = nw.widths + fps = wf.video_info.fps + + s_points = [nw.worm_partitions[x] + for x in ('all', 'head', 'body', 'tail')] + + # Return early if necessary + #---------------------------------------------------------------------- + if len(sx) == 0 or np.isnan(sx).all(): + raise Exception('This code is not yet translated') + #ar = Arena(create_null = True) + # NAN_cell = repmat({NaN},1,n_points); + # durations = struct('indices',NAN_cell,'times',NAN_cell); + # obj.duration = h__buildOutput(arena,durations); + # return; + + if options.mimic_old_behaviour: + s_points_temp = [nw.worm_partitions[x] + for x in ('head', 'midbody', 'tail')] + temp_widths = [widths[x[0]:x[1], :] for x in s_points_temp] + mean_widths = [np.nanmean(x.reshape(x.size)) for x in temp_widths] + mean_width = np.mean(mean_widths) + else: + mean_width = np.nanmean(widths) + + scale = 2.0 ** 0.5 / 
mean_width + + # Scale the skeleton and translate so that the minimum values are at 1 + #---------------------------------------------------------------------- + with np.errstate(invalid='ignore'): + # Rounding rules are different between Matlab and Numpy + scaled_sx = np.round(sx * scale) + scaled_sy = np.round(sy * scale) + + x_scaled_min = np.nanmin(scaled_sx) + x_scaled_max = np.nanmax(scaled_sx) + y_scaled_min = np.nanmin(scaled_sy) + y_scaled_max = np.nanmax(scaled_sy) + + # Unfortunately needing to typecast to int for array indexing also + # removes my ability to identify invalid values :/ + # Thus we precompute invalid values and then cast + isnan_mask = np.isnan(scaled_sx) + + scaled_zeroed_sx = (scaled_sx - x_scaled_min).astype(int) + scaled_zeroed_sy = (scaled_sy - y_scaled_min).astype(int) + + arena_size = (int(y_scaled_max - y_scaled_min + 1), int(x_scaled_max - x_scaled_min + 1)) + ar = Arena(sx, sy, arena_size) + + #---------------------------------------------------------------------- + def h__populateArenas(arena_size, sys, sxs, s_points, isnan_mask): + """ + + Attributes: + ---------------------------- + arena_size: list + [2] + sys : numpy.int32 + [49, n_frames] + sxs : numpy.int32 + [49, n_frames] + s_points: list + [4] + isnan_mask: bool + [49, n_frames] + + + """ + + # NOTE: All skeleton points have been rounded to integer values for + # assignment to the matrix based on their values being treated as + # indices + + # Filter out frames which have no valid values + #---------------------------------------------------------- + frames_run = np.flatnonzero(np.any(~isnan_mask, axis=0)) + n_frames_run = len(frames_run) + + # 1 area for each set of skeleton indices + #----------------------------------------- + n_points = len(s_points) + arenas = [None] * n_points + + # Loop over the different regions of the body + #------------------------------------------------ + for iPoint in range(n_points): + + temp_arena = np.zeros(arena_size) + s_indices = s_points[iPoint] + + # For each frame, add +1 to the arena each time a chunk of the skeleton + # is located in that part + #-------------------------------------------------------------- + for iFrame in range(n_frames_run): + cur_frame = frames_run[iFrame] + cur_x = sxs[s_indices[0]:s_indices[1], cur_frame] + cur_y = sys[s_indices[0]:s_indices[1], cur_frame] + + temp_arena[cur_y, cur_x] += 1 + + # Flip y-axis to maintain consistency with Matlab + arenas[iPoint] = temp_arena[::-1, :] + + return arenas + #---------------------------------------------------------------------- + + temp_arenas = h__populateArenas( + arena_size, + scaled_zeroed_sy, + scaled_zeroed_sx, + s_points, + isnan_mask) + + # For looking at the data + #------------------------------------ + # utils.imagesc(temp_arenas[0]) + + temp_duration = [DurationElement(x, fps) for x in temp_arenas] + + self.arena = ar + self.worm = temp_duration[0] + self.head = temp_duration[1] + self.midbody = temp_duration[2] + self.tail = temp_duration[3] + + def __eq__(self, other): + if config.MIMIC_OLD_BEHAVIOUR: + # JAH: I've looked at the results and they look right + # Making them look the same would make things really ugly as it means + # making rounding behavior the same between numpy and Matlab :/ + return True + else: + return \ + self.arena == other.arena and \ + self.worm == other.worm and \ + self.head == other.head and \ + self.midbody == other.midbody and \ + self.tail == other.tail + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) 
+ self.name = feature_name + duration_ref = utils.get_nested_h5_field( + wf.h, ['path', 'duration'], resolve_value=False) + + self.arena = Arena.from_disk(duration_ref['arena']) + self.worm = DurationElement.from_disk(duration_ref['worm']) + self.head = DurationElement.from_disk(duration_ref['head']) + self.midbody = DurationElement.from_disk(duration_ref['midbody']) + self.tail = DurationElement.from_disk(duration_ref['tail']) + + return self + + @classmethod + def from_disk(cls, duration_group): + self = cls.__new__(cls) + + self.arena = Arena.from_disk(duration_group['arena']) + self.worm = DurationElement.from_disk(duration_group['worm']) + self.head = DurationElement.from_disk(duration_group['head']) + self.midbody = DurationElement.from_disk(duration_group['midbody']) + self.tail = DurationElement.from_disk(duration_group['tail']) + + return self + + +class DurationElement(object): + """ + Old class, please delete + """ + + def __init__(self, arena_coverage=None, fps=None): + + # TODO: Pass in name for __eq__ + + if arena_coverage is None: + return + + self.indices = np.flatnonzero(arena_coverage) + self.times = arena_coverage.flat[self.indices] / fps + + #arena_coverage_r = np.reshape(arena_coverage, arena_coverage.size, 'F') + #self.indices = np.nonzero(arena_coverage_r)[0] + #self.times = arena_coverage_r[self.indices] / fps + #wtf3 = np.nonzero(arena_coverage) + + #self.indices = np.transpose(np.nonzero(arena_coverage)) + #self.times = arena_coverage[self.indices[:,0],self.indices[:,1]]/fps + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + + return utils.correlation( + self.indices, + other.indices, + 'Duration.indices') and utils.correlation( + self.times, + other.times, + 'Duration.times') + + @classmethod + def from_disk(cls, saved_duration_elem): + + self = cls.__new__(cls) + # TODO: Use utils loader + self.indices = saved_duration_elem['indices'].value[0] + self.times = saved_duration_elem['times'].value[0] + + return self + + +class Arena(object): + + """ + + This is constructed from the Duration constructor. 
+ """ + + def __init__(self, sx, sy, arena_size, create_null=False): + + if create_null: + self.height = np.nan + self.width = np.nan + self.min_x = np.nan + self.min_y = np.nan + self.max_x = np.nan + self.max_y = np.nan + else: + self.height = arena_size[0] + self.width = arena_size[1] + self.min_x = np.nanmin(sx) + self.min_y = np.nanmin(sy) + self.max_x = np.nanmax(sx) + self.max_y = np.nanmax(sy) + + def __eq__(self, other): + # NOTE: Due to rounding differences between Matlab and numpy + # the height and width values are different by 1 + return utils.compare_is_equal( + self.height, + other.height, + 'Arena.height', + 1) and utils.compare_is_equal( + self.width, + other.width, + 'Arena.width', + 1) and utils.compare_is_equal( + self.min_x, + other.min_x, + 'Arena.min_x') and utils.compare_is_equal( + self.min_y, + other.min_y, + 'Arena.min_y') and utils.compare_is_equal( + self.max_x, + other.max_x, + 'Arena.max_x') and utils.compare_is_equal( + self.max_y, + other.max_y, + 'Arena.max_y') + + def __repr__(self): + return utils.print_object(self) + + @classmethod + def from_disk(cls, saved_arena_elem): + + self = cls.__new__(cls) + self.height = saved_arena_elem['height'].value[0, 0] + self.width = saved_arena_elem['width'].value[0, 0] + self.min_x = saved_arena_elem['min']['x'].value[0, 0] + self.min_y = saved_arena_elem['min']['y'].value[0, 0] + self.max_x = saved_arena_elem['max']['x'].value[0, 0] + self.max_y = saved_arena_elem['max']['y'].value[0, 0] + + return self + + + +class NewRange(Feature): + + """ + Feature: path.range + + Attributes + ------------------ + value : + + """ + + def __init__(self, wf, feature_name): + + self.name = feature_name + + nw = wf.nw + contour_x = nw.contour_x + contour_y = nw.contour_y + + # Get average per frame + #------------------------------------------------ + mean_cx = contour_x.mean(axis=0) + mean_cy = contour_y.mean(axis=0) + + # Average over all frames for subtracting + #------------------------------------------------- + x_centroid_cx = np.nanmean(mean_cx) + y_centroid_cy = np.nanmean(mean_cy) + + self.value = np.sqrt( + (mean_cx - x_centroid_cx) ** 2 + (mean_cy - y_centroid_cy) ** 2) + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.value = utils.get_nested_h5_field(wf.h, ['path', 'range']) + return self + + +class DurationFeature(Feature): + + """ + Feature: path.duration.[section_name] + + This currently borrowing heavily from DurationElement. Eventually we'll + want to move that code here + + """ + + def __init__(self, wf, feature_name, section_name): + self.name = feature_name + + parent_name = generic_features.get_parent_feature_name(feature_name) + duration_main = self.get_feature(wf, parent_name) + duration_element = getattr(duration_main, section_name) + self.value = duration_element.times + + @classmethod + def from_schafer_file(cls, wf, feature_name, section_name): + return cls(wf, feature_name, section_name) + + +class Curvature(Feature): + + """ + Feature: path.curvature + """ + + def __init__(self, wf, feature_name): + + self.name = feature_name + BODY_DIFF = 0.5 + + nw = wf.nw + x = nw.skeleton_x + y = nw.skeleton_y + + video_info = wf.video_info + fps = video_info.fps + ventral_mode = video_info.ventral_mode + + # https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/%2Bseg_worm/%2Bfeatures/%40path/wormPathCurvature.m + + BODY_I = slice(44, 3, -1) + + # This was nanmean but I think mean will be fine. 
nanmean was
+        # causing the program to crash
+        diff_x = np.mean(np.diff(x[BODY_I, :], axis=0), axis=0)
+        diff_y = np.mean(np.diff(y[BODY_I, :], axis=0), axis=0)
+        avg_body_angles_d = np.arctan2(diff_y, diff_x) * 180 / np.pi
+
+        # NOTE: This is what is in the MRC code, but differs from their
+        # description. In this case I think the skeleton filtering makes
+        # sense so we'll keep it.
+        speed, ignored_variable, motion_direction = \
+            velocity_module.compute_speed(fps, x[BODY_I, :], y[BODY_I, :],
+                                          avg_body_angles_d, BODY_DIFF,
+                                          ventral_mode)
+
+        # Compute the angle differentials and distances.
+        speed = np.abs(speed)
+
+        # At each frame, we'll compute the differences in motion direction
+        # using some frame in the future relative to the current frame, i.e.
+        #     diff_motion[current_frame] =
+        #         motion_direction[current_frame + frame_scale] -
+        #         motion_direction[current_frame]
+        #------------------------------------------------
+
+        frame_scale = velocity_module.get_frames_per_sample(fps, BODY_DIFF)
+
+        half_frame_scale = int(round((frame_scale + 1) / 2))
+        # Subtract one to deal with Python indexing
+        fs_ind = max(1, frame_scale - 1)  # ensure these values are at least 1
+        h_fs_ind = max(1, half_frame_scale - 1)
+
+        diff_motion = np.full(speed.shape, np.nan)
+        diff_motion[:-fs_ind] = motion_direction[fs_ind:] - \
+            motion_direction[:-fs_ind]
+        s1 = speed[h_fs_ind:-fs_ind]
+        s2 = speed[(h_fs_ind + fs_ind):]
+        distance = np.full(speed.shape, np.nan)
+        distance[h_fs_ind:-fs_ind] = (s1 + s2) / 2 * BODY_DIFF
+
+        # Correct out-of-range values
+        with np.errstate(invalid='ignore'):
+            diff_motion[diff_motion >= 180] -= 360
+            diff_motion[diff_motion <= -180] += 360
+            distance[distance < 1] = np.nan
+
+        self.value = (diff_motion / distance) * (np.pi / 180)
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        self = cls.__new__(cls)
+        self.name = feature_name
+        self.value = utils.get_nested_h5_field(wf.h, ['path', 'curvature'])
+        return self
diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/posture_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/posture_features.py
new file mode 100755
index 00000000..fcc38952
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/features/posture_features.py
@@ -0,0 +1,1379 @@
+# -*- coding: utf-8 -*-
+"""
+Posture features
+
+"""
+
+
+from __future__ import division
+
+import scipy.ndimage.filters as filters
+import numpy as np
+import warnings
+import os
+import tables
+
+import cv2
+
+from . import generic_features
+from .generic_features import Feature
+from .. import config, utils
+from . import events
+
+
+class Bends(object):
+    """
+    Posture Bends
+
+    Attributes
+    ----------
+    head : BendSection
+    midbody : BendSection
+    tail : BendSection
+    hips : BendSection
+    neck : BendSection
+
+    """
+
+    def __init__(self, features_ref):
+        nw = features_ref.nw
+
+        p = nw.get_partition_subset('normal')
+
+        self.posture_bend_keys = p.keys()
+
+        for partition_key in self.posture_bend_keys:
+            # Retrieve the part of the worm we are currently looking at:
+            bend_angles = nw.get_partition(partition_key, 'angles')
+
+            # shape = (n):
+
+            # Suppress RuntimeWarning: Mean of empty slice for those frames
+            # that are ALL NaN.
+ with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + + # Throws warning on taking the mean of an empty slice + temp_mean = np.nanmean(a=bend_angles, axis=0) + + # Throws warning when degrees of freedom <= 0 for slice + temp_std = np.nanstd(a=bend_angles, axis=0) + + # Sign the standard deviation (to provide the bend's + # dorsal/ventral orientation) + temp_std[temp_mean < 0] *= -1 + + + + + + setattr(self, partition_key, + BendSection(temp_mean, temp_std, partition_key)) + + @classmethod + def create(self, features_ref): + options = features_ref.options + + # TODO: this should be populated by calling + # WormPartition.get_partition_subset('normal'), and + # get_partition_subset should be an @classmethod. + self.posture_bend_keys = ['head', 'midbody', 'tail', 'hips', 'neck'] + + if options.should_compute_feature('locomotion.bends', features_ref): + return Bends(features_ref) + else: + return None + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + is_equal = True + for partition_key in self.posture_bend_keys: + is_equal = is_equal and (getattr(self, partition_key) == + getattr(other, partition_key)) + + return is_equal + + @classmethod + def from_disk(cls, saved_bend_data): + + self = cls.__new__(cls) + + for partition_key in saved_bend_data.keys(): + setattr(self, partition_key, + BendSection.from_disk(saved_bend_data[partition_key], + partition_key)) + + return self + + +class BendSection(object): + + """ + Attributes + ---------- + + See Also + -------- + Bends + + """ + + def __init__(self, mean, std_dev, name): + self.mean = mean + self.std_dev = std_dev + self.name = name + + @classmethod + def from_disk(cls, saved_bend_data, name): + + self = cls.__new__(cls) + + self.mean = utils._extract_time_from_disk(saved_bend_data, 'mean') + + try: + self.std_dev = \ + utils._extract_time_from_disk(saved_bend_data, 'std_dev') + except KeyError: + self.std_dev = \ + utils._extract_time_from_disk(saved_bend_data, 'stdDev') + + self.name = name + + return self + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + # TODO: Why is the head.std_dev so low??? + # Are we not mimicing some old error properly??? + return utils.correlation(self.mean, other.mean, + 'posture.bends.' + self.name + '.mean', + high_corr_value=0.95) and \ + utils.correlation(self.std_dev, other.std_dev, + 'posture.bends.' + self.name + '.std_dev', + high_corr_value=0.60) + + + +def get_worm_kinks(features_ref): + """ + Parameters + ---------- + features_ref : open-worm-analysis-toolbox.features.worm_features.WormFeatures + + Returns + ------- + numpy.array + + """ + + nw = features_ref.nw + timer = features_ref.timer + timer.tic() + + options = features_ref.options + + KINK_LENGTH_THRESHOLD_PCT = options.posture.kink_length_threshold_pct + + bend_angles = nw.angles + + # Determine the bend segment length threshold. + n_angles = bend_angles.shape[0] + length_threshold = np.round(n_angles * KINK_LENGTH_THRESHOLD_PCT) + + # Compute a gaussian filter for the angles. + #-------------------------------------------------------------------------- + # JAH NOTE: This is a nice way of getting the appropriate odd value + # unlike the other code with so many if statements ... + #- see window code which tries to get an odd value ... + #- I'd like to go back and fix that code ... 
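+    # For example, if length_threshold were 7, half_length_thr would be
+    # np.round(3.5) = 4 and the window length 2 * 4 + 1 = 9, which is odd.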
+ half_length_thr = np.round(length_threshold / 2) + gauss_filter = utils.gausswin(half_length_thr * 2 + 1) / half_length_thr + + # Compute the kinks for the worms. + n_frames = bend_angles.shape[1] + n_kinks_all = np.zeros(n_frames, dtype=float) + n_kinks_all[:] = np.NaN + + #(np.any(np.logical_or(mask_pos,mask_neg),axis=0)).nonzero()[0] + + nan_mask = np.isnan(bend_angles) + + for iFrame in (~np.all(nan_mask, axis=0)).nonzero()[0]: + smoothed_bend_angles = filters.convolve1d( + bend_angles[:, iFrame], gauss_filter, cval=0, mode='constant') + + # This code is nearly identical in getForaging + #------------------------------------------------------- + n_frames = smoothed_bend_angles.shape[0] + + with np.errstate(invalid='ignore'): + dataSign = np.sign(smoothed_bend_angles) + + if np.any(np.equal(dataSign, 0)): + # I don't expect that we'll ever actually reach 0 + # The code for zero was a bit weird, it keeps counting if no sign + # change i.e. + + + 0 + + + => all + + # + # but it counts for both if sign change + # + + 0 - - - => 3 +s and 4 -s + + # I had to change this to a warning and returning NaNs + # to get my corner case unit tests working, i.e. the case + # of a perfectly straight worm. - @MichaelCurrie + n_kinks_all[:] = np.NaN + #raise Warning("Unhandled code case") + return n_kinks_all + + sign_change_I = ( + np.not_equal(dataSign[1:], dataSign[0:-1])).nonzero()[0] + + end_I = np.concatenate( + (sign_change_I, + n_frames * np.ones(1, dtype=np.result_type(sign_change_I)))) + + wtf1 = np.zeros(1, dtype=np.result_type(sign_change_I)) + wtf2 = sign_change_I + 1 + start_I = np.concatenate((wtf1, wtf2)) # +2? due to inclusion rules??? + + # All NaN values are considered sign changes, remove these ... + keep_mask = np.logical_not(np.isnan(smoothed_bend_angles[start_I])) + + start_I = start_I[keep_mask] + end_I = end_I[keep_mask] + + # The old code had a provision for having NaN values in the middle + # of the worm. I have not translated that feature to the newer code. I + # don't think it will ever happen though for a valid frame, only on the + # edges should you have NaN values. + if start_I.size != 0 and \ + np.any(np.isnan(smoothed_bend_angles[start_I[0]:end_I[-1]])): + raise Exception("Unhandled code case") + + #------------------------------------------------------- + # End of identical code ... + + lengths = end_I - start_I + 1 + + # Adjust lengths for first and last: + # Basically we allow NaN values to count towards the length for the + # first and last stretches + if lengths.size != 0: + if start_I[0] != 0: # Due to leading NaNs + lengths[0] = end_I[0] + 1 + if end_I[-1] != n_frames: # Due to trailing NaNs + lengths[-1] = n_frames - start_I[-1] + + n_kinks_all[iFrame] = np.sum(lengths >= length_threshold) + + timer.toc('posture.kinks') + + return n_kinks_all + + +def get_worm_coils(features_ref, midbody_distance): + """ + Get the worm's posture.coils. + + Parameters + ---------- + features_ref : open-worm-analysis-toolbox.features.worm_features.WormFeatures + + This function is currently very reliant on the MRC processor. 
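+
+    The MRC-specific frame codes used below appear to flag frames where
+    segmentation failed because the worm coiled; a coil event runs from
+    such a frame to the next successfully segmented frame, and it is kept
+    only if it spans at least COIL_FRAME_THRESHOLD frames.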
+
+    Translated From:
+    https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/
+    %2Bseg_worm/%2Bfeatures/%40posture/getCoils.m
+
+    """
+    options = features_ref.options
+    posture_options = options.posture
+    timer = features_ref.timer
+
+    fps = features_ref.video_info.fps
+
+    timer.tic()
+
+    frame_code = features_ref.video_info.frame_code
+
+    COIL_FRAME_THRESHOLD = posture_options.coiling_frame_threshold(fps)
+
+    # These are values that are specific to the MRC processor
+    COIL_START_CODES = [105, 106]
+    # Code that indicates a frame was successfully segmented
+    FRAME_SEGMENTED = 1
+
+    # Algorithm: Whenever a new start is found, find the
+    # first segmented frame; that's the end.
+
+    # Add on a frame to allow closing a coil at the end ...
+    coil_start_mask = (frame_code == COIL_START_CODES[0]) | (
+        frame_code == COIL_START_CODES[1])
+    np_false = np.zeros((1,), dtype=bool)
+    coil_start_mask = np.concatenate((coil_start_mask, np_false))
+
+    # NOTE: These are not guaranteed ends, just possible ends ...
+    end_coil_mask = frame_code == FRAME_SEGMENTED
+    np_true = ~np_false
+    end_coil_mask = np.concatenate((end_coil_mask, np_true))
+
+    in_coil = False
+    coil_frame_start = -1
+    n_coils = 0
+    n_frames_plus1 = len(frame_code) + 1
+
+    starts = []
+    ends = []
+
+    for iFrame in range(n_frames_plus1):
+        if in_coil:
+            if end_coil_mask[iFrame]:
+                n_coil_frames = iFrame - coil_frame_start
+                if n_coil_frames >= COIL_FRAME_THRESHOLD:
+                    n_coils += 1
+
+                    starts.append(coil_frame_start)
+                    ends.append(iFrame - 1)
+
+                in_coil = False
+        elif coil_start_mask[iFrame]:
+            in_coil = True
+            coil_frame_start = iFrame
+
+    if options.mimic_old_behaviour:
+        if (len(starts) > 0) and (ends[-1] == len(frame_code) - 1):
+            ends[-1] += -1
+            starts[-1] += -1
+
+    temp = events.EventList(np.transpose(np.vstack((starts, ends))))
+
+    timer.toc('posture.coils')
+
+    return events.EventListWithFeatures(fps, temp, midbody_distance)
+
+#=====================================================================
+# New Features
+#=====================================================================
+
+
+class EccentricityAndOrientationProcessor(Feature):
+
+    """
+    Temporary Feature: posture.eccentricity_and_orientation
+
+    Attributes
+    ----------
+    eccentricity
+    orientation
+    """
+
+    def __init__(self, wf, feature_name):
+        """
+        Get the eccentricity and orientation of a contour using the moments
+
+        http://en.wikipedia.org/wiki/Image_moment
+
+        Calculated by opencv moments():
+        http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html
+
+        This code might not work if there are redundant points in the
+        contour (the Green's theorem approximation used to compute the
+        moments can fail in that case).
+
+        If no contour is available the code will use the minimal
+        rectangular area instead.
+        http://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minarearect#minarearect
+        The function moments only works on a closed, non-overlapping
+        contour. The box width and length are used instead of the ellipse
+        minor and major axes to get an estimate of the eccentricity.
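+
+        As a sanity check: for an axis-aligned ellipse with semi-axes
+        a > b, mu11 = 0 while mu20 and mu02 are proportional to a**2 and
+        b**2, so the formulas below give ecc = sqrt(1 - b**2 / a**2), the
+        standard ellipse eccentricity, and an orientation angle of 0.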
+ """ + + def _cnt_momentum(cnt): + moments = cv2.moments(cnt) + return moments['mu11'], moments['mu20'], moments['mu02'] + + def _skel_momentum(skel): + mat_cov = np.cov(skel.T) + mu20 = mat_cov[0,0] + mu02 = mat_cov[1,1] + mu11 = mat_cov[0,1] + return mu11, mu20, mu02 + + + def _momentum_eccentricty_orientation(mu11, mu20, mu02): + a1 = (mu20 + mu02) / 2 + a2 = np.sqrt(4 * mu11**2 + + (mu20 - mu02)**2) / 2 + + minor_axis = a1 - a2 + major_axis = a1 + a2 + + ecc = np.sqrt(1 - minor_axis / major_axis) + ang = np.arctan2(2 * mu11, (mu20 - mu02)) / 2 + ang *= 180 / np.pi + return ecc, ang + + def _box_eccentricity_orientation(skel): + (CMx, CMy), (L, W), angle = cv2.minAreaRect(skel) + if W > L: + L, W = W, L # switch if width is larger than length + angle += 90 # this means that the angle is shifted too + quirkiness = np.sqrt(1 - W**2 / L**2) + return quirkiness, angle + + self.name = feature_name + + wf.timer.tic() + + #Try to use the contour, otherwise use the skeleton + try: + points = wf.nw.contour_without_redundant_points + _get_momentum = _cnt_momentum + + except: + points = wf.nw.skeleton + _get_momentum = _skel_momentum + + + + # OpenCV does not like float64, this actually make sense for image + # data where we do not require a large precition in the decimal part. + # This could save quite a lot of space + points = points.astype(np.float32) + + tot = points.shape[-1] + + eccentricity = np.full(tot, np.nan) + orientation = np.full(tot, np.nan) + for ii in range(tot): + frame_points = points[:, :, ii] + + if ~np.any(np.isnan(frame_points)): + mu11, mu20, mu02 = _get_momentum(frame_points) + eccentricity[ii], orientation[ii] = \ + _momentum_eccentricty_orientation(mu11, mu20, mu02) + + wf.timer.toc(self.name) + + self.eccentricity = eccentricity + self.orientation = orientation + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.eccentricity = utils.get_nested_h5_field( + wf.h, ['posture', 'eccentricity']) + + # This isn't saved to disk + self.orientation = None + return self + + +class Eccentricity(Feature): + """ + Feature: 'posture.eccentricity' + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'posture.eccentricity_and_orientation').eccentricity + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class AmplitudeAndWavelengthProcessor(Feature): + + """ + Temporary Feature: posture.amplitude_wavelength_processor + + Attributes + ---------- + amplitude_max + amplitude_ratio + primary_wavelength + secondary_wavelength + track_length + """ + + def __init__(self, wf, feature_name): + """ + Calculates amplitude of rotated worm (relies on orientation + aka theta_d) + + Parameters + ---------- + theta_d + sx + sy + worm_lengths + + """ + + self.name = feature_name + theta_d = self.get_feature( + wf, 'posture.eccentricity_and_orientation').orientation + + timer = wf.timer + timer.tic() + + options = wf.options + + nw = wf.nw + sx = nw.skeleton_x + sy = nw.skeleton_y + worm_lengths = nw.length + + # TODO: Move these into posture options + + wave_options = wf.options.posture.wavelength + + # https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/ + # %2Bseg_worm/%2Bfeatures/%40posture/getAmplitudeAndWavelength.m + N_POINTS_FFT = wave_options.n_points_fft + HALF_N_FFT = int(N_POINTS_FFT / 2) + MIN_DIST_PEAKS = wave_options.min_dist_peaks + WAVELENGTH_PCT_MAX_CUTOFF = wave_options.pct_max_cutoff + 
WAVELENGTH_PCT_CUTOFF = wave_options.pct_cutoff + + assert sx.shape[0] <= N_POINTS_FFT # of points used in the FFT + # must be more than the number of points in the skeleton + + # Rotate the worm so that it lies primarily along a single axis + #------------------------------------------------------------- + theta_r = theta_d * (np.pi / 180) + wwx = sx * np.cos(theta_r) + sy * np.sin(theta_r) + wwy = sx * -np.sin(theta_r) + sy * np.cos(theta_r) + + # Subtract mean + #----------------------------------------------------------------- + #??? - Why isn't this done before the rotation? + wwx = wwx - np.mean(wwx, axis=0) + wwy = wwy - np.mean(wwy, axis=0) + + # Calculate track amplitude + #----------------------------------------------------------------- + amp1 = np.amax(wwy, axis=0) + amp2 = np.amin(wwy, axis=0) + amplitude_max = amp1 - amp2 + amp2 = np.abs(amp2) + + # Ignore NaN division warnings + with np.errstate(invalid='ignore'): + amplitude_ratio = np.divide(np.minimum(amp1, amp2), + np.maximum(amp1, amp2)) + + # Calculate track length + #----------------------------------------------------------------- + # This is the x distance after rotation, and is different from the + # worm length which follows the skeleton. This will always be smaller + # than the worm length. If the worm were perfectly straight these + # values would be the same. + track_length = np.amax(wwx, axis=0) - np.amin(wwx, axis=0) + + # Wavelength calculation + #----------------------------------------------------------------- + dwwx = np.diff(wwx, 1, axis=0) + + # Does the sign change? This is a check to make sure that the + # change in x is always going one way or the other. Is sign of all + # differences the same as the sign of the first, or rather, are any + # of the signs not the same as the first sign, indicating a "bad + # worm orientation". + # + # NOT: This means that within a frame, if the worm x direction + # changes, then it is considered a bad worm and is not + # evaluated for wavelength + # + + with np.errstate(invalid='ignore'): + bad_worm_orientation = np.any( + np.not_equal(np.sign(dwwx), np.sign(dwwx[0, :])), axis=0) + + n_frames = bad_worm_orientation.size + primary_wavelength = np.full(n_frames, np.nan) + secondary_wavelength = np.full(n_frames, np.nan) + + # NOTE: Right now this varies from worm to worm which means the + # spectral resolution varies as well from worm to worm + spatial_sampling_frequency = (wwx.shape[0] - 1) / track_length + + ds = 1 / spatial_sampling_frequency + + frames_to_calculate = \ + (np.logical_not(bad_worm_orientation)).nonzero()[0] + + for cur_frame in frames_to_calculate: + # Create an evenly sampled x-axis, note that ds varies + xx = wwx[:, cur_frame] + yy = wwy[:, cur_frame] + if xx[0] > xx[-1]: #switch we want to have monotonically inceasing values + xx = xx[::-1] + yy = yy[::-1] + + iwwx = utils.colon(xx[0], ds[cur_frame], xx[-1]) + iwwy = np.interp(iwwx, xx, yy) + iwwy = iwwy[::-1] + + temp = np.fft.fft(iwwy, N_POINTS_FFT) + + + if options.mimic_old_behaviour: + iY = temp[0:HALF_N_FFT] + iY = iY * np.conjugate(iY) / N_POINTS_FFT + else: + iY = np.abs(temp[0:HALF_N_FFT]) + + + # Find peaks that are greater than the cutoff + peaks, indx = utils.separated_peaks(iY, + MIN_DIST_PEAKS, + True, + (WAVELENGTH_PCT_MAX_CUTOFF * + np.amax(iY))) + + # This is what the supplemental says, not what was done in + # the previous code. I'm not sure what was done for the actual + # paper, but I would guess they used power. 
+ # + # This gets used when determining the secondary wavelength, as + # it must be greater than half the maximum to be considered a + # secondary wavelength. + + # NOTE: True Amplitude = 2*abs(fft)/ + # (length_real_data i.e. 48 or 49, not 512) + # + # i.e. for a sinusoid of a given amplitude, the above formula + # would give you the amplitude of the sinusoid + + # We sort the peaks so that the largest is at the first index + # and will be primary, this was not done in the previous + # version of the code + I = np.argsort(-1 * peaks) + indx = indx[I] + + frequency_values = (indx - 1) / N_POINTS_FFT * \ + spatial_sampling_frequency[cur_frame] + + all_wavelengths = 1 / frequency_values + + p_temp = all_wavelengths[0] + + if indx.size > 1: + s_temp = all_wavelengths[1] + else: + s_temp = np.NaN + + worm_wavelength_max = (WAVELENGTH_PCT_CUTOFF * + worm_lengths[cur_frame]) + + # Cap wavelengths ... + if p_temp > worm_wavelength_max: + p_temp = worm_wavelength_max + + # ??? Do we really want to keep this as well if p_temp == worm_2x? + # i.e., should the secondary wavelength be valid if the primary is + # also limited in this way ????? + if s_temp > worm_wavelength_max: + s_temp = worm_wavelength_max + + primary_wavelength[cur_frame] = p_temp + secondary_wavelength[cur_frame] = s_temp + + if options.mimic_old_behaviour: + # In the old code, the first peak (i.e. larger wavelength, + # lower frequency) was always the primary wavelength, where as + # the new definition is based on the amplitude of the peaks, + # not their position along the frequency axis + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + mask = secondary_wavelength > primary_wavelength + + temp = secondary_wavelength[mask] + secondary_wavelength[mask] = primary_wavelength[mask] + primary_wavelength[mask] = temp + + self.amplitude_max = amplitude_max + self.amplitude_ratio = amplitude_ratio + self.primary_wavelength = primary_wavelength + self.secondary_wavelength = secondary_wavelength + self.track_length = track_length + + timer.toc('posture.amplitude_and_wavelength') + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.amplitude_max = utils.get_nested_h5_field( + wf.h, ['posture', 'amplitude', 'max']) + self.amplitude_ratio = utils.get_nested_h5_field( + wf.h, ['posture', 'amplitude', 'ratio']) + self.primary_wavelength = utils.get_nested_h5_field( + wf.h, ['posture', 'wavelength', 'primary']) + self.secondary_wavelength = utils.get_nested_h5_field( + wf.h, ['posture', 'wavelength', 'secondary']) + self.track_length = utils.get_nested_h5_field( + wf.h, ['posture', 'tracklength']) + + return self + + +class AmplitudeMax(Feature): + """ + Feature: posture.amplitude_max + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'posture.amplitude_wavelength_processor').amplitude_max + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class AmplitudeRatio(Feature): + """ + Feature: posture.amplitude_ratio + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'posture.amplitude_wavelength_processor').amplitude_ratio + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class PrimaryWavelength(Feature): + """ + Feature: posture.primary_wavelength + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = 
self.get_feature( + wf, 'posture.amplitude_wavelength_processor').primary_wavelength + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + def __eq__(self, other): + + return utils.correlation(self.value, other.value, + self.name, high_corr_value=0.98, + merge_nans=True) + + +class SecondaryWavelength(Feature): + """ + Feature: posture.secondary_wavelength + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'posture.amplitude_wavelength_processor').secondary_wavelength + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + def __eq__(self, other): + + return utils.correlation(self.value, other.value, + self.name, high_corr_value=0.98, + merge_nans=True) + + +class TrackLength(Feature): + """ + Feature: posture.track_length + """ + + def __init__(self, wf, feature_name): + self.name = feature_name + self.value = self.get_feature( + wf, 'posture.amplitude_wavelength_processor').track_length + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class Coils(Feature): + + def __init__(self, wf, feature_name): + """ + Feature Name: posture.coils + + Get the worm's posture.coils. + + + """ + + self.name = feature_name + midbody_distance = self.get_feature( + wf, 'locomotion.velocity.mibdody.distance').value + + options = wf.options + posture_options = options.posture + timer = wf.timer + + fps = wf.video_info.fps + + timer.tic() + + frame_code = wf.video_info.frame_code + + COIL_FRAME_THRESHOLD = posture_options.coiling_frame_threshold(fps) + + # These are values that are specific to the MRC processor + COIL_START_CODES = [105, 106] + # Code that indicates a frame was successfully segmented + FRAME_SEGMENTED = 1 + + # Algorithm: Whenever a new start is found, find the + # first segmented frame; that's the end. + + # Add on a frame to allow closing a coil at the end ... + coil_start_mask = (frame_code == COIL_START_CODES[0]) | ( + frame_code == COIL_START_CODES[1]) + np_false = np.zeros((1,), dtype=bool) + coil_start_mask = np.concatenate((coil_start_mask, np_false)) + + # NOTE: These are not guaranteed ends, just possible ends ... 
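+        # (A segmented frame only closes a coil if we are currently inside
+        # one; the in_coil flag in the loop below enforces this.)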
+ end_coil_mask = frame_code == FRAME_SEGMENTED + np_true = ~np_false + end_coil_mask = np.concatenate((end_coil_mask, np_true)) + + in_coil = False + coil_frame_start = -1 + n_coils = 0 + n_frames_plus1 = len(frame_code) + 1 + + starts = [] + ends = [] + + for iFrame in range(n_frames_plus1): + if in_coil: + if end_coil_mask[iFrame]: + n_coil_frames = iFrame - coil_frame_start + if n_coil_frames >= COIL_FRAME_THRESHOLD: + n_coils += 1 + + starts.append(coil_frame_start) + ends.append(iFrame - 1) + + in_coil = False + elif coil_start_mask[iFrame]: + in_coil = True + coil_frame_start = iFrame + + if options.mimic_old_behaviour: + if (len(starts) > 0) and (ends[-1] == len(frame_code) - 1): + ends[-1] += -1 + starts[-1] += -1 + + temp = events.EventList(np.transpose(np.vstack((starts, ends)))) + + timer.toc('posture.coils') + + self.value = events.EventListWithFeatures(fps, temp, midbody_distance) + + self.no_events = self.value.is_null + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + ref = utils.get_nested_h5_field( + wf.h, ['posture', 'coils'], resolve_value=False) + self.value = events.EventListWithFeatures.from_disk(ref, 'MRC') + self.no_events = self.value.is_null + return self + + +class Kinks(Feature): + + def __init__(self, wf, feature_name): + """ + Feature Name: posture.kinks + + Parameters + ---------- + features_ref : open-worm-analysis-toolbox.features.worm_features.WormFeatures + + Returns + ------- + numpy.array + + """ + + self.name = feature_name + + nw = wf.nw + timer = wf.timer + timer.tic() + + options = wf.options + + KINK_LENGTH_THRESHOLD_PCT = options.posture.kink_length_threshold_pct + + bend_angles = nw.angles + + # Determine the bend segment length threshold. + n_angles = bend_angles.shape[0] + length_threshold = np.round(n_angles * KINK_LENGTH_THRESHOLD_PCT) + + # Compute a gaussian filter for the angles. + #---------------------------------------------------------------------- + # JAH NOTE: This is a nice way of getting the appropriate odd value + # unlike the other code with so many if statements ... + #- see window code which tries to get an odd value ... + #- I'd like to go back and fix that code ... + half_length_thr = np.round(length_threshold / 2) + gauss_filter = utils.gausswin( + half_length_thr * 2 + 1) / half_length_thr + + # Compute the kinks for the worms. + n_frames = bend_angles.shape[1] + n_kinks_all = np.full(n_frames, np.nan, dtype=float) + + #(np.any(np.logical_or(mask_pos,mask_neg),axis=0)).nonzero()[0] + + nan_mask = np.isnan(bend_angles) + + for iFrame in (~np.all(nan_mask, axis=0)).nonzero()[0]: + smoothed_bend_angles = filters.convolve1d( + bend_angles[:, iFrame], gauss_filter, cval=0, mode='constant') + + # This code is nearly identical in getForaging + #------------------------------------------------------- + n_frames = smoothed_bend_angles.shape[0] + + with np.errstate(invalid='ignore'): + dataSign = np.sign(smoothed_bend_angles) + + if np.any(np.equal(dataSign, 0)): + # I don't expect that we'll ever actually reach 0 + # The code for zero was a bit weird, it keeps counting if no sign + # change i.e. + + + 0 + + + => all + + # + # but it counts for both if sign change + # + + 0 - - - => 3 +s and 4 -s + + #this case does happend. 
I will continue (default nan) instead of raising an error (AEJ)
+                #raise Exception("Unhandled code case")
+                continue
+
+            sign_change_I = (
+                np.not_equal(dataSign[1:], dataSign[0:-1])).nonzero()[0]
+
+            end_I = np.concatenate(
+                (sign_change_I,
+                 n_frames * np.ones(1, dtype=np.result_type(sign_change_I))))
+
+            wtf1 = np.zeros(1, dtype=np.result_type(sign_change_I))
+            wtf2 = sign_change_I + 1
+            # +2? due to inclusion rules???
+            start_I = np.concatenate((wtf1, wtf2))
+
+            # All NaN values are considered sign changes, remove these ...
+            keep_mask = np.logical_not(np.isnan(smoothed_bend_angles[start_I]))
+
+            start_I = start_I[keep_mask]
+            end_I = end_I[keep_mask]
+
+            # The old code had a provision for having NaN values in the middle
+            # of the worm. I have not translated that feature to the newer code. I
+            # don't think it will ever happen though for a valid frame, only on the
+            # edges should you have NaN values.
+            if start_I.size != 0 and \
+                    np.any(np.isnan(smoothed_bend_angles[start_I[0]:end_I[-1]])):
+                #this case does happen. I will continue (default nan) instead of raising an error (AEJ)
+                #raise Exception("Unhandled code case")
+                continue
+
+            #-------------------------------------------------------
+            # End of identical code ...
+
+            lengths = end_I - start_I + 1
+
+            # Adjust lengths for first and last:
+            # Basically we allow NaN values to count towards the length for the
+            # first and last stretches
+            if lengths.size != 0:
+                if start_I[0] != 0:  # Due to leading NaNs
+                    lengths[0] = end_I[0] + 1
+                if end_I[-1] != n_frames:  # Due to trailing NaNs
+                    lengths[-1] = n_frames - start_I[-1]
+
+            n_kinks_all[iFrame] = np.sum(lengths >= length_threshold)
+
+        timer.toc('posture.kinks')
+
+        self.value = n_kinks_all
+
+    @classmethod
+    def from_schafer_file(cls, wf, feature_name):
+        self = cls.__new__(cls)
+        self.name = feature_name
+        self.value = utils.get_nested_h5_field(wf.h, ['posture', 'kinks'])
+        return self
+
+
+def load_eigen_worms():
+    """
+    Load the eigen_worms, which are stored in a Matlab data file
+
+    The eigenworms were computed by the Schafer lab based on N2 worms
+
+    Returns
+    ----------
+    eigen_worms: [7 x 48]
+
+    From http://stackoverflow.com/questions/50499/
+
+    """
+
+    eigen_worm_file_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        config.EIGENWORM_FILE)
+
+    with tables.File(eigen_worm_file_path, 'r') as h:
+        eigen_worms = h.get_node('/eigenWorms')[:]
+
+    return np.transpose(eigen_worms)
+
+
+class EigenProjectionProcessor(Feature):
+
+    def __init__(self, wf, feature_name):
+        """
+        Feature: 'posture.all_eigenprojections'
+
+        Parameters
+        ----------
+        features_ref : open-worm-analysis-toolbox.features.worm_features.WormFeatures
+
+        Returns
+        -------
+        eigen_projections: [N_EIGENWORMS_USE, n_frames]
+
+        """
+
+        self.name = feature_name
+
+        posture_options = wf.options.posture
+        N_EIGENWORMS_USE = posture_options.n_eigenworms_use
+        timer = wf.timer
+        timer.tic()
+        # eigen_worms: [7,48]
+        eigen_worms = load_eigen_worms()
+
+        sx = wf.nw.skeleton_x
+        sy = wf.nw.skeleton_y
+        # nw.angles calculation is inconsistent with this one...
+        # I think bend angles should be between -180 and 180, while for the
+        # eigenworms they must be continuous.
+        angles = np.arctan2(np.diff(sy, n=1, axis=0), np.diff(sx, n=1, axis=0))
+        if wf.nw.video_info.ventral_mode == 2:
+            # switch the angle sign if the contour orientation is anticlockwise
+            angles = -angles
+
+        n_frames = angles.shape[1]
+
+        # need to deal with cases where angle changes discontinuously from -pi
+        # to pi and pi to -pi.
In these cases, subtract 2pi and add 2pi + # respectively to all remaining points. This effectively extends the + # range outside the -pi to pi range. Everything is re-centred later + # when we subtract off the mean. + false_row = np.zeros((1, n_frames), dtype=bool) + + # NOTE: By adding the row of falses, we shift the trues + # to the next value, which allows indices to match. Otherwise after every + # find statement we would need to add 1, I think this is a bit faster + # ... + + with np.errstate(invalid='ignore'): + mask_pos = np.concatenate( + (false_row, np.diff(angles, n=1, axis=0) > np.pi), axis=0) + mask_neg = np.concatenate( + (false_row, np.diff(angles, n=1, axis=0) < -np.pi), axis=0) + + # Only fix the frames we need to, in which there is a jump in going + # from one segment to the next ... + fix_frames_I = ( + np.any(np.logical_or(mask_pos, mask_neg), axis=0)).nonzero()[0] + + for cur_frame in fix_frames_I: + + positive_jump_I = (mask_pos[:, cur_frame]).nonzero()[0] + negative_jump_I = (mask_neg[:, cur_frame]).nonzero()[0] + + # subtract 2pi from remainging data after positive jumps + # Note that the jumps impact all subsequent frames + for cur_pos_jump in positive_jump_I: + angles[cur_pos_jump:, cur_frame] -= 2 * np.pi + + # add 2pi to remaining data after negative jumps + for cur_neg_jump in negative_jump_I: + angles[cur_neg_jump:, cur_frame] += 2 * np.pi + + angles = angles - np.mean(angles, axis=0) + + + eigen_projections = np.dot(eigen_worms[0:N_EIGENWORMS_USE, :], angles) + + #change signs for anticlockwise + #if nw.video_info.ventral_mode == 2: + # eigen_projections = -eigen_projections + + + timer.toc('posture.eigenworms') + + self.value = eigen_projections + + @classmethod + def from_schafer_file(cls, wf, feature_name): + self = cls.__new__(cls) + self.name = feature_name + self.value = np.transpose(utils.get_nested_h5_field( + wf.h, ['posture', 'eigenProjection'], is_matrix=True)) + return self + + +class EigenProjection(Feature): + + def __init__(self, wf, feature_name): + self.name = feature_name + projection_matrix = self.get_feature( + wf, 'posture.all_eigenprojections').value + index = int(feature_name[-1]) + self.value = projection_matrix[index, :] + + @classmethod + def from_schafer_file(cls, wf, feature_name): + return cls(wf, feature_name) + + +class Bend(Feature): + """ + + Old MRC code used very different indices for this part: + #s are in Matlab format, 1 based and inclusive + Indices Mismatch + % OLD NEW + %--------------------------------------------- + %head : 1:9 1:8 + %neck : 9:17 9:16 + %midbody : 17:32 (mean) 17:31 (std) 17:33 + %hip : 31:39 34:41 + %tail : 39:48 42:49 + """ + + def __init__(self, wf, feature_name, bend_name): + + self.name = 'posture.bends.' + bend_name + + nw = wf.nw + + # Retrieve the part of the worm we are currently looking at: + bend_angles = nw.get_partition(bend_name, 'angles') + + # shape = (n): + + # Suppress RuntimeWarning: Mean of empty slice for those frames + # that are ALL NaN. 
+ with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + + # Throws warning on taking the mean of an empty slice + temp_mean = np.nanmean(a=bend_angles, axis=0) + + # Throws warning when degrees of freedom <= 0 for slice + temp_std = np.nanstd(a=bend_angles, axis=0) + + # Sign the standard deviation (to provide the bend's + # dorsal/ventral orientation) + temp_std[temp_mean < 0] *= -1 + + self.mean = temp_mean + self.std_dev = temp_std + + @classmethod + def from_schafer_file(cls, wf, feature_name, bend_name): + self = cls.__new__(cls) + self.name = feature_name + self.mean = utils.get_nested_h5_field( + wf.h, ['posture', 'bends', bend_name, 'mean']) + self.std_dev = utils.get_nested_h5_field( + wf.h, ['posture', 'bends', bend_name, 'stdDev']) + + return self + + +class BendMean(Feature): + + def __init__(self, wf, feature_name, bend_name): + + parent_name = generic_features.get_parent_feature_name(feature_name) + self.name = feature_name + self.value = self.get_feature(wf, parent_name).mean + + @classmethod + def from_schafer_file(cls, wf, feature_name, bend_name): + return cls(wf, feature_name, bend_name) + + def __eq__(self, other): + return utils.correlation(self.value, other.value, + self.name, high_corr_value=0.95) + + +class BendStdDev(Feature): + + def __init__(self, wf, feature_name, bend_name): + + parent_name = generic_features.get_parent_feature_name(feature_name) + self.name = feature_name + self.value = self.get_feature(wf, parent_name).std_dev + + @classmethod + def from_schafer_file(cls, wf, feature_name, bend_name): + return cls(wf, feature_name, bend_name) + + def __eq__(self, other): + return utils.correlation(self.value, other.value, + self.name, high_corr_value=0.60) + + +class Skeleton(Feature): + + """ + Feature: posture.skeleton + + This just holds onto the skeleton x & y coordinates from normalized worm. + We don't do anything with these coordinates as far as feature processing. 
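+
+    Note that from_disk() transposes the saved matrices; the on-disk layout
+    appears to store time along the first axis.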
+ """ + + def __init__(self, wf, feature_name): + + nw = wf.nw + + self.name = feature_name + self.x = nw.skeleton_x + self.y = nw.skeleton_y + + @classmethod + def from_schafer_file(cls, wf, feature_name, bend_name): + self = cls.__new__(cls) + self.name = feature_name + self.x = utils.get_nested_h5_field( + wf.h, ['posture', 'skeleton', 'x'], is_matrix=True) + self.y = utils.get_nested_h5_field( + wf.h, ['posture', 'skeleton', 'y'], is_matrix=True) + return self + + @classmethod + def from_disk(cls, skeleton_ref): + self = cls.__new__(cls) + + x_temp = utils._extract_time_from_disk(skeleton_ref, 'x', + is_matrix=True) + y_temp = utils._extract_time_from_disk(skeleton_ref, 'y', + is_matrix=True) + self.x = x_temp.transpose() + self.y = y_temp.transpose() + + return self + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + eq_skeleton_x = utils.correlation(np.ravel(self.x), + np.ravel(other.x), + 'posture.skeleton.x') + eq_skeleton_y = utils.correlation(np.ravel(self.y), + np.ravel(other.y), + 'posture.skeleton.y') + + return eq_skeleton_x and eq_skeleton_y + + +class Direction(Feature): + + """ + + Implements Features: + -------------------- + posture.directions.tail2head + posture.directions.tail + posture.directions.head + + """ + + def __init__(self, wf, feature_name, key_name): + """ + Feature: posture.directions.[key_name] + + Parameters + ---------- + wf : + + """ + + nw = wf.nw + + sx = nw.skeleton_x + sy = nw.skeleton_y + wp = nw.worm_partitions + + if key_name == 'tail2head': + tip_I = wp['head'] # I - "indices" - really a tuple of start,stop + # tail is referencing a vector tail, not the worm's tail + tail_I = wp['tail'] + elif key_name == 'head': + tip_I = wp['head_tip'] + tail_I = wp['head_base'] + else: + tip_I = wp['tail_tip'] + tail_I = wp['tail_base'] + + tip_slice = slice(*tip_I) + tail_slice = slice(*tail_I) + + # Compute the centroids of the tip and tail + # then compute a direction vector between them (tip - tail) + + tip_x = np.mean(sx[tip_slice, :], axis=0) + tip_y = np.mean(sy[tip_slice, :], axis=0) + tail_x = np.mean(sx[tail_slice, :], axis=0) + tail_y = np.mean(sy[tail_slice, :], axis=0) + + + #attempt to match segworm behaviour. This should shift the angles by 180. + # dy = (tip_y - tail_y) + # dx = (tip_x - tail_x) + # dir_value = 180 / np.pi * (-np.arctan2(dy,-dx)) + dir_value = 180 / np.pi * np.arctan2(tip_y - tail_y, tip_x - tail_x) + self.value = dir_value + + @classmethod + def from_schafer_file(cls, wf, feature_name, key_name): + self = cls.__new__(cls) + self.value = utils.get_nested_h5_field( + wf.h, ['posture', 'directions', key_name]) + return self + diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/velocity.py b/tierpsy/features/open_worm_analysis_toolbox/features/velocity.py new file mode 100755 index 00000000..6d9b3aa6 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/velocity.py @@ -0,0 +1,559 @@ +# -*- coding: utf-8 -*- +""" +Velocity calculation methods: used in locomotion and in path features + +""" +from __future__ import division + +import warnings +import numpy as np + +__ALL__ = ['get_angles', + 'get_partition_angles', + 'h__computeAngularSpeed', + 'compute_speed', + 'get_frames_per_sample'] + + +def get_angles(segment_x, segment_y, head_to_tail=False): + """ + + TODO: This might be better named as: + get_average_angle_per_frame + + Obtain the average angle (see definition in output) for a subset of the + worm for each frame. 
+ + Parameters + ---------- + segment_x, segment_y: numpy arrays of shape (p,n) where + - 'p' is the size of the partition of the 49 points + - 'n' is the number of frames in the video + Values are typically (always) the skeleton x & y positions. + head_to_tail: bool + - if True + - closer to tail are tips (where vector is tip - tail) + - closer to head are tails + 1 2 3 4 + ======> #This is supposed to be the vector + - if False + - closer to tail are tails + - closer to head are tips + 1 2 3 4 + <====== + True means the worm points are ordered head to tail. + + Returns + ------- + Angles in degrees. A numpy array of shape (n) and stores the worm body's + "angle" (in degrees) for each frame of video. All angles for a given + frame are averaged together. Angles are computed between sequential points + and not smoothed. + + Implementation Notes + -------------------- + JAH: I'm surprised this works given that there is relatively little control + over the density of sampling of the skeleton (or segment_x and segment_y). + As the spatial sampling changes, I would expect the results of this function + to change drastically. + + """ + + if not head_to_tail: + # reverse the worm points so we go from tail to head + segment_x = segment_x[::-1, :] + segment_y = segment_y[::-1, :] + + # Diff calculates each point's difference between the segment's points + # then we take the mean of these differences for each frame + # ignore mean of empty slice from np.nanmean + with warnings.catch_warnings(): + + # TODO: Why do we need to filter if we are catching warnings????? + # + # This warning arises when all values are NaN in an array + # This occurs in not for all values but only for some rows, other rows + # may be a mix of valid and NaN values + warnings.simplefilter("ignore") + # with np.errstate(invalid='ignore'): #doesn't work, numpy warning + # is not of the invalid type, just says "mean of empty slice" + average_diff_x = np.nanmean( + np.diff(segment_x, n=1, axis=0), axis=0) # shape (n) + average_diff_y = np.nanmean( + np.diff(segment_y, n=1, axis=0), axis=0) # shape (n) + + # angles has shape (n) and stores the worm body's "angle" + # for each frame of video + angles = np.degrees(np.arctan2(average_diff_y, average_diff_x)) + + return angles + + +def get_partition_angles(nw, partition_key, data_key='skeleton', + head_to_tail=False): + """ + + Obtain the "angle" of a subset of the 49 points of a worm for each frame. + + This is a wrapper around get_angles(). This function resolves a body + partition into actual x,y data for that function. + + Parameters + ---------- + nw: NormalizedWorm + + partition_key : str + Examples include head, tail, or body + data_key : str + The attribute of normalized worm to use. Examples include: + - skeleton + - anything else? + head_to_tail : bool + - True means the worm points are ordered head to tail. + TODO: Explain this more + + Returns + ------- + numpy array of shape (n) + Stores the worm body's "angle" (in degrees) for each frame of video. + See the get_angles() function for how the angles are calculated. + + See Also + -------- + get_angles + + """ + + # the shape of both segment_x and segment_y is (partition length, n) + segment_x, segment_y = nw.get_partition(partition_key, data_key, True) + + return get_angles(segment_x, segment_y, head_to_tail) + + +def h__computeAngularSpeed(time, segment_x, segment_y, + left_I, right_I, ventral_mode): + """ + + This function is called by compute_speeed(). + + TODO: These units are wrong ... 
+ TODO: This is actually angular velocity + + Parameters + ---------- + time : + segment_x : numpy array, shape (p,n) + The x's of the partition being considered. + segment_y : numpy array, shape (p,n) + The y's of the partition being considered. + left_I : numpy array + For each frame, an index (earlier in time) from which to compute + the desired value. These values only exist for cases in which + both left_I and right_I were computed to be valid. + right_I : numpy array + For each frame, an index (later in time) from which to compute the + desired vlaue. + ventral_mode : + 0, 1, or 2, specifying that the ventral side is... + 0 = unknown + 1 = clockwise + 2 = anticlockwise + + Returns + ------- + a numpy array of shape n, in units of degrees per second + + See Also + -------- + compute_speed + get_angles + + """ + # Compute the body part direction for each frame + point_angle_d = get_angles(segment_x, segment_y, head_to_tail=False) + + angular_speed = point_angle_d[right_I] - point_angle_d[left_I] + + # Correct any jumps that result during the subtraction process + # i.e. 1 - 359 ~= -358 + # by forcing -180 <= angular_speed[i] <= 180 + angular_speed = (angular_speed + 180) % (360) - 180 + + # Change units from degrees per frame to degrees per second + angular_speed = angular_speed / time + + # Sign the direction for dorsal/ventral locomotion. + # if ventral_mode is anything but anticlockwise, then negate angular_speed: + if(ventral_mode < 2): + angular_speed = -angular_speed + + return angular_speed + + +def h__getSpeedIndices(frames_per_sample, good_frames_mask): + """ + + For each point, we calculate the speed using frames prior to and following + a frame. Given that some frames are not valid (have NaN), we move the index + backward (prior frame) or forward (following frame), essentially slightly + widening the time frame over which the speed is computed. + + This function determines what the indices are that each frame will use to + calculate the velocity at that frame. For example, at frame 5 we might decide + to use frames 2 and 8. + + Parameters + ---------- + frames_per_sample : int + Our sample scale, in frames. The integer must be odd. + + good_frames_mask : + Shape (num_frames), false if underlying angle is NaN + + Returns + ------- + (keep_mask,left_I,right_I) + keep_mask : shape (num_frames), this is used to indicate + which original frames have valid velocity values, + and which don't. + NOTE: sum(keep_mask) == n_valid_velocity_values + left_I : shape (n_valid_velocity_values), for a given sample, this + indicates the index to the left of (less than) the sample + that should be used to calculate the velocity + right_I : shape (n_valid_velocity_values) + + Explanation + ----------- + Assume we have the following data ........ + half_scale = 1 # minimum shift for computing value at a frame + full_scale = 2 # maximum shift for computing value at a frame + frame : 0 1 2 3 4 5 6 7 8 9 + is_good : T F T T T F T F T F + + Our first possibly valid frame is 1, because we require 1 on each side + (i.e. half_scale = 1). 
+ + frame 0: + can't go left, left is invalid + check 1, it is invalid + check 2, it is valid, right index is 2 + + So for frame 1: + check 0, it is valid, left index is set + check 2, also valid, right index is set + + Now onto 2: + check 1, invalid + check 0, valid, left index at 0 + check 3, valid, right index is 3 + + So far we have: + left_I = [NaN, 0, 0] + right_I = [2, 2, 3] + + Interpretation: + frame 0: left index is invalid, so we don't compute the value + frame 1: compute value taking values at frame 0 (left) and frame 2 (right) + + This is a frame by frame approach, but below we actually loop over + the shifts since the shifts tends to be much less than in size than the + # of frames + + i.e. (by shift approach explained) + For all frames, check 1 to the left. For all valid frames, set these + as the left_I values for the current frames. + Do the same thing as well for frames that are 1 to the right. + Now we are done with a shift size of 1. + + Now repeat this process for a shift size of 2, however only override left_I + and right_I values if they were not set for a shift size of 1. + + + """ + + # Require that frames_per_sample be an odd integer + assert(isinstance(frames_per_sample, int)) + assert(frames_per_sample % 2 == 1) + + num_frames = len(good_frames_mask) + + # Create a "half" scale + # NOTE: Since the scale is odd, the half + # scale will be even, because we subtract 1 + scale_minus_1 = frames_per_sample - 1 + half_scale = int(scale_minus_1 / 2) + + # First frame for which we can assign a valid velocity: + start_index = half_scale + # Final frame for which we can assign a valid velocity, plus one: + end_index = num_frames - half_scale + + # These are the indices we will use to compute the speed. We add + # a half scale here to avoid boundary issues (i.e. because we pad the. We'll subtract it out later. + # See below for more details + middle_I = np.array(np.arange(start_index, end_index, 1) + half_scale, + dtype='int32') + + """ + Our start_index frame can only have one valid start frame (frame 0) + afterwards it is invalid. In other words, if frame 0 is not good, we + can't check frame -1, or -2. + + However, in general I'd prefer to avoid doing some check on the bounds + of the frames, i.e. for looking at starting frames, I'd like to avoid + checking if the frame value is 0 or greater. + + To get around this we'll pad with bad values (which we'll never match) + then shift the final indices. In this way, we can check these "frames", + as they will have positive indices. + + e.g. + scale = 5 + half_scale = 2 + + This means the first frame in which we could possibly get a valid + velocity is frame 2, computed using frames 0 and 4 + + + F F F T T <- example good_frames_mask_padded values + 0 1 2 <- true indices (frame numbers) + 0 1 2 3 4 <- temporary indices + + NOTE: Now if frame 0 is bad, we can go left by half_scale + 1 to temp + index 1 (frame -1) or even further to temp_index 0 (frame -2). we'll + never use those values however because below we state that the values + at those indices are bad (see good_frames_mask_padded) + + """ + + # This tells us whether each value is useable or not for velocity + # Better to do this out of the loop. 
+ # For real indices (frames 1:num_frames), we rely on whether or not the + # mean position is NaN, for fake padding frames they can never be good so we + # set them to be false + stub_mask = np.zeros(half_scale, dtype=bool) + good_frames_mask_padded = \ + np.concatenate((stub_mask, good_frames_mask, stub_mask)) + + # These will be the final indices from which we estimate the velocity. + # i.e. delta_position(I) = position(right_indices(I)) - + # position(left_indices(I)) + left_I = np.empty(len(middle_I), dtype='int32') + right_I = np.empty(len(middle_I), dtype='int32') + # numpy integer arrays cannot accept NaN, which is a float concept, but + # filling them with NaN fills them with the largest negative number + # possible, -2**31. We can easily filter for this later. + left_I.fill(np.NaN) + right_I.fill(np.NaN) + + # Track which ends we haven't yet matched, for each of the middle_I's. + # since we are loopering over each possible shift, we need to track + # whether valid ends have been found for each of the middle_I's. + unmatched_left_mask = np.ones(len(middle_I), dtype=bool) + unmatched_right_mask = np.ones(len(middle_I), dtype=bool) + + # Instead of looping over each centered velocity, we loop over each possible + # shift. A shift is valid if the frame of the shift is good, and we have yet + # to encounter a match for that centered index + for shift_size in range(half_scale, frames_per_sample): + # We grab indices that are the appropriate distance from the current + # value. If we have not yet found a bound on the given side, and the + # index is valid, we keep it. + left_indices_temp = middle_I - shift_size + right_indices_temp = middle_I + shift_size + + is_good_left_mask = good_frames_mask_padded[left_indices_temp] + is_good_right_mask = good_frames_mask_padded[right_indices_temp] + + use_left_mask = unmatched_left_mask & is_good_left_mask + use_right_mask = unmatched_right_mask & is_good_right_mask + + # Change only those left_I's to our newly shifted outwards + # left_indices_temp, that the use_left_mask says should be used. + left_I[use_left_mask] = left_indices_temp[use_left_mask] + right_I[use_right_mask] = right_indices_temp[use_right_mask] + + # Flag the matched items as being matched + unmatched_left_mask[use_left_mask] = False + unmatched_right_mask[use_right_mask] = False + + # Remove the offset used to pad the numbers (discussed above) + # We have to avoid decrementing negative numbers because our negative + # number is our NaN proxy and it's already as negative as it can be + # without wrapping back up to positive again + left_I[left_I > 0] -= half_scale + right_I[right_I > 0] -= half_scale + middle_I -= half_scale + + # Filter down to usable values, in which both left and right are defined + # Remember than np.NaN is not valid number for integer numpy arrays + # so instead of checking for which entries are NaN, we check for + # which entries are negative, since no array indices can be + # negative! 
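+    # For example, if left_I = [-2**31, 5, 6] and right_I = [2, 9, -2**31],
+    # only the middle entry has both ends defined and survives the mask.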
+ valid_indices_mask = (left_I >= 0) & (right_I >= 0) + left_I = left_I[valid_indices_mask] + right_I = right_I[valid_indices_mask] + middle_I = middle_I[valid_indices_mask] + + keep_mask = np.zeros(num_frames, dtype=bool) + keep_mask[middle_I] = True + + # sum(keep_mask) should equal the number of valid velocity values + # left_I and right_I should store just these valid velocity values + assert sum(keep_mask) == len(left_I) == len(right_I) + assert all(left_I >= 0) and all(left_I < num_frames) + assert all(right_I >= 0) and all(right_I < num_frames) + return keep_mask, left_I, right_I + + +def compute_speed(fps, sx, sy, avg_body_angle, sample_time, ventral_mode=0): + """ + + Previous Name: compute_velocity + + The speed is computed not using the nearest values but values + that are separated by a sufficient time (sample_time). + If the encountered values are not valid (i.e. NaNs), the width of + time is expanded up to a maximum of 2*sample_time (or technically, + 1 sample_time in either direction) + + Parameters + ---------- + sx, sy: Two numpy arrays of shape (p, n) where p is the size of the + partition of worm's 49 points, and n is the number of frames + in the video + The worm skeleton's x and y coordinates, respectively. + + avg_body_angle: 1-dimensional numpy array of floats, of size n. + The angles between the mean of the first-order differences. + + sample_time: int + Time over which to compute velocity, in seconds. + + ventral_mode: int (0,1,2) + Options for specification of the ventral side: + - 0 = unknown (default) + - 1 = clockwise + - 2 = anticlockwise + + Returns + ------- + (speed, angular_speed, motion_direction) + Three numpy arrays of shape (n), speed, angular_speed, motion_direction + speed: + angular_speed: + motion_direction: + + Known Callers + ------------- + LocomotionVelocity + + """ + + num_frames = np.shape(sx)[1] + speed = np.full((num_frames), np.nan) + angular_speed = np.full((num_frames), np.nan) + motion_direction = np.full((num_frames), np.nan) + + # We need to go from a time over which to compute the velocity + # to a # of samples. The # of samples should be odd. + frames_per_sample = get_frames_per_sample(fps, sample_time) + + # If we don't have enough frames to satisfy our sampling scale, + # return with nothing. + if(frames_per_sample > num_frames): + return speed, angular_speed, motion_direction + + # Compute the indices that we will use for computing the velocity. We + # calculate the velocity roughly centered on each sample, but with a + # considerable width between frames that smooths the velocity estimate. + good_frames_mask = ~np.isnan(avg_body_angle) + keep_mask, left_I, right_I = h__getSpeedIndices(frames_per_sample, + good_frames_mask) + + # 1) Compute speed + # -------------------------------------------------------- + # Centroid of the current skeletal segment, frame-by-frame: + x_mean = np.mean(sx, 0) + y_mean = np.mean(sy, 0) + + dX = x_mean[right_I] - x_mean[left_I] + dY = y_mean[right_I] - y_mean[left_I] + + distance = np.sqrt(dX ** 2 + dY ** 2) + time = (right_I - left_I) / fps + + #import pdb + # pdb.set_trace() + + speed[keep_mask] = distance / time + + # 2) Compute angular speed (Formally known as direction :/) + # -------------------------------------------------------- + angular_speed[keep_mask] = h__computeAngularSpeed(time, sx, sy, + left_I, right_I, + ventral_mode) + + # 3) Sign the speed. 
+ # ------------------------------------------------------------ + # We want to know how the worm's movement direction compares + # to the average angle it had (apparently at the start) + motion_direction[keep_mask] = np.degrees(np.arctan2(dY, dX)) + + # This recentres the definition, as we are really just concerned + # with the change, not with the actual value + body_direction = np.full((num_frames), np.nan) + body_direction[keep_mask] = motion_direction[keep_mask] - \ + avg_body_angle[left_I] + + # Force all angles to be within -pi and pi + with np.errstate(invalid='ignore'): + body_direction = (body_direction + 180) % (360) - 180 + + # Sign speed[i] as negative if the angle + # body_direction[i] lies in Q2 or Q3 + with np.errstate(invalid='ignore'): + speed[abs(body_direction) > 90] = -speed[abs(body_direction) > 90] + + # (Added for wormPathCurvature) + # Sign motion_direction[i] as negative if the angle + # body_direction[i] lies in Q3 or Q4 + + with np.errstate(invalid='ignore'): + motion_direction[body_direction < 0] = - \ + motion_direction[body_direction < 0] + + if(ventral_mode == 2): # i.e. if ventral side is anticlockwise: + motion_direction = -motion_direction + + return speed, angular_speed, motion_direction + + +def get_frames_per_sample(fps, sample_time): + """ + Converts a specified sample_time from seconds to # of samples. + + Parameters + ---------- + fps: float + Video sampling rate + sample_time: float + Duration (in seconds) to sample + + Returns + ------- + int + The # of samples that corresponds to the specified sample_time. This + value will always be odd. Values are always increased to obtain the + odd value (e.g. an exact sampling of 4 samples becomes 5). Odd values + are meant to specify grabbing a set of points that is inclusive of the + end point so as to occupy the specified duration. + """ + + ostensive_sampling_scale = sample_time * fps + + half_scale = int(round(ostensive_sampling_scale / 2)) + sampling_scale = 2 * half_scale + 1 + + assert(isinstance(sampling_scale, int) or sampling_scale.is_integer()) + return int(sampling_scale) diff --git a/tierpsy/features/open_worm_analysis_toolbox/features/worm_features.py b/tierpsy/features/open_worm_analysis_toolbox/features/worm_features.py new file mode 100755 index 00000000..474bf08b --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/features/worm_features.py @@ -0,0 +1,1259 @@ +# -*- coding: utf-8 -*- +""" +WormFeatures module + +Contains the classes needed for users to calculate the features +of a worm from a NormalizedWorm instance. + +Classes +--------------------------------------- +WormMorphology +WormLocomotion +WormPosture +WormPath + +WormFeatures +FeatureProcessingSpec + + +A translation of Matlab code written by Jim Hokanson, in the +SegwormMatlabClasses GitHub repo. + +Original code path: +SegwormMatlabClasses/+seg_worm/@feature_calculator/features.m + +""" + +import copy +import csv +import os +import warnings +import tables # For loading from disk +import numpy as np +import collections # For namedtuple, OrderedDict +import pandas as pd + +from .. import utils + +from . import feature_processing_options as fpo +from . import events +from . import generic_features +from . import path_features +from . import posture_features +from . import locomotion_features +from . import locomotion_bends +from . import locomotion_turns +from . 
import morphology_features + + +FEATURE_SPEC_CSV_PATH = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'feature_metadata', + 'features_list.csv') + +""" +=============================================================================== +=============================================================================== +""" + + +class WormMorphology(object): + """ + The worm's morphology features class. + + Nature Methods Description + --------------------------------------- + 1. Length. Worm length is computed from the segmented skeleton by + converting the chain-code pixel length to microns. + + 2. Widths. Worm width is computed from the segmented skeleton. The + head, midbody, and tail widths are measured as the mean of the widths + associated with the skeleton points covering their respective sections. + These widths are converted to microns. + + 3. Area. The worm area is computed from the number of pixels within the + segmented contour. The sum of the pixels is converted to microns2. + + 4. Area/Length. + + 5. Midbody Width/Length. + + + Notes + --------------------------------------- + Formerly SegwormMatlabClasses / + +seg_worm / @feature_calculator / getMorphologyFeatures.m + + Old files that served as a reference: + morphology_process.m + schaferFeatures_process.m + + """ + + def __init__(self, features_ref): + """ + + Parameters: + ----------- + features_ref : WormFeatures + + """ + print('Calculating Morphology Features') + + nw = features_ref.nw + + self.length = nw.length + + self.width = morphology_features.Widths(features_ref) + + self.area = nw.area + + self.area_per_length = self.area / self.length + self.width_per_length = self.width.midbody / self.length + + @classmethod + def from_disk(cls, m_var): + + self = cls.__new__(cls) + + self.length = utils._extract_time_from_disk(m_var, 'length') + self.width = morphology_features.Widths.from_disk(m_var['width']) + self.area = utils._extract_time_from_disk(m_var, 'area') + self.area_per_length = utils._extract_time_from_disk( + m_var, 'areaPerLength') + self.width_per_length = utils._extract_time_from_disk( + m_var, 'widthPerLength') + + return self + + def __eq__(self, other): + + return utils.correlation( + self.length, + other.length, + 'morph.length') and self.width == other.width and utils.correlation( + self.area, + other.area, + 'morph.area') and utils.correlation( + self.area_per_length, + other.area_per_length, + 'morph.area_per_length') and utils.correlation( + self.width_per_length, + other.width_per_length, + 'morph.width_per_length') + + def __repr__(self): + return utils.print_object(self) + + def save_for_gepetto(self): + # See + # https://github.com/openworm/org.geppetto.recording/blob/master/org/geppetto/recording/CreateTestGeppettoRecording.py + pass + + +""" +=============================================================================== +=============================================================================== +""" + + +class WormLocomotion(object): + + """ + The worm's locomotion features class. 
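+
+    A construction sketch (hypothetical; assumes `features_ref` is a
+    WormFeatures instance that is mid-computation, as in the normal
+    pipeline):
+
+        locomotion = WormLocomotion(features_ref)
+        midbody_speed = locomotion.velocity.midbody.speed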
+ + Attributes + ---------- + velocity : + motion_events : + motion_mode : + crawling_bends : + foraging_bends : + turns : + + """ + + def __init__(self, features_ref): + """ + Initialization method for WormLocomotion + + Parameters + ---------- + features_ref : WormFeatures + + """ + print('Calculating Locomotion Features') + + nw = features_ref.nw + video_info = features_ref.video_info + + self.velocity = locomotion_features.LocomotionVelocity(features_ref) + + self.motion_events = \ + locomotion_features.MotionEvents(features_ref, + self.velocity.midbody.speed, + nw.length) + + self.motion_mode = self.motion_events.get_motion_mode() + + self.crawling_bends = locomotion_bends.LocomotionCrawlingBends( + features_ref, + nw.angles, + self.motion_events.is_paused, + video_info.is_segmented) + + self.foraging_bends = locomotion_bends.LocomotionForagingBends( + features_ref, + video_info.is_segmented, + video_info.ventral_mode) + + is_stage_movement = video_info.is_stage_movement + + self.turns = locomotion_turns.LocomotionTurns( + features_ref, + nw.angles, + is_stage_movement, + self.velocity.get_midbody_distance(), + nw.skeleton_x, + nw.skeleton_y) + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + + # TODO: Allow for a global config that provides more info ... + # in case anything fails ... + # + # JAH: I'm not sure how this will work. We might need to move + # away from the equality operator to a function that returns + # an equality result + + # The order here matches the order the properties are populated + # in the constructor + same_locomotion = True + + if not (self.velocity == other.velocity): + same_locomotion = False + + if not (self.motion_events == other.motion_events): + same_locomotion = False + + # Test motion codes + if not utils.correlation(self.motion_mode, other.motion_mode, + 'locomotion.motion_mode'): + same_locomotion = False + + # TODO: Define ne for all functions (instead of needing not(eq)) + if not (self.crawling_bends == other.crawling_bends): + print('Mismatch in locomotion.crawling_bends events') + same_locomotion = False + + if not (self.foraging_bends == other.foraging_bends): + print('Mismatch in locomotion.foraging events') + same_locomotion = False + + # TODO: Make eq in events be an error - use test_equality instead + # NOTE: turns is a container class that implements eq, and is not + # an EventList + if not (self.turns == other.turns): + print('Mismatch in locomotion.turns events') + same_locomotion = False + + return same_locomotion + + @classmethod + def from_disk(cls, m_var): + """ + Parameters + ---------- + + """ + + self = cls.__new__(cls) + + self.velocity = locomotion_features.LocomotionVelocity.from_disk(m_var) + + self.motion_events = locomotion_features.MotionEvents.from_disk(m_var) + + self.motion_mode = self.motion_events.get_motion_mode() + + bend_ref = m_var['bends'] + self.crawling_bends = \ + locomotion_bends.LocomotionCrawlingBends.from_disk(bend_ref) + + self.foraging_bends = \ + locomotion_bends.LocomotionForagingBends.\ + from_disk(bend_ref['foraging']) + + self.turns = locomotion_turns.LocomotionTurns.from_disk(m_var['turns']) + + return self + + +""" +=============================================================================== +=============================================================================== +""" + + +class WormPosture(object): + + """ + Worm posture feature class. 
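+
+    Note that, unlike the other feature classes, construction also needs
+    the midbody distance, which comes from locomotion, e.g. (sketch):
+
+        posture = WormPosture(features_ref,
+                              locomotion.velocity.get_midbody_distance())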
+ + Notes + ----- + Formerly: + SegwormMatlabClasses/+seg_worm/@feature_calculator/getPostureFeatures.m + + Former usage: + + Prior to this, it was originally "schaferFeatures_process" + + Formerly, + - Indices were inconsistently defined for bends relative to other code + - stdDev for bends is signed as well, based on means ... + + Unfinished Status + --------------------------------------- + (@JimHokanson, is this still true?) + - seg_worm.feature_helpers.posture.wormKinks - not yet examined + - distance - missing input to function, need to process locomotion + first + + """ + + def __init__(self, features_ref, midbody_distance): + """ + Initialization method for WormPosture + + Parameters + ---------- + normalized_worm: a NormalizedWorm instance + + """ + print('Calculating Posture Features') + + self.bends = posture_features.Bends.create(features_ref) + + self.eccentricity, self.orientation = \ + posture_features.get_eccentricity_and_orientation(features_ref) + + amp_wave_track = posture_features.AmplitudeAndWavelength( + self.orientation, features_ref) + + self.amplitude_max = amp_wave_track.amplitude_max + self.amplitude_ratio = amp_wave_track.amplitude_ratio + self.primary_wavelength = amp_wave_track.primary_wavelength + self.secondary_wavelength = amp_wave_track.secondary_wavelength + self.track_length = amp_wave_track.track_length + + self.kinks = posture_features.get_worm_kinks(features_ref) + + self.coils = posture_features.get_worm_coils(features_ref, + midbody_distance) + + self.directions = posture_features.Directions(features_ref) + + self.skeleton = posture_features.Skeleton(features_ref, 'temp') + + self.eigen_projection = posture_features.get_eigenworms(features_ref) + + """ + We need these six @property methods because otherwise eigen_projections + are the only first-class sub-extended features that are not fully + addressable by nested object references. Without these, I'd have to say: + + worm_features_object.posture.eigen_projection[0] + + Instead I can say: + + worm_features_object.posture.eigen_projection0 + + ...which is crucial for the object-data mapping, for example when + pulling a pandas DataFrame via WormFeatures.getDataFrame. 
+ """ + @property + def eigen_projection0(self): + return self.eigen_projection[0] + + @property + def eigen_projection1(self): + return self.eigen_projection[1] + + @property + def eigen_projection2(self): + return self.eigen_projection[2] + + @property + def eigen_projection3(self): + return self.eigen_projection[3] + + @property + def eigen_projection4(self): + return self.eigen_projection[4] + + @property + def eigen_projection5(self): + return self.eigen_projection[5] + + @classmethod + def from_disk(cls, p_var): + + self = cls.__new__(cls) + + self.bends = posture_features.Bends.from_disk(p_var['bends']) + + temp_amp = p_var['amplitude'] + + self.amplitude_max = utils._extract_time_from_disk(temp_amp, 'max') + self.amplitude_ratio = utils._extract_time_from_disk(temp_amp, + 'ratio') + + temp_wave = p_var['wavelength'] + self.primary_wavelength = utils._extract_time_from_disk(temp_wave, + 'primary') + self.secondary_wavelength = utils._extract_time_from_disk(temp_wave, + 'secondary') + + self.track_length = utils._extract_time_from_disk(p_var, + 'tracklength') + self.eccentricity = utils._extract_time_from_disk(p_var, + 'eccentricity') + self.kinks = utils._extract_time_from_disk(p_var, 'kinks') + + self.coils = events.EventListWithFeatures.from_disk(p_var['coils'], + 'MRC') + + self.directions = \ + posture_features.Directions.from_disk(p_var['directions']) + + # TODO: Add contours + + self.skeleton = posture_features.Skeleton.from_disk(p_var['skeleton']) + + temp_eigen_projection = \ + utils._extract_time_from_disk(p_var, 'eigenProjection', + is_matrix=True) + + self.eigen_projection = temp_eigen_projection.transpose() + + return self + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + + # TODO: It would be nice to see all failures before returning false + # We might want to make a comparison class that handles these details + # and then prints the results + + # Doing all of these comparisons and then computing the results + # allows any failures to be printed, which at this point is useful for + # getting the code to align + + # Note that the order of these matches the order in which they are + # populated in the constructor + eq_bends = self.bends == other.bends + eq_amplitude_max = utils.correlation(self.amplitude_max, + other.amplitude_max, + 'posture.amplitude_max') + eq_amplitude_ratio = utils.correlation(self.amplitude_ratio, + other.amplitude_ratio, + 'posture.amplitude_ratio', + high_corr_value=0.985) + + eq_primary_wavelength = \ + utils.correlation(self.primary_wavelength, + other.primary_wavelength, + 'posture.primary_wavelength', + merge_nans=True, + high_corr_value=0.97) + + eq_secondary_wavelength = \ + utils.correlation(self.secondary_wavelength, + other.secondary_wavelength, + 'posture.secondary_wavelength', + merge_nans=True, + high_corr_value=0.985) + + # TODO: We need a more lazy evaluation for these since they don't match + # Are they even close? 
+        # We could provide a switch for exactly equal vs mimicking the old
+        # setup, in which our goal could be to shoot for close
+        eq_track_length = utils.correlation(self.track_length,
+                                            other.track_length,
+                                            'posture.track_length')
+        eq_eccentricity = utils.correlation(self.eccentricity,
+                                            other.eccentricity,
+                                            'posture.eccentricity',
+                                            high_corr_value=0.99)
+        eq_kinks = utils.correlation(self.kinks, other.kinks,
+                                     'posture.kinks')
+
+        eq_coils = self.coils.test_equality(other.coils, 'posture.coils')
+        eq_directions = self.directions == other.directions
+        eq_skeleton = self.skeleton == other.skeleton
+        eq_eigen_projection = \
+            utils.correlation(np.ravel(self.eigen_projection),
+                              np.ravel(other.eigen_projection),
+                              'posture.eigen_projection')
+
+        # TODO: Reorder these as they appear above
+        return \
+            eq_bends and \
+            eq_eccentricity and \
+            eq_amplitude_ratio and \
+            eq_track_length and \
+            eq_kinks and \
+            eq_primary_wavelength and \
+            eq_secondary_wavelength and \
+            eq_amplitude_max and \
+            eq_skeleton and \
+            eq_coils and \
+            eq_directions and \
+            eq_eigen_projection
+
+
+"""
+===============================================================================
+===============================================================================
+"""
+
+
+class WormPath(object):
+
+    """
+    Worm path feature class.
+
+    Properties
+    ------------------------
+    range : path_features.Range
+    duration : path_features.Duration (a.k.a. dwelling)
+    coordinates : path_features.Coordinates
+    curvature : path curvature, from path_features.worm_path_curvature
+
+    Notes
+    ---------------------------------------
+    Formerly SegwormMatlabClasses /
+    +seg_worm / @feature_calculator / getPathFeatures.m
+
+    """
+
+    def __init__(self, features_ref):
+        """
+        Initialization method for WormPath
+
+        Parameters:
+        -----------
+        features_ref: a WormFeatures instance
+        """
+        print('Calculating Path Features')
+
+        nw = features_ref.nw
+
+        self.range = path_features.Range(nw.contour_x, nw.contour_y)
+
+        # Duration (aka Dwelling)
+        self.duration = path_features.Duration(features_ref, 'temp')
+
+        self.coordinates = path_features.Coordinates(features_ref, 'temp')
+
+        # Curvature
+        self.curvature = path_features.worm_path_curvature(features_ref)
+
+    # TODO: Move to class in path_features
+    @classmethod
+    def _create_coordinates(cls, x, y):
+        Coordinates = collections.namedtuple('Coordinates', ['x', 'y'])
+        return Coordinates(x, y)
+
+    @classmethod
+    def from_disk(cls, path_var):
+
+        self = cls.__new__(cls)
+
+        self.range = path_features.Range.from_disk(path_var)
+        self.duration = path_features.Duration.from_disk(path_var['duration'])
+
+        self.coordinates = \
+            path_features.Coordinates.from_disk(path_var['coordinates'])
+
+        # Make a call to utils loader
+        self.curvature = path_var['curvature'].value[:, 0]
+
+        return self
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    def __eq__(self, other):
+
+        return \
+            self.range == other.range and \
+            self.duration == other.duration and \
+            self.coordinates == other.coordinates and \
+            utils.correlation(self.curvature, other.curvature,
+                              'path.curvature',
+                              high_corr_value=0.95,
+                              merge_nans=True)
+
+        # NOTE: Unfortunately the curvature is slightly different. It
+        # looks the same, but I'm guessing there are a few off-by-one
+        # errors in it.
+
+
+"""
+===============================================================================
+===============================================================================
+"""
+
+
+class WormFeatures(object):
+
+    """
+    This is the new features class. It will eventually replace the old class
+    when things are all ready.
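+
+    A minimal usage sketch (assuming `nw` is a NormalizedWorm instance and
+    that the quoted name appears in features_list.csv):
+
+        wf = WormFeatures(nw)
+        feature = wf.get_features('morphology.length')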
+ + Accessing Features + ------------------ + Features should normally be accessed via get_feature() or via iteration + over this object. + + Design Decisions + ---------------- + # When iterating over the features, should we include null features? + => YES? => this introduces stability between different videos + # What should be returned from get_feature when it doesn't exist? + => An empty feature object NYI + # What is returned when a feature can't be computed because of a missing + dependency? + => An empty feature object NYI + + + . Alternatively + it is possible to access features directly via the 'features' attribute + however this attribute only contains computed features. + + + Attributes + ---------- + video_info : + options : + nw : + timer : + specs : {FeatureProcessingSpec} + features : {Feature} + Contains all computed features that have been requested by the user. + + + When loading from Schafer File + h : hdf5 file reference + + + + """ + + def __init__(self, nw, processing_options=None, specs='all'): + """ + + Parameters + ---------- + nw : NormalizedWorm object + specs : + + #The options will most likely change. We should have the options + #be accessible from the specs + processing_options: movement_validation.features.feature_processing_options + + #TODO: Expose just passing in feature names as well + + + """ + if processing_options is None: + processing_options = \ + fpo.FeatureProcessingOptions() + + # These are saved locally for reference by others when processing + self.video_info = nw.video_info + + self.options = processing_options + self.nw = nw + self.timer = utils.ElementTimer() + + self.initialize_features() + + # TODO: We should eventually support a list of specs as well + # TODO: We might also allow transforming the specs (like changing options), + # which this doesn't handle since we are only extracting the names + if isinstance(specs, pd.core.frame.DataFrame): + # This wouldn't be good if the specs have changed. + # We would need to change the initialize_features() call + self.get_features(specs['feature_name']) + else: + self._retrieve_all_features() + + def __iter__(self): + """ Let's allow iteration over the features """ + all_features = self.features + for temp in all_features: + yield temp + + def copy(self, new_features): + """ + This method was introduced for "feature expansion" + + Parameters + ---------- + new_features : list or dict (only list supported) + """ + new_self = self.__new__(self.__class__) + + # We need to setup features and specs + # get specs from features + + # specs : {FeatureProcessingSpec} + # features : {Feature} + + d = self.__dict__ + for key in d: + temp = d[key] + if key in ['features', 'specs', 'h', '_temp_features']: + pass + # do nothing + # setattr(new_self,'spec',temp.copy()) + else: + setattr(new_self, key, copy.copy(temp)) + + # Currently assuming a list for features + + temp_features = collections.OrderedDict() + new_specs = collections.OrderedDict() + for cur_feature in new_features: + feature_name = cur_feature.name + temp_features[feature_name] = cur_feature + new_specs[feature_name] = cur_feature.spec + + new_self._features = temp_features + new_self.specs = new_specs + + return new_self + + @classmethod + def from_disk(cls, data_file_path): + """ + Creates an instance of the class from disk. + + Ideally we would support loading of any file type. For now + we'll punt on building in any logic until we have more types to deal + with. + """ + # This ideally would allow us to load any file from disk. 
+ # + # For now we'll punt on this logic + return cls._from_schafer_file(data_file_path) + + @classmethod + def _from_schafer_file(cls, data_file_path): + """ + Load features from the Schafer lab feature (.mat) files. + """ + + self = cls.__new__(cls) + self.timer = utils.ElementTimer() + self.initialize_features() + + # I'm not thrilled about this approach. I think we should + # move the source specification into intialize_features + all_specs = self.specs + for key in all_specs: + spec = all_specs[key] + spec.source = 'mrc' + + # Load file reference for getting files from disk + with tables.File(data_file_path, 'r') as fid: + self.h = fid.get_node('worm')[:] + + # Retrieve all features + # Do we need to differentiate what we can and can not load? + self._retrieve_all_features() + + return self + + @property + def features(self): + """ + We need to filter out temporary features and features that are + not user requested + """ + d = self._features + return [ + d[x] for x in d if ( + x is not None and not d[x].is_temporary and d[x].is_user_requested)] + + def _retrieve_all_features(self): + """ + Simple function for retrieving all features. + """ + spec_dict = self.specs + # Trying to avoid 2v3 differences in Python dict iteration + for key in spec_dict: + spec = spec_dict[key] + # TODO: We could pass in the spec instance ... + # rather than resolving the instance from the name + + try: + + self._get_and_log_feature(spec.name) + except Exception as e: + msg_warn = '{} was NOT calculated. {}'.format(spec.name, e) + warnings.warn(msg_warn) + + def initialize_features(self): + """ + Reads the feature specs and initializes necessary attributes. + """ + + f_specs = get_feature_specs(as_table=False) + + self.specs = \ + collections.OrderedDict([(value.name, value) for value in f_specs]) + + self._features = collections.OrderedDict() + + # This will be removed soon + self._temp_features = collections.OrderedDict() + + def get_features(self, feature_names): + """ + This is the public interface to the user for retrieving a feature. + + Parameters + ---------- + feature_names : {string, list, pandas.Series} + + Returns + ------- + TODO: finish + + """ + if isinstance( + feature_names, + list) or isinstance( + feature_names, + pd.Series): + output = [] + for feature_name in feature_names: + self._get_and_log_feature(feature_name) + output.append(self._features[feature_name]) + else: + # Currently assuming a string + self._get_and_log_feature(feature_names) + output = self._features[feature_names] + + return output + + def _get_and_log_feature(self, feature_name, internal_request=False): + """ + TODO: Update documentation. get_features is now the public interface + + A feature is returned if it has already been computed. If it has not + been previously computed it is computed then returned. + + This function may become recursive if the feature being computed + requires other features to be computed. 
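+
+        For example (sketch): if feature A's computation internally
+        requests feature B, B is computed first with
+        internal_request=True, so B is cached but only marked as
+        user-requested if the user later asks for it explicitly.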
+ + Improvements + ------------ + - retrieve multiple features, probably via a different method + - allow passing in specs + - have a feature that returns specs by regex or wildcard + - have this call an internal requester that exposes the internal_request + rather than exposing it to the user + + See Also + -------- + FeatureProcessingSpec.get_feature + """ + + # Early return if already computed + #---------------------------------- + if feature_name in self._features: + # Check if it is being requested now via the user instead of + # internally + cur_feature = self._features[feature_name] + if not cur_feature.is_user_requested and not internal_request: + # If the logged feature is not currently user requested but + # this request is from the user, then toggle the value + # + # otherwise we don't care, leave it as it currently is + cur_feature.is_user_requested = True + + return cur_feature + + # Ensure that the feature name is valid + #------------------------------------------- + if feature_name in self.specs: + spec = self.specs[feature_name] + else: + raise KeyError( + 'Specified feature name not found in the feature specifications') + + temp = spec.compute_feature(self, internal_request=internal_request) + + # TODO: this will change + # A feature can return None, which means we can't ask the feature + # what the name is, so we go based on the spec + self._features[spec.name] = temp + return temp + + def __repr__(self): + return utils.print_object(self) + + def get_histograms(self, other_feature_sets=None): + """ + + TODO: + - Create histogram manager + - + + Improvements: + - allow filtering of which features should be included + + """ + + pass + + @staticmethod + def get_feature_spec(extended=False, show_temp_features=False): + """ + TODO: This method needs to be documented! + + I think this method is old. Jim and Michael need to talk about what + this is trying to do and how to incorporate this into the current + design. Most likely we would load via pandas and then have our current + methods rely on processing of the loaded pandas data. + + Parameters + ---------- + extended: boolean (default False) + If True, return the full 726 features, not just the 93. + show_temp_features: boolean + If False, return only actual features. Raises an exception + if both show_temp_features and extended are True + + Returns + ------------ + A pandas.DataFrame object + Contains all the feature specs in one table + + """ + # Use pandas to load the features specification + feature_spec_path = os.path.join('..', 'documentation', + 'database schema', + 'Features Specifications.xlsx') + + # Let's ignore a PendingDeprecationWarning here since my release of + # pandas seems to be using tree.getiterator() instead of tree.iter() + # It's probably fixed in the latest pandas release already + # Here's an exmaple of the issue in a different repo, along with the + # fix. 
https://github.com/python-excel/xlrd/issues/104 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + excel_file = pd.ExcelFile(feature_spec_path) + + feature_spec = excel_file.parse('FeatureSpecifications') + + # I haven't bothered to work on the logic that I'd need + # if extended == True and show_temp_features == True, so + # let's just raise an exception in that case + assert(not (extended and show_temp_features)) + + if not show_temp_features: + feature_spec = feature_spec[feature_spec['is_feature'] == 'y'] + + #feature_spec = feature_spec[['remove_partial_events']].astype(bool) + #feature_spec = feature_spec[['make_zero_if_empty']].astype(bool) + + if not extended: + feature_spec = feature_spec.set_index('sub-extended feature ID') + return feature_spec + else: + # Extend the 93 features into 726, then return that + all_data_types = ['all', 'absolute', 'positive', 'negative'] + all_motion_types = ['all', 'forward', 'paused', 'backward'] + + motion_types = pd.DataFrame({'motion_type': + ['All'] + all_motion_types}) + motion_types['is_time_series'] = [False, True, True, True, True] + + data_types = pd.DataFrame({'data_type': ['All'] + all_data_types}) + data_types['is_signed'] = [False, True, True, True, True] + + # The effect of these two left outer joins is to duplicate any + # feature rows where we have multiple data or motion types. + # Thus the number of rows expands from 93 to 726 + feature_spec_expanded = feature_spec.merge(motion_types, + on='is_time_series', + how='left') + + feature_spec_expanded = feature_spec_expanded.merge(data_types, + on='is_signed', + how='left') + + feature_spec_expanded = \ + feature_spec_expanded.set_index('sub-extended feature ID', + 'motion_type', 'data_type') + + return feature_spec_expanded + + +def get_feature_specs(as_table=True): + """ + + Loads all specs that specify how features should be processed/created. + + Currently in /features/feature_metadata/features_list.csv + + Parameters + ---------- + as_table : logical + If true, returns a Pandas dataframe. Otherwise it returns a list of + FeatureProcessingSpec objects. + + See Also + -------- + FeatureProcessingSpec + + Returns + ------- + a list of FeatureProcessingSpec + + """ + + if as_table: + #spec_df = pd.read_csv(FEATURE_SPEC_CSV_PATH,dtype={'is_final_feature':bool},true_values=['y'],false_values =['f']) + # I don't like not specifying data types initially since I don't trust + # them to guess correctly. TODO: Need to make things explicit + # there are already some incorrect guesses + df = pd.read_csv(FEATURE_SPEC_CSV_PATH) + df.is_final_feature = df.is_final_feature == 'y' + df['is_temporary'] = ~df.is_final_feature + + #self.flags = d['processing_flags'] + + # This number conversion shouldn't have happened since I think it is + # better to compare to '1' + df.is_signed = df.is_signed == 1 + df.has_zero_bin = df.has_zero_bin == 1 + df.remove_partial_events = df.remove_partial_events == 1 + + # Why didn't these convert to numbers then? + df.make_zero_if_empty = df.make_zero_if_empty == '1' + df.is_time_series = df.is_time_series == '1' + + return df + + else: + f_specs = [] + + with open(FEATURE_SPEC_CSV_PATH) as feature_metadata_file: + feature_metadata = csv.DictReader(feature_metadata_file) + + for row in feature_metadata: + f_specs.append(FeatureProcessingSpec(row)) + + return f_specs + + +class FeatureProcessingSpec(object): + """ + Information on how to get a feature. + + These are all loaded from a csv specificaton file. 
See the function
+get_feature_processing_specs which instantiates these specifications.
+
+    Attributes
+    ----------
+    source :
+        - new - from the normalized worm
+        - mrc
+    name : string
+        Feature name
+    module_name : string
+        Name of the module that contains the executing code
+    module : module
+        A module instance
+    class_name : string
+        Name of the class which should be called to create the feature
+    class_method : method
+        A method instance
+    flags : string
+        This is a string that can be passed to the class method
+
+    See Also
+    --------
+    get_feature_processing_specs
+
+    """
+
+    # This is how I am resolving a string to a module.
+    # Perhaps there is a better way ...
+    modules_dict = {'morphology_features': morphology_features,
+                    'locomotion_features': locomotion_features,
+                    'generic_features': generic_features,
+                    'locomotion_bends': locomotion_bends,
+                    'locomotion_turns': locomotion_turns,
+                    'path_features': path_features,
+                    'posture_features': posture_features}
+
+    def __init__(self, d):
+        """
+        Parameters
+        ----------
+        d: dict
+            Data in a row of the features file
+
+        See Also
+        --------
+        get_feature_processing_specs
+
+        """
+
+        self.source = 'new'
+
+        self.is_temporary = d['is_final_feature'] == 'n'
+        self.name = d['feature_name']
+        # TODO: Wrap the modules_dict lookup in a try clause with a clear
+        # error if the module hasn't been specified in the dictionary
+        self.module_name = d['module']
+
+        # We won't store these so as to facilitate pickling
+        #-----------------------------------------------------
+        #self.module = self.modules_dict[self.module_name]
+        #self.class_method = getattr(self.module, self.class_name)
+
+        self.class_name = d['class_name']
+
+        # We retrieve the class constructor or function from the module
+
+        self.flags = d['processing_flags']
+
+        # TODO: We might write a __getattr__ function and just hold
+        # onto the dict
+        self.type = d['type']
+        self.category = d['category']
+        self.display_name = d['display_name']
+        self.short_display_name = d['short_display_name']
+        self.units = d['units']
+        if self.is_temporary:
+            self.bin_width = 1
+        else:
+            self.bin_width = float(d['bin_width'])
+
+        self.is_signed = d['is_signed'] == '1'
+        self.has_zero_bin = d['has_zero_bin'] == '1'
+        self.signing_field = d['signing_field']
+        self.remove_partial_events = d['remove_partial_events'] == '1'
+        self.make_zero_if_empty = d['make_zero_if_empty'] == '1'
+        self.is_time_series = d['is_time_series'] == '1'
+
+    def compute_feature(self, wf, internal_request=False):
+        """
+        Note, the only caller of this function should be
+        WormFeatures._get_and_log_feature().
+
+        This method takes care of the logic of retrieving a feature.
+
+        ALL features are created or loaded via this method.
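+
+        Dispatch sketch: the executing module is looked up from
+        self.module_name in modules_dict, the constructor via
+        getattr(module, self.class_name), and when the spec's source is
+        'mrc' the class's from_schafer_file classmethod is used instead
+        of the constructor.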
+
+        Arguments
+        ---------
+        wf : WormFeatures
+            This is primarily needed to facilitate requesting additional
+            features from the feature currently being computed.
+
+        """
+
+        # Resolve who is going to populate the feature
+        #--------------------------------------------
+        # 'source' should be overwritten by the feature loading method
+        # after loading all the specs
+
+        module = self.modules_dict[self.module_name]
+        class_method = getattr(module, self.class_name)
+
+        if self.source == 'new':
+            final_method = class_method
+        else:  # mrc. TODO: check explicitly for 'mrc', otherwise raise an error
+            final_method = getattr(class_method, 'from_schafer_file')
+
+        timer = wf.timer
+        timer.tic()
+
+        # The flags input is optional; if no flag is present
+        # we currently assume that the constructor doesn't require
+        # the input
+        if len(self.flags) == 0:
+            temp = final_method(wf, self.name)
+        else:
+            # NOTE: All current flags are just a single string. We don't have
+            # anything fancy in place for multiple parameters or for doing
+            # any fancy parsing
+            temp = final_method(wf, self.name, self.flags)
+
+        elapsed_time = timer.toc(self.name)
+
+        # This is an assignment of global attributes that the spec knows
+        # about. This could eventually be handled by a super() call to
+        # Feature, but for now this works. The only problem is that we
+        # override values after the constructor rather than letting the
+        # constructor override values from super()
+
+        if temp is None:
+            # A None feature would break the attribute assignments below,
+            # so fail loudly instead of dropping into the debugger.
+            raise ValueError(
+                'Feature constructor for "%s" returned None' % self.name)
+
+        # TODO: This is confusing for children features that rely on a
+        # temporary parent. Unfortunately this is populated after
+        # the feature has been computed.
+        # We could allow children to copy the value from the parent but
+        # then we would need to check for that here ...
+        temp.computation_time = elapsed_time
+
+        # We can get rid of the name assignments in class and use this ...
+        temp.name = self.name
+        temp.is_temporary = self.is_temporary
+        temp.spec = self
+        temp.is_user_requested = not internal_request
+
+        # Problem features
+        #--------------------------
+        if not hasattr(temp, 'missing_from_disk'):
+            temp.missing_from_disk = False
+
+        if not hasattr(temp, 'missing_dependency'):
+            temp.missing_dependency = False
+
+        if not hasattr(temp, 'empty_video'):
+            temp.empty_video = False
+
+        if not hasattr(temp, 'no_events'):
+            temp.no_events = False
+
+        return temp
+
+    def __repr__(self):
+        return utils.print_object(self)
+
+    def copy(self):
+        # Not sure if I'll need to do anything here ...
+        return copy.copy(self)
diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/__init__.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/basic_worm.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/basic_worm.py
new file mode 100755
index 00000000..4757838d
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/basic_worm.py
@@ -0,0 +1,716 @@
+# -*- coding: utf-8 -*-
+"""
+BasicWorm, WormPartition, JSON_Serializer
+
+Credit to Christopher R. Wagner at
+http://robotfantastic.org/serializing-python-data-to-json-some-edge-cases.html
+for the following six functions:
+isnamedtuple
+serialize
+restore
+data_to_json
+json_to_data
+nested_equal
+
+"""
+
+import numpy as np
+import warnings
+import copy
+import tables
+import matplotlib.pyplot as plt
+
+import json
+from collections import namedtuple, OrderedDict
+from collections.abc import Iterable
+
+from .. import config, utils
+from .pre_features import WormParsing
+from .video_info import VideoInfo
+
+#%%
+
+
+class JSON_Serializer():
+    """
+    A class that can save all of its attributes to a JSON file, or
+    load them from a JSON file.
+
+    """
+
+    def __init__(self):
+        pass
+
+    def save_to_JSON(self, JSON_path):
+        serialized_data = data_to_json(list(self.__dict__.items()))
+
+        with open(JSON_path, 'w') as outfile:
+            outfile.write(serialized_data)
+
+    def load_from_JSON(self, JSON_path):
+        with open(JSON_path, 'r') as infile:
+            serialized_data = infile.read()
+
+        member_list = json_to_data(serialized_data)
+
+        for member in member_list:
+            setattr(self, member[0], member[1])
+
+#%%
+
+
+class UnorderedWorm(JSON_Serializer):
+    """
+    Encapsulates the notion of worm contour or skeleton data that might
+    have been obtained from a computer vision operation.
+
+    * We don't assume the contour or skeleton points are evenly spaced,
+    but we do assume they are in order as you walk along the skeleton.
+
+    * We DON'T assume that the head and the tail are at points 0 and -1,
+    respectively - hence the use of the word "unordered" in the name of this
+    class.
+
+    * We don't assume that there is the same number of contour points
+    in each frame. This means we can't use a simple ndarray representation
+    for the contour. Instead, we use a list of single-dimension numpy
+    arrays.
+
+
+    """
+
+    def __init__(self, other=None):
+        attributes = ['unordered_contour', 'unordered_skeleton',
+                      'head', 'tail', 'video_info']
+
+        if other is None:
+            for a in attributes:
+                setattr(self, a, None)
+
+        else:
+            # Copy constructor
+            for a in attributes:
+                setattr(self, a, copy.deepcopy(getattr(other, a)))
+
+    @classmethod
+    def from_skeleton_factory(cls, skeleton, head=None, tail=None):
+        """
+        A factory method taking the simplest possible input: just a skeleton.
+        Assumes the 0th point is the head and the (n-1)th point is the tail.
+        No contour.
+
+        Parameters
+        ----------
+        skeleton : list of ndarray or ndarray
+            If ndarray, we are in the simpler "homocardinal" case
+            If list of ndarray, each frame can have a varying number of points
+        head: ndarray containing the position of the head.
+        tail: ndarray containing the position of the tail.
+
+        """
+        uow = cls()
+
+        # if len(np.shape(skeleton)) != 3 or np.shape(skeleton)[1] != 2:
+        #    raise Exception("Provided skeleton must have "
+        #                    "shape (n_points,2,n_frames)")
+        uow.skeleton = skeleton
+
+        # Match the docstring: point 0 is the head, point n-1 the tail.
+        if tail is None:
+            uow.tail = skeleton[-1, :, :]
+        else:
+            uow.tail = tail
+
+        if head is None:
+            uow.head = skeleton[0, :, :]
+        else:
+            uow.head = head
+
+        # TODO: First check for ndarray or list, if ndarray use skeleton.shape
+        # if len(np.shape(skeleton)) != 3 or np.shape(skeleton)[1] != 2:
+        #    raise Exception("Provided skeleton must have "
+        #                    "shape (n_points,2,n_frames)")
+
+        # TODO: We need to handle the list case
+
+        return uow
+
+    @classmethod
+    def from_contour_factory(cls, contour, head=None, tail=None):
+        pass
+
+    def ordered_ventral_contour(self):
+        """
+        Return the vulva side of the ordered heterocardinal contour.
+
+        i.e. with tail at position -1 and head at position 0.
+ + """ + # TODO + pass + + def ordered_dorsal_contour(self): + """ + Return the non-vulva side of the ordered heterocardinal contour. + + i.e. with tail at position -1 and head at position 0. + + """ + # TODO + pass + + def ordered_skeleton(self): + """ + Return the ordered skeleton. + + i.e. with tail at position -1 and head at position 0. + + """ + # TODO + pass + +#%% + + +class BasicWorm(JSON_Serializer): + """ + A worm's skeleton and contour, not necessarily "normalized" to 49 points, + and possibly heterocardinal (i.e. possibly with a varying number of + points per frame). + + Attributes + ---------- + h_skeleton : list, where each element is a numpy array of shape (2,k_i) + Each element of the list is a frame. + Where k_i is the number of skeleton points in frame i. + The first axis of the numpy array, having len 2, is the x and y. + Missing frames should be identified by None. + h_ventral_contour: Same type and shape as skeleton (see above) + The vulva side of the contour. + h_dorsal_contour: Same type and shape as skeleton (see above) + The non-vulva side of the contour. + video_info : An instance of the VideoInfo class. + (contains metadata attributes of the worm video) + + """ + + def __init__(self, other=None): + attributes = ['_h_skeleton', '_h_ventral_contour', + '_h_dorsal_contour'] + + if other is None: + for a in attributes: + setattr(self, a, None) + + self.video_info = VideoInfo() + else: + # Copy constructor + for a in attributes: + setattr(self, a, copy.deepcopy(getattr(other, a))) + + @classmethod + def from_schafer_file_factory(cls, data_file_path): + bw = cls() + + with tables.File(data_file_path, 'r') as h: + # These are all HDF5 'references' + all_ventral_contours_refs = h.get_node('all_vulva_contours')[:] + all_dorsal_contours_refs = h.get_node('all_non_vulva_contours')[:] + all_skeletons_refs = h.get_node('all_skeletons')[:] + + is_stage_movement = utils._extract_time_from_disk( + h, 'is_stage_movement') + is_valid = utils._extract_time_from_disk(h, 'is_valid') + + all_skeletons = [] + all_ventral_contours = [] + dorsal_contour = [] + + for valid_frame, iFrame in zip(is_valid, range(is_valid.size)): + if valid_frame: + all_skeletons.append( + h[all_skeletons_refs[iFrame][0]].value) + all_ventral_contours.append( + h[all_ventral_contours_refs[iFrame][0]].value) + dorsal_contour.append( + h[all_dorsal_contours_refs[iFrame][0]].value) + else: + all_skeletons.append(None) + all_ventral_contours.append(None) + dorsal_contour.append(None) + + # Video Metadata + is_stage_movement = is_stage_movement.astype(bool) + is_valid = is_valid.astype(bool) + + # A kludge, we drop frames in is_stage_movement that are in excess + # of the number of frames in the video. It's unclear why + # is_stage_movement would be longer by 1, which it was in our + # canonical example. + is_stage_movement = is_stage_movement[0:len(all_skeletons)] + + # 5. Derive frame_code from the two pieces of data we have, + # is_valid and is_stage_movement. + bw.video_info.frame_code = (1 * is_valid + + 2 * is_stage_movement + + 100 * ~(is_valid | is_stage_movement)) + + # We purposely ignore the saved skeleton information contained + # in the BasicWorm, preferring to derive it ourselves. 
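+        # (The h_skeleton property below recomputes the skeleton lazily
+        # from the two contours the first time it is requested, so
+        # nothing is lost by clearing it here.)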
+ bw.__remove_precalculated_skeleton() + #bw.h_skeleton = all_skeletons + + bw._h_ventral_contour = all_ventral_contours + bw._h_dorsal_contour = dorsal_contour + + return bw + + @classmethod + def from_contour_factory(cls, ventral_contour, dorsal_contour): + """ + Return a BasicWorm from a normalized ventral_contour and dorsal_contour + + Parameters + --------------- + ventral_contour: numpy array of shape (49,2,n) + dorsal_contour: numpy array of shape (49,2,n) + + Returns + ---------------- + BasicWorm object + + """ + + + if not isinstance(ventral_contour, (list,tuple)): + # we need to change the data from a (49,2,n) array to a list of (2,49) + assert(np.shape(ventral_contour) == np.shape(dorsal_contour)) + assert ventral_contour.shape[1] == 2 + h_ventral_contour = WormParsing._h_array2list(ventral_contour) + h_dorsal_contour = WormParsing._h_array2list(dorsal_contour) + else: + h_ventral_contour = ventral_contour + h_dorsal_contour = dorsal_contour + + # Here I am checking that the contour missing frames are aligned. + # I prefer to populate the frame_code in normalized worm. + assert all( v == d for v,d in zip(h_ventral_contour, h_dorsal_contour) if v is None or d is None) + + # Having converted our normalized contour to a heterocardinal-type + # contour that just "happens" to have all its frames with the same + # number of skeleton points, we can just call another factory method + # and we are done: + + bw = cls() + bw.h_ventral_contour = h_ventral_contour + bw.h_dorsal_contour = h_dorsal_contour + + return bw + + @classmethod + def from_skeleton_factory(cls, skeleton, extrapolate_contour=False): + if not extrapolate_contour: + ''' + Construct the object using only the skeletons without contours. + This is a better default because the contour interpolation will produce a fake contour. + ''' + bw = cls() + + #other option will be to give a list of None, but this make more obvious when there is a mistake + bw.h_ventral_contour = None + bw.h_dorsal_contour = None + if isinstance(skeleton, (list,tuple)): + bw._h_skeleton = skeleton + else: + assert skeleton.shape[1] == 2 + bw._h_skeleton = WormParsing._h_array2list(skeleton) + return bw + + else: + + """ + Derives a contour from the skeleton + THIS PART IS BUGGY, THE INTERPOLATION WORKS ONLY IN A LIMITED NUMBER OF CASES + TODO: right now the method creates the bulge entirely in the y-axis, + across the x-axis. Instead the bulge should be rotated to + apply across the head-tail orientation. + + TODO: the bulge should be more naturalistic than the simple sine wave + currently used. 
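+
+        (For scale, a sketch of the current bulge: with x sampled on
+        [0, 1], each contour is offset from the skeleton by
+        sin(x * pi) * 50 in y, peaking at 50 coordinate units at the
+        mid-body and vanishing at the head and tail.)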
+ + + """ + # Make ventral_contour != dorsal_contour by making them "bulge" + # in the middle, in a basic simulation of what a real worm looks like + bulge_x = np.zeros((config.N_POINTS_NORMALIZED)) + # Create the "bulge" + x = np.linspace(0, 1, config.N_POINTS_NORMALIZED) + bulge_y = np.sin(x * np.pi) * 50 + + # Shape is (49,2,1): + bulge_frame1 = np.rollaxis(np.dstack([bulge_x, bulge_y]), + axis=0, start=3) + # Repeat the bulge across all frames: + num_frames = skeleton.shape[2] + bulge_frames = np.repeat(bulge_frame1, num_frames, axis=2) + + # Apply the bulge above and below the skeleton + ventral_contour = skeleton + bulge_frames + dorsal_contour = skeleton - bulge_frames + + # Now we are reduced to the contour factory case: + return BasicWorm.from_contour_factory(ventral_contour, dorsal_contour) + + + @property + def h_ventral_contour(self): + return self._h_ventral_contour + + @h_ventral_contour.setter + def h_ventral_contour(self, x): + self._h_ventral_contour = x + self.__remove_precalculated_skeleton() + + @property + def h_dorsal_contour(self): + return self._h_dorsal_contour + + @h_dorsal_contour.setter + def h_dorsal_contour(self, x): + self._h_dorsal_contour = x + self.__remove_precalculated_skeleton() + + def __remove_precalculated_skeleton(self): + """ + Removes the precalculated self._h_skeleton, if it exists. + + This is typically called if we've potentially changed something, + i.e. if we've loaded new values for self.h_ventral_contour or + self.h_non_vulva contour. + + In these cases we must be sure to delete h_skeleton, since it is + derived from ventral_contour and dorsal_contour. + + It will be recalculated if it's ever asked for. + + """ + try: + del(self._h_skeleton) + except AttributeError: + pass + + @property + def h_skeleton(self): + """ + If self._h_skeleton has been defined, then return it. + + Otherwise, try to extrapolate it from the contour. + + Note: This method does not have an obvious use case. The normal + pipeline is to call NormalizedWorm.from_BasicWorm_factory, which will + calculate a skeleton. + + """ + try: + return self._h_skeleton + except AttributeError: + # Extrapolate skeleton from contour + # TODO: improve this: for now + self._h_widths, self._h_skeleton = \ + WormParsing.compute_skeleton_and_widths(self.h_ventral_contour, self.h_dorsal_contour) + #how can i call _h_widths??? + + return self._h_skeleton + + def plot_frame(self, frame_index): + """ + Plot the contour and skeleton the worm for one of the frames. + + Parameters + ---------------- + frame_index: int + The desired frame # to plot. + + """ + vc = self.h_ventral_contour[frame_index] + dc = self.h_dorsal_contour[frame_index] + s = self.h_skeleton[frame_index] + + plt.scatter(vc[0, :], vc[1, :]) + plt.scatter(dc[0, :], dc[1, :]) + plt.scatter(s[0, :], s[1, :]) + plt.gca().set_aspect('equal', adjustable='box') + plt.show() + + def validate(self): + """ + Validate that self is a well-defined BasicWorm instance. + + """ + assert(len(self.h_ventral_contour) == len(self.h_dorsal_contour)) + + def __repr__(self): + return utils.print_object(self) + + def __eq__(self, other): + """ + Compare this BasicWorm against another. 
+ + """ + attribute_list = ['h_ventral_contour', 'h_dorsal_contour', + 'h_skeleton', 'video_info'] + + return utils.compare_attributes(self, other, attribute_list) + + +#%% +class WormPartition(): + + def __init__(self): + # These are RANGE values, so the last value is not inclusive + self.worm_partitions = {'head': (0, 8), + 'neck': (8, 16), + 'midbody': (16, 33), + 'old_midbody_velocity': (20, 29), + 'hips': (33, 41), + 'tail': (41, 49), + # refinements of ['head'] + 'head_tip': (0, 3), + 'head_base': (5, 8), # "" + # refinements of ['tail'] + 'tail_base': (41, 44), + 'tail_tip': (46, 49), # "" + 'all': (0, 49), + # neck, midbody, and hips + 'body': (8, 41)} + + self.worm_partition_subsets = { + 'normal': ( + 'head', 'neck', 'midbody', 'hips', 'tail'), 'first_third': ( + 'head', 'neck'), 'second_third': ( + 'midbody',), 'last_third': ( + 'hips', 'tail'), 'all': ( + 'all',)} + + def get_partition_subset(self, partition_type): + """ + There are various ways of partitioning the worm's 49 points. + this method returns a subset of the worm partition dictionary + + TODO: This method still is not obvious to me. Also, we should move + these things to a separate class. + + Parameters + --------------------------------------- + partition_type: string + e.g. 'head' + + Usage + --------------------------------------- + For example, to see the mean of the head and the mean of the neck, + use the partition subset, 'first_third', like this: + + nw = NormalizedWorm(....) + + width_dict = {k: np.mean(nw.get_partition(k), 0) for k in ('head', 'neck')} + + OR, using self.worm_partition_subsets, + + s = nw.get_paritition_subset('first_third') + # i.e. s = {'head':(0,8), 'neck':(8,16)} + + width_dict = {k: np.mean(nw.get_partition(k), 0) for k in s.keys()} + + Notes + --------------------------------------- + Translated from get.ALL_NORMAL_INDICES in SegwormMatlabClasses / + +seg_worm / @skeleton_indices / skeleton_indices.m + + """ + + # parition_type is assumed to be a key for the dictionary + # worm_partition_subsets + p = self.worm_partition_subsets[partition_type] + + # Return only the subset of partitions contained in the particular + # subset of interest, p. + return {k: self.worm_partitions[k] for k in p} + + def get_subset_partition_mask(self, name): + """ + Returns a boolean mask - for working with arrays given a partition. + + """ + keys = self.worm_partition_subsets[name] + mask = np.zeros(49, dtype=bool) + for key in keys: + mask = mask | self.partition_mask(key) + + return mask + + def partition_mask(self, partition_key): + """ + Returns a boolean numpy array corresponding to the partition requested. + + """ + mask = np.zeros(49, dtype=bool) + slice_val = self.worm_partitions[partition_key] + mask[slice(*slice_val)] = True + return mask + + def get_partition(self, partition_key, data_key='skeletons', + split_spatial_dimensions=False): + """ + Retrieve partition of a measurement of the worm, that is, across all + available frames but across only a subset of the 49 points. + + Parameters + --------------------------------------- + partition_key: string + The desired partition. e.g. 'head', 'tail', etc. + + #TODO: This should be documented better + + INPUT: a partition key, and an optional data key. + If split_spatial_dimensions is True, the partition is returned + separated into x and y + OUTPUT: a numpy array containing the data requested, cropped to just + the partition requested. 
+ (so the shape might be, say, 4xn if data is 'angles') + + data_key: string (optional) + The desired measurement (default is 'skeletons') + + split_spatial_dimensions: bool (optional) + If True, the partition is returned separated into x and y + + Returns + --------------------------------------- + A numpy array containing the data requested, cropped to just + the partition requested. + (so the shape might be, say, 4xn if data is 'angles') + + Notes + --------------------------------------- + Translated from get.ALL_NORMAL_INDICES in SegwormMatlabClasses / + +seg_worm / @skeleton_indices / skeleton_indices.m + + """ + # We use numpy.split to split a data_dict element into three, cleaved + # first by the first entry in the duple worm_partitions[partition_key], + # and second by the second entry in that duple. + + # Taking the second element of the resulting list of arrays, i.e. [1], + # gives the partitioned component we were looking for. + part = self.worm_partitions[partition_key] + + worm_attribute_values = getattr(self, data_key) + if(len(worm_attribute_values) != 0): + # Let's suppress the warning about zero arrays being reshaped + # since that's irrelevant since we are only looking at the + # non-zero array in the middle i.e. the 2nd element i.e. [1] + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=FutureWarning) + partition = np.split(worm_attribute_values, + part)[1] + if(split_spatial_dimensions): + return partition[:, 0, :], partition[:, 1, :] + else: + return partition + else: + return None + + +#%% + +def isnamedtuple(obj): + """ + Heuristic check if an object is a namedtuple. + + """ + return isinstance(obj, tuple) \ + and hasattr(obj, "_fields") \ + and hasattr(obj, "_asdict") \ + and callable(obj._asdict) + + +def serialize(data): + """ + """ + + if data is None or isinstance(data, (bool, int, float, str)): + return data + if isinstance(data, list): + return [serialize(val) for val in data] + if isinstance(data, OrderedDict): + return {"py/collections.OrderedDict": + [[serialize(k), serialize(v)] for k, v in data.items()]} + if isnamedtuple(data): + return {"py/collections.namedtuple": { + "type": type(data).__name__, + "fields": list(data._fields), + "values": [serialize(getattr(data, f)) for f in data._fields]}} + if isinstance(data, dict): + if all(isinstance(k, str) for k in data): + return {k: serialize(v) for k, v in data.items()} + return {"py/dict": [[serialize(k), serialize(v)] + for k, v in data.items()]} + if isinstance(data, tuple): + return {"py/tuple": [serialize(val) for val in data]} + if isinstance(data, set): + return {"py/set": [serialize(val) for val in data]} + if isinstance(data, np.ndarray): + return {"py/numpy.ndarray": { + "values": data.tolist(), + "dtype": str(data.dtype)}} + raise TypeError("Type %s not data-serializable" % type(data)) + + +def restore(dct): + """ + """ + + if "py/dict" in dct: + return dict(dct["py/dict"]) + if "py/tuple" in dct: + return tuple(dct["py/tuple"]) + if "py/set" in dct: + return set(dct["py/set"]) + if "py/collections.namedtuple" in dct: + data = dct["py/collections.namedtuple"] + return namedtuple(data["type"], data["fields"])(*data["values"]) + if "py/numpy.ndarray" in dct: + data = dct["py/numpy.ndarray"] + return np.array(data["values"], dtype=data["dtype"]) + if "py/collections.OrderedDict" in dct: + return OrderedDict(dct["py/collections.OrderedDict"]) + return dct + + +def data_to_json(data): + """ + """ + + return json.dumps(serialize(data)) + + +def json_to_data(s): + """ + """ + 
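+    # A round-trip sketch (hypothetical data):
+    #   s = data_to_json({'xy': np.zeros(2)})
+    #   -> '{"xy": {"py/numpy.ndarray": {"values": [0.0, 0.0], "dtype": "float64"}}}'
+    #   json_to_data(s) then restores {'xy': array([0., 0.])}.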
+    return json.loads(s, object_hook=restore)
+
+
+def nested_equal(v1, v2):
+    """
+    Compares two complex data structures.
+
+    This handles the case where numpy arrays are leaf nodes.
+
+    """
+    if isinstance(v1, str) or isinstance(v2, str):
+        return v1 == v2
+    if isinstance(v1, np.ndarray) or isinstance(v2, np.ndarray):
+        return np.array_equal(v1, v2)
+    if isinstance(v1, dict) and isinstance(v2, dict):
+        return nested_equal(v1.items(), v2.items())
+    if isinstance(v1, Iterable) and isinstance(v2, Iterable):
+        return all(nested_equal(sub1, sub2) for sub1, sub2 in zip(v1, v2))
+    return v1 == v2
diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/normalized_worm.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/normalized_worm.py
new file mode 100755
index 00000000..e0d42c37
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/normalized_worm.py
@@ -0,0 +1,785 @@
+# -*- coding: utf-8 -*-
+"""
+This module defines the NormalizedWorm class
+
+"""
+
+import numpy as np
+import scipy.io
+
+import copy
+import warnings
+import os
+import matplotlib.pyplot as plt
+
+from .. import config, utils
+from .basic_worm import WormPartition
+from .basic_worm import BasicWorm
+from .pre_features import WormParsing
+from .pre_features_helpers import WormParserHelpers
+from .video_info import VideoInfo
+
+
+class NormalizedWorm(WormPartition):
+    """
+    Encapsulates the notion of a worm's elementary measurements, scaled
+    (i.e. "normalized") to 49 points along the length of the worm.
+
+    The data consists of 7 Numpy arrays (where n is the number of frames):
+    - Of shape (49,2,n):
+        ventral_contour
+        dorsal_contour
+        skeleton
+    - Of shape (49,n):
+        angles
+        widths
+    - Of shape (n):
+        length
+        area
+
+    It also contains video metadata, in:
+        video_info : An instance of VideoInfo
+
+    """
+
+    def __init__(self, other=None):
+        """
+        Populates an empty normalized worm.
+        If other is specified, this becomes a copy constructor.
+
+        """
+        WormPartition.__init__(self)
+
+        if other is None:
+            # By default, leave all variables uninitialized
+            # Initialize an empty VideoInfo instance, though:
+            self.video_info = VideoInfo()
+        else:
+            # Copy constructor
+            attributes = ['skeleton', 'ventral_contour', 'dorsal_contour',
+                          'angles', 'widths', 'length', 'area',
+                          'video_info']
+            for a in attributes:
+                setattr(self, a, copy.deepcopy(getattr(other, a)))
+
+    @classmethod
+    def from_BasicWorm_factory(cls, basic_worm, frames_to_plot_widths=[]):
+        """
+        Factory classmethod for creating a normalized worm with a basic_worm
+        as input. This requires calculating all the "pre-features" of
+        the worm.
+
+        Parameters
+        -----------
+        basic_worm: Instance of BasicWorm. Contains either:
+            h_skeleton AND/OR
+            h_ventral_contour and h_dorsal_contour
+        frames_to_plot_widths: list of ints
+            Optional list of frames to plot, to show exactly how the
+            widths and skeleton were calculated.
+
+        Returns
+        -----------
+        An instance of NormalizedWorm
+
+        # TODO: Need to add on testing for normalized data as an input
+
+        """
+        nw = cls()
+        bw = basic_worm
+        nw.video_info = bw.video_info
+
+        if bw.h_ventral_contour is not None:
+            # 1. Derive skeleton and widths from the contour
+            nw.widths, h_skeleton = \
+                WormParsing.compute_skeleton_and_widths(bw.h_ventral_contour,
+                                                        bw.h_dorsal_contour,
+                                                        frames_to_plot_widths)
+            # 2. Normalize the skeleton, widths and contour to 49 points
+            # per frame
+            nw.skeleton = WormParserHelpers.\
+                normalize_all_frames_xy(h_skeleton,
+                                        config.N_POINTS_NORMALIZED)
+
+            nw.widths = WormParserHelpers.\
+                normalize_all_frames(nw.widths, h_skeleton,
+                                     config.N_POINTS_NORMALIZED)
+
+            nw.ventral_contour = WormParserHelpers.\
+                normalize_all_frames_xy(bw.h_ventral_contour,
+                                        config.N_POINTS_NORMALIZED)
+
+            nw.dorsal_contour = WormParserHelpers.\
+                normalize_all_frames_xy(bw.h_dorsal_contour,
+                                        config.N_POINTS_NORMALIZED)
+        else:
+            # With no contour, let's assume we have a skeleton.
+            # Measurements that cannot be calculated (e.g. areas) are simply
+            # marked None.
+            nw.skeleton = WormParserHelpers.\
+                normalize_all_frames_xy(bw.h_skeleton,
+                                        config.N_POINTS_NORMALIZED)
+            nw.ventral_contour = None
+            nw.dorsal_contour = None
+
+        # Give frame code 1 if the frame is good and the general error
+        # code 100 if the frame is bad.
+        nan_mask = np.all(np.isnan(nw.skeleton), axis=(0, 1))
+        nw.video_info.frame_code = 1 * ~nan_mask + 100 * nan_mask
+
+        return nw
+
+    @classmethod
+    def from_normalized_array_factory(cls, skeleton, widths,
+                                      ventral_contour, dorsal_contour):
+        """
+        Construct a NormalizedWorm directly from previously normalized
+        data. Some inputs could probably be made optional, but in that
+        case it is better to use BasicWorm.
+        """
+        # Check that the dimensions are correct
+        dat = (skeleton, ventral_contour, dorsal_contour, widths)  # pack to make calculations easier
+        tot_frames = skeleton.shape[-1]
+        assert all(x.shape[0] == config.N_POINTS_NORMALIZED for x in dat)
+        assert all(x.shape[-1] == tot_frames for x in dat)
+        assert all(x.shape[1] == 2 for x in (skeleton, ventral_contour, dorsal_contour))
+
+        nw = cls()
+        nw.ventral_contour = ventral_contour
+        nw.dorsal_contour = dorsal_contour
+        nw.skeleton = skeleton
+        nw.widths = widths
+
+        # Give frame code 1 if the frame is good and the general error
+        # code 100 if the frame is bad.
+        nan_mask = np.all(np.isnan(nw.skeleton), axis=(0, 1))
+        nw.video_info.frame_code = 1 * ~nan_mask + 100 * nan_mask
+
+        return nw
+
+    @property
+    def length(self):
+        try:
+            return self._length
+        except AttributeError:
+            self._length = WormParsing.compute_skeleton_length(self.skeleton)
+            return self._length
+
+    @property
+    def area(self):
+        try:
+            return self._area
+        except AttributeError:
+            if self.signed_area is not None:
+                self._area = np.abs(self.signed_area)
+            else:
+                self._area = None
+            return self._area
+
+    @property
+    def signed_area(self):
+        try:
+            return self._signed_area
+        except AttributeError:
+            if self.ventral_contour is not None:
+                self._signed_area = WormParsing.compute_signed_area(self.contour)
+            else:
+                self._signed_area = None
+            return self._signed_area
+
+    @property
+    def angles(self):
+        try:
+            return self._angles
+        except AttributeError:
+            # I could use the unnormalized skeleton (more points), but it
+            # shouldn't make a huge difference.
+            self._angles = WormParsing.compute_angles(self.skeleton)
+
+            if self.video_info.ventral_mode == 2:
+                # Switch the angle sign if the contour orientation is
+                # anticlockwise
+                self._angles = -self._angles
+
+            return self._angles
+
+        # A second option would be to use the contour orientation to find
+        # the correct side
+        # if self.signed_area is not None:
+        #     #I want to use the signed area to determine the contour orientation.
+        #     #first assert all the contours have the same orientation. This might be a problem
+        #     #if the worm does change ventral/dorsal orientation, but for the moment let's make it a requirement.
+ # valid = self.signed_area[~np.isnan(self.signed_area)] + # assert np.all(valid>=0) if valid[0]>=0 else np.all(valid<=0) + + # #if the orientation is anticlockwise (negative signed area) change the sign of the angles + # if valid[0] < 0: + # self._angles *= -1 + + + + + + + @classmethod + def from_schafer_file_factory(cls, data_file_path): + """ + Load full Normalized Worm data from the Schafer File + + data_file_path: the path to the MATLAB file + + These files were created at the Schafer Lab in a format used + prior to MATLAB's switch to HDF5, which they did in + MATLAB version 7.3. + + """ + nw = cls() + nw.video_info = VideoInfo() + + if(not os.path.isfile(data_file_path)): + raise Exception("Data file not found: " + data_file_path) + else: + data_file = scipy.io.loadmat(data_file_path, + # squeeze unit matrix dimensions: + squeeze_me=True, + # force return numpy object + # array: + struct_as_record=False) + + # All the action is in data_file['s'], which is a numpy.ndarray + # where data_file['s'].dtype is an array showing how the data is + # structured. It is structured in precisely the order specified + # in data_keys below: + + staging_data = data_file['s'] + + # NOTE: These are aligned to the order in the files. + # these will be the keys of the dictionary data_dict + data_keys = [ + # this just contains a string for where to find the + # eigenworm file. we do not use this, however, since + # the eigenworm postures are universal to all worm files, + # so the file is just stored in the /features directory + # of the source code, and is loaded at the features + # calculation step + 'EIGENWORM_PATH', + # We can't load this one since we have an @property method + # whose name clashes with it, derived from frame_codes + #'segmentation_status', + # Each code corresponds to a success / failure mode of the + # computer vision algorithm. + 'frame_codes', # shape is (n) integer + 'vulva_contours', # shape is (49, 2, n) integer + 'non_vulva_contours', # shape is (49, 2, n) integer + 'skeletons', # shape is (49, 2, n) integer + 'angles', # shape is (49, n) integer (degrees) + 'in_out_touches', # shape is (49, n) + 'lengths', # shape is (n) integer + 'widths', # shape is (49, n) integer + 'head_areas', # shape is (n) integer + 'tail_areas', # shape is (n) integer + 'vulva_areas', # shape is (n) integer + 'non_vulva_areas', # shape is (n) integer + 'x', # shape is (49, n) integer + 'y'] # shape is (49, n) integer + + # Here I use powerful python syntax to reference data elements of + # s dynamically through built-in method getattr + # that is, getattr(s, x) works syntactically just like s.x, + # only x is a variable, so we can do a list comprehension with it! + for key in data_keys: + # Some pre-features are dynamically calculated so their actual + # data is stored with a '_' prefix. + if key == 'angles': + out_key = '_' + key + else: + out_key = key + setattr(nw, out_key, getattr(staging_data, key)) + + # We don't need the eigenworm path here, as it's the same + # for all worm files. + del(nw.EIGENWORM_PATH) + # x and y are redundant since that information is already + # in "skeletons" + del(nw.x) + del(nw.y) + # in_out_touches: I can find no reference to them in Ev's thesis, + # nor does any of the feature calculation code depend on them. + # the only reason we have them at all is because the one example + # file we've been using has in_out_touches as an array. + # the shape is (49, 4642) in that file, and ALL entries are NaN. + # Thus for all the above reasons I'm going to ignore it. 
+ del(nw.in_out_touches) + + # Now for something pedantic: only use plural nouns for + # those measurements taken along multiple points per frame + # for those with just one data point per frame, it should be + # singular. + # i.e. plural for numpy arrays of shape (49, n) + # singular for numpy arrays of shape (n) + # and singular for numpy arrays of shape (49, 2, n) + # (where n is the number of frames) + + nw.skeleton = nw.skeletons + nw.ventral_contour = nw.vulva_contours + nw.dorsal_contour = nw.non_vulva_contours + del(nw.skeletons) + del(nw.vulva_contours) + del(nw.non_vulva_contours) + nw.head_area = nw.head_areas + nw.tail_area = nw.tail_areas + nw.vulva_area = nw.vulva_areas + nw.non_vulva_area = nw.non_vulva_areas + del(nw.head_areas) + del(nw.tail_areas) + del(nw.vulva_areas) + del(nw.non_vulva_areas) + + # Frame codes should be stored in the VideoInfo object + nw.video_info.frame_code = nw.frame_codes + del(nw.frame_codes) + + # Somehow those four areas apparently add up to the total area + # of the worm. No feature requires knowing anything more + # than just the total area of the worm, so for NormalizedWorm + # we just store one variable, area, for the whole worm's area. + + nw._area = nw.head_area + nw.tail_area + \ + nw.vulva_area + nw.non_vulva_area + del(nw.head_area) + del(nw.tail_area) + del(nw.vulva_area) + del(nw.non_vulva_area) + + + nw._length = nw.lengths + del(nw.lengths) + + return nw + + def get_BasicWorm(self): + """ + Return an instance of NormalizedSkeletonAndContour containing this + instance of NormalizedWorm's basic data. + + There is no purpose for this within the standard pipeline - going + back to a BasicWorm from a NormalizedWorm would only be done + for verification of code integrity purposes. + + Note that we can't "de-normalize" the worm so if the original + BasicWorm from which this NormalizedWorm was derived was properly + heterocardinal, that information is lost. All frames in our + generated BasicWorm here will have 49 points and thus will remain + normalized. + + """ + bw = BasicWorm.from_contour_factory(self.ventral_contour, + self.dorsal_contour) + + bw.video_info = self.video_info + + return bw + + + def validate(self): + """ + Checks array lengths, etc. to ensure that this is a valid + instance and no further problems will arise if further + processing is attempted on this instance + + """ + # TODO + return True + + def rotated(self, theta_d): + """ + Returns a NormalizedWorm instance with each frame rotated by + the amount given in the per-frame theta_d array. + + Parameters + --------------------------------------- + theta_d: 1-dimensional ndarray of dtype=float + The frame-by-frame rotation angle in degrees. + A 1-dimensional n-element array where n is the number of + frames, giving a rotation angle for each frame. + + Returns + --------------------------------------- + A new NormalizedWorm instance with the same worm, rotated + in each frame by the requested amount. + + """ + #theta_r = theta_d * (np.pi / 180) + + #%Unrotate worm + #%----------------------------------------------------------------- + # wwx = bsxfun(@times,sx,cos(theta_r)) + \ + # bsxfun(@times,sy,sin(theta_r)); + # wwy = bsxfun(@times,sx,-sin(theta_r)) + + # bsxfun(@times,sy,cos(theta_r)); + + # TODO + return self + + @property + def centre(self): + """ + Frame-by-frame mean of the skeleton points + + Returns + --------------------------------------- + A numpy array of length n, where n is the number of + frames, giving for each frame the mean of the skeleton points. 
+ + """ + try: + return self._centre + except AttributeError: + # We do this to avoid a RuntimeWarning taking the nanmean of + # frames with nothing BUT nan entries: for those frames nanmean + # returns nan (correctly) but still raises a RuntimeWarning. + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + self._centre = np.nanmean(self.skeleton, 0, keepdims=False) + + return self._centre + + @property + def angle(self): + """ + Frame-by-frame mean of the skeleton points + + Returns + --------------------------------------- + A numpy array of length n, giving for each frame + the angle formed by the first and last skeleton point. + + """ + try: + return self._angle + except AttributeError: + s = self.skeleton + # obtain vector between first and last skeleton point + v = s[-1, :, :] - s[0, :, :] + # find the angle of this vector + self._angle = np.arctan2(v[1, :], v[0, :]) * (180 / np.pi) + + return self._angle + + @property + def dropped_frames_mask(self): + """ + Which frames are "dropped", i.e. which frames have the first + skeleton X-coordinate set to NaN. + + Returns + ------------------ + boolean numpy array of shape (n,) where n is the number of frames + True if frame is dropped in the skeleton or contour + + Note + ------------------ + We are assuming that self.validate() == True, i.e. that the + skeleton and contour are NaN along all points in frames where + ANY of the points are NaN. + + """ + return np.isnan(self.skeleton[0, 0, :]) + + @property + def centred_skeleton(self): + """ + Return a skeleton numpy array with each frame moved so the + centroid of the worm is 0,0 + + Returns + --------------------------------------- + A numpy array with the above properties. + + """ + try: + return self._centred_skeleton + except AttributeError: + s = self.skeleton + + if s.size != 0: + s_mean = np.ones(s.shape) * self.centre + self._centred_skeleton = s - s_mean + else: + self._centred_skeleton = s + + return self._centred_skeleton + + @property + def orientation_free_skeleton(self): + """ + Perform both a rotation and a translation of the skeleton + + Returns + --------------------------------------- + A numpy array, which is the centred and rotated normalized + worm skeleton. + + Notes + --------------------------------------- + To perform this matrix multiplication we are multiplying: + rot_matrix * s + This is shape 2 x 2 x n, times 2 x 49 x n. + Basically we want the first matrix treated as two-dimensional, + and the second matrix treated as one-dimensional, + with the results applied elementwise in the other dimensions. 
+ + To make this work I believe we need to pre-broadcast rot_matrix + into the skeleton points dimension (the one with 49 points) so + that we have + 2 x 2 x 49 x n, times 2 x 49 x n + #s1 = np.rollaxis(self.skeleton, 1) + + #rot_matrix = np.ones(s1.shape) * rot_matrix + + #self.skeleton_rotated = rot_matrix.dot(self.skeleton) + + """ + try: + return self._orientation_free_skeleton + except AttributeError: + orientation = self.angle + + # Flip and convert to radians + a = -orientation * (np.pi / 180) + + rot_matrix = np.array([[np.cos(a), -np.sin(a)], + [np.sin(a), np.cos(a)]]) + + # We need the x,y listed in the first dimension + s1 = np.rollaxis(self.centred_skeleton, 1) + + # For example, here is the first point of the first frame rotated: + # rot_matrix[:,:,0].dot(s1[:,0,0]) + + # ATTEMPTING TO CHANGE rot_matrix from 2x2x49xn to 2x49xn + # rot_matrix2 = np.ones((2, 2, s1.shape[1], + # s1.shape[2])) * rot_matrix + + s1_rotated = [] + + # Rotate the worm frame-by-frame and add these skeletons to a list + for frame_index in range(self.num_frames): + s1_rotated.append(rot_matrix[:, :, frame_index].dot + (s1[:, :, frame_index])) + # print(np.shape(np.rollaxis(rot_matrix[:,:,0].dot(s1[:,:,0]),0))) + + # Save the list as a numpy array + s1_rotated = np.array(s1_rotated) + + # Fix the axis settings + self._orientation_free_skeleton = \ + np.rollaxis(np.rollaxis(s1_rotated, 0, 3), 1) + + return self._orientation_free_skeleton + + @property + def num_frames(self): + """ + The number of frames in the video. + + Returns + --------------------------------------- + int + number of frames in the video + + """ + try: + return self._num_frames + except AttributeError: + self._num_frames = self.skeleton.shape[2] + + return self._num_frames + + def position_limits(self, dimension, measurement='skeleton'): + """ + Maximum extent of worm's travels projected onto a given axis + + Parameters + --------------------------------------- + dimension: specify 0 for X axis, or 1 for Y axis. + + Notes + --------------------------------------- + Dropped frames show up as NaN. + nanmin returns the min ignoring such NaNs. + + """ + d = getattr(self, measurement) + if(len(d.shape) < 3): + raise Exception("Position Limits Is Only Implemented for 2D data") + return (np.nanmin(d[:, dimension, :]), + np.nanmax(d[:, dimension, :])) + + @property + def contour(self): + return self.get_contour(keep_redundant_points=True) + + @property + def contour_without_redundant_points(self): + return self.get_contour(keep_redundant_points=False) + + def get_contour(self, keep_redundant_points=True): + """ + The contour of the worm as one 96-point or 98-point polygon. + + That is: + + Go from ventral_contour shape (49,2,n) and + dorsal_contour shape (49,2,n) to + contour with shape (96,2,n) or (98,2,n) + + Why 96 instead of 49x2 = 98? + Because the first and last points are duplicates, so if + keep_redundant_points=False, we omit those on the second set. + + In either case we reverse the contour so that + it encompasses an "out and back" contour. + + """ + if keep_redundant_points: + return np.concatenate((self.ventral_contour, + self.dorsal_contour[::-1, :, :])) + else: + return np.concatenate((self.ventral_contour, + self.dorsal_contour[-2:0:-1, :, :])) + + @property + def contour_x(self): + # Note that this includes 2 redundant points. + return self.contour[:, 0, :] + + @property + def contour_y(self): + # Note that this includes 2 redundant points. 
+ return self.contour[:, 1, :] + + @property + def skeleton_x(self): + return self.skeleton[:, 0, :] + + @property + def skeleton_y(self): + return self.skeleton[:, 1, :] + + @property + def ventral_contour_x(self): + return self.ventral_contour[:, 0, :] + + @property + def ventral_contour_y(self): + return self.ventral_contour[:, 1, :] + + @property + def dorsal_contour_x(self): + return self.dorsal_contour[:, 0, :] + + @property + def dorsal_contour_y(self): + return self.dorsal_contour[:, 1, :] + + def __eq__(self, other): + """ + Compare this Normalized worm against another. + + TODO: Idea from @JimHokanson: + Do this on a frame by frame basis, do some sort of distance + computation rather than all together. This might hide bad frames + i.e. besides using correlation for comparison, a normalized distance + comparison that could catch extreme outliers would also be useful + + """ + attribute_list = ['skeleton_x', 'skeleton_y', + 'ventral_contour_x', 'ventral_contour_y', + 'dorsal_contour_x', 'dorsal_contour_y', + 'angles', 'widths', 'length', 'area'] + + return utils.compare_attributes(self, other, attribute_list, + high_corr_value=0.94, + merge_nans_list=['angles']) + + def __repr__(self): + # TODO: This omits the properties above ... + return utils.print_object(self) + + def plot_path(self, posture_index): + """ + Plot the path of the contour, skeleton and widths + + Parameters + ---------------- + posture_index: int + The desired posture point (along skeleton and contour) to plot. + + """ + vc = self.ventral_contour[posture_index, :, :] + nvc = self.dorsal_contour[posture_index, :, :] + skeleton_x = self.skeleton[posture_index, 0, :] + skeleton_y = self.skeleton[posture_index, 1, :] + + plt.scatter(vc[0, :], vc[1, :]) + plt.scatter(nvc[0, :], nvc[1, :]) + plt.scatter(skeleton_x, skeleton_y) + plt.gca().set_aspect('equal', adjustable='box') + plt.show() + + def plot_posture(self, frame_index): + """ + Show a scatterplot of the contour, skeleton and widths of frame #frame + + Parameters + ---------------- + frame_index: int + The desired frame # to plot. + + """ + vc = self.ventral_contour[:, :, frame_index] + nvc = self.dorsal_contour[:, :, frame_index] + skeleton = self.skeleton[:, :, frame_index] + + plt.scatter(vc[:, 0], vc[:, 1], c='red') + plt.scatter(nvc[:, 0], nvc[:, 1], c='blue') + plt.scatter(skeleton[:, 0], skeleton[:, 1], c='black') + plt.gca().set_aspect('equal', adjustable='box') + plt.show() + + def plot_contour(self, frame_index): + NormalizedWorm.plot_contour_with_labels( + self.contour[:, :, frame_index]) + + @staticmethod + def plot_contour_with_labels(contour, frame_index=0): + """ + Makes a beautiful plot with all the points labeled. 
+
+        Parameters:
+        contour: one frame's worth of a contour
+
+        """
+        contour_x = contour[:, 0, frame_index]
+        contour_y = contour[:, 1, frame_index]
+        plt.plot(contour_x, contour_y, 'r', lw=3)
+        plt.scatter(contour_x, contour_y, s=35)
+        labels = list(str(l) for l in range(0, len(contour_x)))
+        for label_index, (label, x, y) in enumerate(
+                zip(labels, contour_x, contour_y)):
+            # Orient the label for the first half of the points in one
+            # direction and the other half in the other
+            if label_index <= len(contour_x) // 2 - 1:  # Minus one since
+                xytext = (20, -20)                      # indexing is 0-based
+            else:
+                xytext = (-20, 20)
+            plt.annotate(
+                label, xy=(x, y), xytext=xytext, textcoords='offset points',
+                ha='right', va='bottom',
+                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
+                arrowprops=dict(arrowstyle='->',
+                                connectionstyle='arc3,rad=0'))
diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features.py
new file mode 100755
index 00000000..b043dd81
--- /dev/null
+++ b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features.py
@@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+"""
+Pre-features calculation methods.
+
+These methods are exclusively called by NormalizedWorm.from_BasicWorm_factory
+
+The methods of the classes below are all static, so their grouping into
+classes is only for convenient grouping and does not have any further
+functional meaning.
+
+Original notes from Schafer Lab paper on this process:
+------------------------------------------------------
+"Once the worm has been thresholded, its contour is extracted by tracing the
+worm's perimeter. The head and tail are located as sharp, convex angles on
+either side of the contour. The skeleton is extracted by tracing the midline
+of the contour from head to tail. During this process, widths and angles are
+measured at each skeleton point to be used later for feature computation. At
+each skeleton point, the width is measured as the distance between opposing
+contour points that determine the skeleton midline. Similarly, each skeleton
+point serves as a vertex to a bend and is assigned the supplementary angle to
+this bend. The supplementary angle can also be expressed as the difference in
+tangent angles at the skeleton point. This angle provides an intuitive
+measurement. Straight, unbent worms have an angle of 0 degrees. Right angles
+are 90 degrees. And the largest angle theoretically possible, a worm bending
+back on itself, would measure 180 degrees. The angle is signed to provide the
+bend's dorsal-ventral orientation. When the worm has its ventral side internal
+to the bend (i.e., the vectors forming the angle point towards the ventral
+side), the bending angle is signed negatively.
+
+Pixel count is a poor measure of skeleton and contour lengths. For this
+reason, we use chain-code lengths (Freeman 1961). Each laterally-connected
+pixel is counted as 1. Each diagonally-connected pixel is counted as sqrt(2).
+The supplementary angle is determined, per skeleton point, using edges 1/12
+the skeleton's chain-code length, in opposing directions, along the skeleton.
+When insufficient skeleton points are present, the angle remains undefined
+(i.e. the first and last 1/12 of the skeleton have no bending angle defined).
+1/12 of the skeleton has been shown to effectively measure worm bending in
+previous trackers and likely reflects constraints of the bodywall muscles,
+their innervation, and cuticular rigidity (Cronin et al. 2005)."
+
+"""
+import warnings
+import numpy as np
+
+from .. import config, utils
+from .skeleton_calculator1 import SkeletonCalculatorType1
+from .pre_features_helpers import WormParserHelpers
+
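+# Illustrative sketch (added comment, not part of the original module): the
+# "supplementary angle" described above is the turn between successive
+# tangent directions. For three points forming a right-angle bend, the bend
+# measures 90 degrees:
+#
+#   p0, p1, p2 = np.array([0, 0]), np.array([1, 0]), np.array([1, 1])
+#   t1 = np.arctan2(*(p1 - p0)[::-1])   # tangent angle of the first edge
+#   t2 = np.arctan2(*(p2 - p1)[::-1])   # tangent angle of the second edge
+#   np.degrees(t2 - t1)                 # -> 90.0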
+#%%
+
+
+class WormParsing(object):
+    """
+    This might eventually move somewhere else, but at least it is
+    contained within the class. It was originally in the NormalizedWorm
+    code, which was making things a bit overwhelming.
+
+    TODO: Self does not refer to WormParsing ...
+
+    """
+
+    @staticmethod
+    def compute_skeleton_and_widths(h_ventral_contour,
+                                    h_dorsal_contour,
+                                    frames_to_plot=[]):
+        """
+        Compute widths and a heterocardinal skeleton from a heterocardinal
+        contour.
+
+        Parameters
+        -------------------------
+        h_ventral_contour: list of numpy arrays.
+            Each frame is an entry in the list.
+        h_dorsal_contour:
+        frames_to_plot: list of ints
+            Optional list of frames to plot, to show exactly how the
+            widths and skeleton were calculated.
+
+        Returns
+        -------------------------
+        (h_widths, h_skeleton): tuple
+            h_widths : the heterocardinal widths, frame by frame
+            h_skeleton : the heterocardinal skeleton, frame by frame.
+
+        Notes
+        --------------------------
+        This is just a wrapper method for the real method, contained in
+        SkeletonCalculatorType1. In the future we might use
+        alternative algorithms so this may become the place we swap them in.
+
+        """
+        # Pass frames_to_plot through (it was previously hardcoded to [],
+        # which silently disabled the debug plots)
+        (h_widths, h_skeleton) = \
+            SkeletonCalculatorType1.compute_skeleton_and_widths(
+                h_ventral_contour,
+                h_dorsal_contour,
+                frames_to_plot=frames_to_plot)
+
+        return (h_widths, h_skeleton)
+    #%%
+
+    @staticmethod
+    def _h_array2list(h_vector):
+        '''
+        Change the data from a (49,2,n) array to a list of n (2,49) arrays;
+        a bit annoying but necessary.
+        '''
+        # We need each frame to have shape (2,49) instead of (49,2),
+        # so we transpose
+        h_list = [h_vector[:, :, ii].T for ii in range(h_vector.shape[2])]
+        # Use None instead of an all-NaN vector to indicate an invalid
+        # skeleton
+        h_list = [None if np.all(np.isnan(x)) else x for x in h_list]
+        return h_list
+
+    @staticmethod
+    def compute_angles(h_skeleton):
+        """
+        Calculate the angles
+
+        From the Schafer Lab description:
+
+        "Each normalized skeleton point serves as a vertex to a bend and
+        is assigned the supplementary angle to this bend. The supplementary
+        angle can also be expressed as the difference in tangent angles at
+        the skeleton point. This angle provides an intuitive measurement.
+        Straight, unbent worms have an angle of 0 degrees. Right angles
+        are 90 degrees. And the largest angle theoretically possible, a
+        worm bending back on itself, would measure 180 degrees."
+
+        Parameters
+        ----------------
+        h_skeleton: list of length n, of (2,k) numpy arrays of skeleton
+            coordinate points. The heterocardinal skeleton
+
+        Returns
+        ----------------
+        numpy array of shape (49,n)
+            An angle for each normalized point in each frame.
+
+        Notes
+        ----------------
+        Original code in:
+        https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/
+        %2Bseg_worm/%2Bworm/%40skeleton/skeleton.m
+        https://github.com/JimHokanson/SegwormMatlabClasses/tree/master/
+        %2Bseg_worm/%2Bcv/curvature.m
+
+        Note, the above code is written for the non-normalized worm ...
+ edge_length= total_length/12 + + Importantly, the above approach calculates angles not between + neighboring pairs but over a longer stretch of pairs (pairs that + exceed the edge length). The net effect of this approach is to + smooth the angles + + vertex index - First one where the distance from the tip to this + point is greater than the edge length + + s = norm_data[] + + temp_s = np.full([config.N_POINTS_NORMALIZED,n_frames],np.NaN) + for iFrame in range(n_frames): + temp_ + + TODO: sign these angles using ventral_mode ?? - @MichaelCurrie + + """ + #%% + temp_angle_list = [] # TODO: pre-allocate the space we need + + #i am changing the skeleton to a list, this function can deal with 3D numpy arrays (like in the case of normalized worm) + if not isinstance(h_skeleton, list): + h_skeleton = WormParsing._h_array2list(h_skeleton) + + for frame_index, cur_skeleton in enumerate(h_skeleton): + if cur_skeleton is None: + temp_angle_list.append([]) + else: + assert cur_skeleton.shape[0] == 2 + + sx = cur_skeleton[0, :] + sy = cur_skeleton[1, :] + cur_skeleton2 = np.rollaxis(cur_skeleton, 1) + cc = WormParserHelpers.chain_code_lengths_cum_sum( + cur_skeleton2) + + # This is from the old code + edge_length = cc[-1] / 12 + + # We want all vertices to be defined, and if we look starting + # at the left_I for a vertex, rather than vertex for left and + # right then we could miss all middle points on worms being + # vertices + + left_lengths = cc - edge_length + right_lengths = cc + edge_length + + valid_vertices_I = utils.find((left_lengths > cc[0]) & + (right_lengths < cc[-1])) + + left_lengths = left_lengths[valid_vertices_I] + right_lengths = right_lengths[valid_vertices_I] + + left_x = np.interp(left_lengths, cc, sx) + left_y = np.interp(left_lengths, cc, sy) + + right_x = np.interp(right_lengths, cc, sx) + right_y = np.interp(right_lengths, cc, sy) + + d2_y = sy[valid_vertices_I] - right_y + d2_x = sx[valid_vertices_I] - right_x + d1_y = left_y - sy[valid_vertices_I] + d1_x = left_x - sx[valid_vertices_I] + + frame_angles = np.arctan2(d2_y, d2_x) - np.arctan2(d1_y, d1_x) + + frame_angles[frame_angles > np.pi] -= 2 * np.pi + frame_angles[frame_angles < -np.pi] += 2 * np.pi + + # Convert to degrees + frame_angles *= 180 / np.pi + + all_frame_angles = np.full_like(cc, np.NaN) + all_frame_angles[valid_vertices_I] = frame_angles + + temp_angle_list.append(all_frame_angles) + + + return WormParserHelpers.normalize_all_frames( + temp_angle_list, h_skeleton, config.N_POINTS_NORMALIZED) + #%% + #%% + @staticmethod + def compute_signed_area(contour): + """ + Compute the signed area of the worm, for each frame of video, from a + normalized contour. The area should be negative for an clockwise contour + and positive for a anticlockwise. + + Parameters + ------------------------- + contour: a (98,2,n)-shaped numpy array + The contour points, in order around the worm, with two redundant + points at the head and tail. + + + Returns + ------------------------- + area: float + A numpy array of scalars, giving the area in each + frame of video. so if there are 4000 frames, area would have + shape (4000,) + + """ + # We do this to avoid a RuntimeWarning taking the nanmean of + # frames with nothing BUT nan entries: for those frames nanmean + # returns nan (correctly) but still raises a RuntimeWarning. 
+ with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + contour_mean = np.nanmean(contour, 0, keepdims=False) + + # Centre the contour about the origin for each frame + # this is technically not necessary but it shrinks the magnitude of + # the coordinates we are about to multiply for a potential speedup + contour -= contour_mean + + # We want a new 3D array, where all the points (i.e. axis 0) + # are shifted forward by one and wrapped. + # That is, for a given frame: + # x' sub 1 = x sub 2, + # x' sub 2 = x sub 3, + # ..., + # x` sub n = x sub 1. (this is the "wrapped" index) + # Same for y. + contour_plus_one = contour.take(range(1, contour.shape[0] + 1), + mode='wrap', axis=0) + + # Now we use the Shoelace formula to calculate the area of a simple + # polygon for each frame. + # Credit to Avelino Javer for suggesting this. + signed_area = np.nansum(contour[:,0,:] * contour_plus_one[:,1,:] - contour[:,1,:] * contour_plus_one[:,0,:],0) / 2 + + # Frames where the contour[:,:,k] is all NaNs will result in a + # signed_area[k] = 0. We must replace these 0s with NaNs. + signed_area[np.flatnonzero(np.isnan(contour[0, 0, :]))] = np.NaN + + return signed_area + + #%% + @staticmethod + def compute_skeleton_length(skeleton): + """ + Computes the length of the skeleton for each frame. + + Computed from the skeleton by converting the chain-code + pixel length to microns. + + Parameters + ---------- + skeleton: numpy array of shape (k,2,n) + The skeleton positions for each frame. + (n is the number of frames, and + k is the number points in each frame) + + Returns + ----------- + length: numpy array of shape (n) + The (chain-code) length of the skeleton for each frame. + + """ + # For each frame, sum the chain code lengths to get the total length + return np.sum(WormParserHelpers.chain_code_lengths(skeleton), + axis=0) diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features_helpers.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features_helpers.py new file mode 100755 index 00000000..f3ab91d6 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/pre_features_helpers.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jul 3 17:15:41 2015 + +@author: mcurrie +""" +import numpy as np + + +class WormParserHelpers: + + #%% + @staticmethod + def chain_code_lengths(skeleton): + """ + Computes the chain-code lengths of the skeleton for each frame. + + Computed from the skeleton by converting the chain-code + pixel length to microns. + + These chain-code lengths are based on the Freeman 8-direction + chain codes: + + 3 2 1 + 4 P 0 + 5 6 7 + + Given a sequence of (x,y)-coordinates, we could obtain a sequence of + direction vectors, coded according to the following scheme. + + However, since we just need the lengths, we don't need to actually + calculate all of these codes. Instead we just calculate the + Euclidean 2-norm from pixel i to pixel i+1. + + Note: + Our method is actually different than if we stepped from pixel + to pixel in one-pixel increments, since in our method the distances + can be something other than multiples of the 1- or sqrt(2)- steps + characteristic in Freeman 8-direction chain codes. + + + Parameters + ---------- + skeleton: numpy array of shape (k,2,n) + The skeleton positions for each frame. 
+ (n is the number of frames, and + k is the number points in each frame) + + Returns + ----------- + length: numpy array of shape (n) + The (chain-code) length of the skeleton for each frame. + + """ + # For each frame, for x and y, take the difference between skeleton + # points: (hence axis=0). Resulting shape is (k-1,2,n) + skeleton_diffs = np.diff(skeleton, axis=0) + # Now for each frame, for each (diffx, diffy) pair along the skeleton, + # find the magnitude of this vector. Resulting shape is (k-1,n) + chain_code_lengths = np.linalg.norm(skeleton_diffs, axis=1) + + return chain_code_lengths + + #%% + @staticmethod + def chain_code_lengths_cum_sum(skeleton): + """ + Compute the Freeman 8-direction chain-code length. + + Calculate the distance between a set of points and then calculate + their cumulative distance from the first point. + + The first value returned has a value of 0 by definition. + + Parameters + ---------------- + skeleton: numpy array + Shape should be (k,2), where k is the number of + points per frame + + """ + if np.size(skeleton) == 0: + # Handle empty set - don't add 0 as first element + return np.empty([]) + else: + distances = WormParserHelpers.chain_code_lengths(skeleton) + # Prepend a zero element so that distances' numpy array length + # is the same as skeleton's + distances = np.concatenate([np.array([0.0]), distances]) + + return np.cumsum(distances) + + #%% + @staticmethod + def normalize_all_frames_xy(heterocardinal_property, num_norm_points): + """ + Normalize a "heterocardinal" skeleton or contour into a "homocardinal" + one, where each frame has the same number of points. + + Parameters + -------------- + heterocardinal_property: list of numpy arrays + the outermost dimension, that of the lists, has length n + the numpy arrays are of shape (2,ki) + num_norm_points: int + The number of points to normalize to. + + Returns + -------------- + numpy array of shape (49,2,n) + + """ + n_frames = len(heterocardinal_property) + normalized_data = np.full([num_norm_points, 2, n_frames], + np.NaN) + + for iFrame, cur_frame_value in enumerate(heterocardinal_property): + if cur_frame_value is not None: + # We need cur_frame_value to have shape (k,2), not (2,k) + cur_frame_value2 = np.rollaxis(cur_frame_value, 1) + cc = WormParserHelpers.chain_code_lengths_cum_sum( + cur_frame_value2) + + # Normalize both the x and the y + normalized_data[:, 0, iFrame] = WormParserHelpers.normalize_parameter( + cur_frame_value[0, :], cc, num_norm_points) + normalized_data[:, 1, iFrame] = WormParserHelpers.normalize_parameter( + cur_frame_value[1, :], cc, num_norm_points) + + return normalized_data + + #%% + @staticmethod + def normalize_all_frames(property_to_normalize, xy_data, num_norm_points): + """ + Normalize a property as it articulates along a skeleton. + + Normalize a (heterocardinal) array of lists of variable length + down to a numpy array of shape (num_norm_points,n). + + Parameters + -------------- + property_to_normalize: list of length n, of numpy arrays of shape (ki) + The property that needs to be evenly sampled + xy_data: list of length n, of numpy arrays of shape (2, ki) + The skeleton or contour points corresponding to the location + along the worm where the property_to_normalize was recorded + num_norm_points: int + The number of points to normalize to. 
+ + Returns + -------------- + numpy array of shape (49,n) + prop_to_normalize, now normalized down to 49 points per frame + + """ + assert(len(property_to_normalize) == len(xy_data)) + + # Create a blank array of shape (49,n) + normalized_data_shape = [num_norm_points, len(property_to_normalize)] + normalized_data = np.full(normalized_data_shape, np.NaN) + + # Normalize one frame at a time + for frame_index, (cur_frame_value, cur_xy) in \ + enumerate(zip(property_to_normalize, xy_data)): + if cur_xy is not None: + # We need cur_xy to have shape (k,2), not (2,k) + cur_xy_reshaped = np.rollaxis(cur_xy, axis=1) + running_lengths = WormParserHelpers.chain_code_lengths_cum_sum( + cur_xy_reshaped) + + # Normalize cur_frame_value over an evenly-spaced set of + # 49 values spread from running_lengths[0] to + # running_lengths[-1] + normalized_data[:, frame_index] = \ + WormParserHelpers.normalize_parameter(cur_frame_value, + running_lengths, + num_norm_points) + + return normalized_data + + #%% + @staticmethod + def normalize_parameter(prop_to_normalize, running_lengths, + num_norm_points): + """ + This function finds where all of the new points will be when evenly + sampled (in terms of chain code length) from the first to the last + point in the old data. + + These points are then related to the old points. If a new point is at + an old point, the old point data value is used. If it is between two + old points, then linear interpolation is used to determine the new + value based on the neighboring old values. + + NOTE: This method should be called for just one frame's data at a time. + + NOTE: For better or worse, this approach does not smooth the new data, + since it just linearly interpolates. + + See http://docs.scipy.org/doc/numpy/reference/generated/ + numpy.interp.html + + Parameters + ----------- + prop_to_normalize: numpy array of shape (k,) or (2,k) + The parameter to be interpolated, where k is the number of + points. These are the values to be normalized. + running_lengths: numpy array of shape (k) + The positions along the worm where the property was + calculated. It is these positions that are to be "normalized", + or made to be evenly spaced. The parameter will then be + calculated at the new, evenly spaced, positions. + num_norm_points: int + The number of points to normalize to. 
+ + Returns + ----------- + Numpy array of shape (k,) or (k,2) depending on the provided + shape of prop_to_normalize + + Notes + ------------ + Old code: + https://github.com/openworm/SegWorm/blob/master/ComputerVision/ + chainCodeLengthInterp.m + + """ + # Create n evenly spaced points between the first and last point in + # old_lengths + new_lengths = np.linspace(running_lengths[0], running_lengths[-1], + num_norm_points) + + # Interpolate in both the 2-d and 1-d cases + if len(prop_to_normalize.shape) == 2: + # Assume shape is (2,k,n) + interp_x = np.interp(new_lengths, running_lengths, + prop_to_normalize[0, :]) + interp_y = np.interp(new_lengths, running_lengths, + prop_to_normalize[1, :]) + # Compbine interp_x and interp_y together in shape () + return np.rollaxis(np.array([interp_x, interp_y]), axis=1) + else: + # Assume shape is (k,n) + return np.interp(new_lengths, running_lengths, prop_to_normalize) diff --git a/tierpsy/features/open_worm_analysis_toolbox/prefeatures/skeleton_calculator1.py b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/skeleton_calculator1.py new file mode 100755 index 00000000..fd75ad64 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/prefeatures/skeleton_calculator1.py @@ -0,0 +1,768 @@ +# -*- coding: utf-8 -*- +""" +@JimHokason's translation of the Schafer Lab's skeletonization code. + + +""" +import numpy as np +import matplotlib.pyplot as plt + +# If you are interested to know why the following line didn't work: +# import scipy.signal.savgol_filter as sgolay +# check out this: http://stackoverflow.com/questions/29324814/ +# Instead we use the following statement: +from scipy.signal import savgol_filter as sgolay + +from .. import utils +from .pre_features_helpers import WormParserHelpers + +#%% + + +class SkeletonCalculatorType1(object): + + """ + The main method in this clas is compute_skeleton_and_widths. All other + methods are just subfunctions of this main method. + + """ + #%% + @staticmethod + def compute_skeleton_and_widths(h_ventral_contour, + h_dorsal_contour, + frames_to_plot=[]): + """ + Compute widths and a heterocardinal skeleton from a heterocardinal + contour. + + Parameters + ------------------------- + h_ventral_contour: list of numpy arrays. + Each frame is an entry in the list. + h_dorsal_contour: + frames_to_plot: list of ints + Optional list of frames to plot, to show exactly how the + widths and skeleton were calculated. + + + Returns + ------------------------- + (h_widths, h_skeleton): tuple + h_widths : the heterocardinal widths, frame by frame + h_skeleton : the heterocardinal skeleton, frame by frame. + + + Original algorithm notes: + ------------------------- + Original code for this algorithm can be found at: + https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/ + %2Bseg_worm/%2Bworm/%40skeleton/linearSkeleton.m + Which calls an initial skeletonization algorithm: + https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/ + %2Bseg_worm/%2Bcv/skeletonize.m + Which then gets refined: + https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/ + %2Bseg_worm/%2Bworm/%40skeleton/cleanSkeleton.m + + Widths are simply the distance between two "corresponding" sides of + the contour. The question is how to get these two locations. + + From Ev's Thesis: 3.3.1.6 - page 126 (or 110 as labeled in document) + ------------------------- + For each section, we begin at its center on both sides of the contour. 
+        We then walk, pixel by pixel, in either direction until we hit the
+        end of the section on opposite sides, for both directions. The
+        midpoint, between each opposing pixel pair, is considered the
+        skeleton and the distance between these pixel pairs is considered
+        the width for each skeleton point.
+
+        Food tracks, noise, and other disturbances can form spikes on the
+        worm contour. When no spikes are present, our walk attempts to
+        minimize the width between opposing pairs of pixels. When a spike
+        is present, this strategy may cause one side to get stuck in the
+        spike while the opposing side walks.
+
+        Therefore, when a spike is present, the spiked side walks while the
+        other side remains still.
+
+        """
+        FRACTION_WORM_SMOOTH = 1.0 / 12.0
+        SMOOTHING_ORDER = 3
+        PERCENT_BACK_SEARCH = 0.3
+        PERCENT_FORWARD_SEARCH = 0.3
+        END_S1_WALK_PCT = 0.15
+
+        num_frames = len(h_ventral_contour)  # == len(h_dorsal_contour)
+
+        h_skeleton = [None] * num_frames
+        h_widths = [None] * num_frames
+
+        profile_times = {'sgolay': 0,
+                         'transpose': 0,
+                         'h__getBounds': 0,
+                         'compute_normal_vectors': 0,
+                         'h__getMatches': 0,
+                         'h__updateEndsByWalking': 0,
+                         'hstack': 0,
+                         'final calculations': 0}
+
+        for frame_index, (s1, s2) in \
+                enumerate(zip(h_ventral_contour, h_dorsal_contour)):
+
+            # If the frame has no contour values, assign no skeleton
+            # or widths values
+            if s1 is None:
+                continue
+            assert s1.shape[0] == 2  # x-y must be in the first dimension
+
+            # Smoothing of the contour
+            # ------------------------------------------
+            start = utils.timing_function()
+            # Step 1: filter
+
+            def _smooth_vec(sv):
+                filter_width_sv = utils.round_to_odd(sv.shape[1] *
+                                                     FRACTION_WORM_SMOOTH)
+                try:
+                    sv[0, :] = sgolay(sv[0, :],
+                                      window_length=filter_width_sv,
+                                      polyorder=SMOOTHING_ORDER)
+                    sv[1, :] = sgolay(sv[1, :],
+                                      window_length=filter_width_sv,
+                                      polyorder=SMOOTHING_ORDER)
+                except ValueError:
+                    pass
+                return sv
+
+            s1 = _smooth_vec(s1)
+            s2 = _smooth_vec(s2)
+
+            profile_times['sgolay'] += utils.timing_function() - start
+
+            """
+            # We require the contours to be coincident at the beginning
+            # and end. If they are not, add a middle point that joins them.
+            if (s1[:,0] - s2[:,0] != 0).any():
+                new_first_point = (s1[:,0] + s2[:,0]) / 2
+                # We must have same dimensions to concatenate, so go from
+                # shape (2,) to shape (2,1)
+                new_first_point = new_first_point[:, None]
+                # Now concatenate shape (2,1) with shape (2,k) along the
+                # second axis, for a new array of shape (2,k+1)
+                s1 = np.concatenate([new_first_point, s1], axis=1)
+                s2 = np.concatenate([new_first_point, s2], axis=1)
+
+            if (s1[:,-1] - s2[:,-1] != 0).any():
+                new_last_point = (s1[:,-1] + s2[:,-1]) / 2
+                # We must have same dimensions to concatenate, so go from
+                # shape (2,) to shape (2,1)
+                new_last_point = new_last_point[:, None]
+
+                s1 = np.concatenate([s1, new_last_point], axis=1)
+                s2 = np.concatenate([s2, new_last_point], axis=1)
+            """
+
+            # UP/DOWNSAMPLE if the number of points is not between 49 and
+            # 250, which seem like reasonable numbers.
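+            # Illustrative sketch (added comment, not original code): the
+            # resampling below maps a contour side to a fixed point count.
+            # For a hypothetical side with only 30 points:
+            #
+            #   side = np.random.rand(2, 30)               # too few points
+            #   side = WormParserHelpers.normalize_all_frames_xy(
+            #       [side], num_norm_points=75)[:, :, 0]   # -> shape (75, 2)
+            #   side = np.rollaxis(side, 1)                # back to (2, 75)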
+            if s1.shape[1] < 49 or s1.shape[1] > 250:
+                if s1.shape[1] < 49:
+                    num_norm_points = 75
+                else:
+                    num_norm_points = 200
+                # Up- or downsample to a reasonable number of points
+                s1 = WormParserHelpers.normalize_all_frames_xy(
+                    [s1], num_norm_points=num_norm_points)
+
+                # There is only one frame so let's take that dimension out,
+                # and transform s1 from having shape (k,2,n) to (k,2)
+                s1 = s1[:, :, 0]
+
+                # normalize_all_frames_xy rolls the axis so let's roll it
+                # back
+                s1 = np.rollaxis(s1, 1)
+
+            if s2.shape[1] < 49 or s2.shape[1] > 250:
+                if s2.shape[1] < 49:
+                    num_norm_points = 75
+                else:
+                    num_norm_points = 200
+                # For documentation see the above for s1
+                s2 = WormParserHelpers.normalize_all_frames_xy(
+                    [s2], num_norm_points=num_norm_points)
+                s2 = s2[:, :, 0]
+                s2 = np.rollaxis(s2, 1)
+
+            # Calculation of distances
+            # -----------------------------------
+            # Find the distance from each point in s1 to EVERY point in s2.
+            # Thus dx_across[0,5] gives the x-distance from point 0 on s1 to
+            # point 5 on s2. The operation gives us an array of shape
+            # (s1.shape[1], s2.shape[1])
+            dx_across = np.transpose(s1[0, :][None, :]) - s2[0, :]
+            dy_across = np.transpose(s1[1, :][None, :]) - s2[1, :]
+
+            # d_across_partials has shape (ki, ji, 2)
+            d_across_partials = np.dstack([dx_across, dy_across])
+            d_across = np.linalg.norm(d_across_partials, axis=2)
+            dx_across = dx_across / d_across
+            dy_across = dy_across / d_across
+
+            # Determine search bounds for possible "projection pairs"
+            # ------------------------------------------------
+            start = utils.timing_function()
+            left_indices, right_indices = \
+                SkeletonCalculatorType1.h__getBounds(s1.shape[1],
+                                                     s2.shape[1],
+                                                     PERCENT_BACK_SEARCH,
+                                                     PERCENT_FORWARD_SEARCH)
+
+            profile_times['h__getBounds'] += utils.timing_function() - start
+            start = utils.timing_function()
+
+            # For each point on side 1, calculate normalized orthogonal
+            # values
+            norm_x, norm_y = utils.compute_normal_vectors(s1)
+
+            profile_times[
+                'compute_normal_vectors'] += utils.timing_function() - start
+            start = utils.timing_function()
+
+            # For each point on side 1, find which point on side 2 it
+            # pairs with
+            match_I1 = SkeletonCalculatorType1.h__getMatches(s1, s2,
+                                                             norm_x, norm_y,
+                                                             dx_across,
+                                                             dy_across,
+                                                             d_across,
+                                                             left_indices,
+                                                             right_indices)
+
+            profile_times['h__getMatches'] += utils.timing_function() - start
+            start = utils.timing_function()
+
+            # Pair off the points from one contour to the other
+            I_1, I_2 = SkeletonCalculatorType1.h__updateEndsByWalking(
+                d_across,
+                match_I1,
+                s1, s2,
+                END_S1_WALK_PCT)
+
+            profile_times[
+                'h__updateEndsByWalking'] += utils.timing_function() - start
+
+            # We're looking to the left and to the right to ensure that
+            # things are ordered
+            is_good = np.hstack((True,
+                                 # current is before next
+                                 np.array((I_2[1:-1] <= I_2[2:]) &
+                                          # current is after previous
+                                          (I_2[1:-1] >= I_2[:-2])),
+                                 True))
+            # Filter out invalid entries
+            I_1 = I_1[is_good]
+            I_2 = I_2[is_good]
+
+            # TODO: Allow smoothing on x & y
+
+            # Create the skeleton sides
+            s1 = s1[:, I_1]
+            s1_p = s2[:, I_2]
+            # The widths are simply the distance between the sides
+            h_widths[frame_index] = np.linalg.norm(s1_p - s1, axis=0)
+            # The skeleton is simply the midpoint between the sides
+            h_skeleton[frame_index] = (s1 + s1_p) / 2
+
+            # DEBUG
+            # print("Final skeleton shape of frame %d: %s" %
+            #       (frame_index, str(h_skeleton[frame_index].shape)))
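+            # Illustrative sketch (added comment, not original code): for a
+            # single matched pair of points the width/skeleton rule above
+            # reduces to:
+            #
+            #   a, b = np.array([0.0, 1.0]), np.array([0.0, -1.0])
+            #   width = np.linalg.norm(b - a)   # -> 2.0
+            #   midpoint = (a + b) / 2          # -> array([0., 0.])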
+            # DEBUG
+            # Optional plotting code
+            if frame_index in frames_to_plot:
+                fig = plt.figure()
+                # ARRANGE THE PLOTS AS:
+                # AX1 AX1 AX2
+                # AX1 AX1 AX3
+                ax1 = plt.subplot2grid((2, 3), (0, 0), rowspan=2, colspan=2)
+                # ax2 = plt.subplot2grid((2, 3), (0, 2))
+                ax3 = plt.subplot2grid((2, 3), (1, 2))
+                ax1.set_title("Frame #%d of %d" % (frame_index,
+                                                   len(h_ventral_contour)))
+
+                # The points along one side of the worm
+                ax1.scatter(s1[0, :], s1[1, :], marker='o',
+                            edgecolors='r', facecolors='none')
+                # The points along the other side
+                ax1.scatter(s2[0, :], s2[1, :], marker='o',
+                            edgecolors='b', facecolors='none')
+
+                # To plot the widths, we need to run
+                # plot([x1,x2],[y1,y2]) for each line segment
+                for i in range(s1_p.shape[1]):
+                    ax1.plot([s1_p[0, i], s1[0, i]],
+                             [s1_p[1, i], s1[1, i]],
+                             color='g')
+
+                skeleton = h_skeleton[frame_index]
+                # The skeleton points
+                ax1.scatter(skeleton[0, :], skeleton[1, :], marker='D',
+                            edgecolors='b', facecolors='none')
+                # The skeleton points, connected
+                ax1.plot(skeleton[0, :], skeleton[1, :], color='navy')
+
+                """
+                # TODO: Jim's original method for plotting this was:
+                # Width should really be plotted as a function of
+                # distance along the skeleton
+                cum_dist = h__getSkeletonDistance(skeleton_x, skeleton_y)
+
+                ax2.plot(cum_dist./cum_dist[-1], h_widths[frame_index],
+                         'r.-')
+                hold on
+                ax2.plot(np.linspace(0,1,49), nw_widths[:,iFrame], 'g.-')
+                hold off
+                """
+
+                # Now let's plot each of the 200+ width values as the
+                # y coordinate.
+                ax3.set_title("Worm width at each calculation point")
+                ax3.set_xlabel("Calculation point")
+                ax3.set_ylabel("Width")
+                with plt.style.context('fivethirtyeight'):
+                    ax3.plot(h_widths[frame_index], color='green',
+                             linewidth=2)
+
+                plt.show()
+
+        # DEBUG
+        # print(profile_times)
+        return (h_widths, h_skeleton)
+
+    #%%
+    @staticmethod
+    def h__getBounds(n1, n2, percent_left_search, percent_right_search):
+        """
+        Get contour "partner" search boundaries.
+
+        Given two contour sides, having n1 and n2 points respectively, we
+        want to pair off points on side n1 with points on side n2. This
+        method has a narrower goal: just to return the search boundaries
+        for each point on n1 in its search for a match on the other side.
+
+        For this, for each point i from 0 to n1-1 on the first side, we
+        want start_indices[i] to be an integer giving the point index
+        on the other side to start searching, and stop_indices[i] to be
+        the final point searched on the other side.
+
+        Parameters
+        ---------------
+        n1: int
+            number of points along one side of the contour
+        n2: int
+            number of points along the other side of the contour
+        percent_left_search: float
+            percent to search backward
+        percent_right_search: float
+            percent to search forward
+
+        Returns
+        ---------------
+        (start_indices, stop_indices): Two integer numpy arrays of shape
+            (n1,), giving the appropriate start and stop indices for a
+            search through the other contour. We only want to search for
+            points along the opposing contour that are within
+            [-percent_left_search, percent_right_search] of the point on
+            the first contour.
+
+        """
+        # Create array of n1 evenly spaced numbers from 0 to 1 inclusive
+        percentiles = np.linspace(0, 1, n1)
+        start_percentiles = percentiles - percent_left_search
+        stop_percentiles = percentiles + percent_right_search
+
+        # Integer arrays giving, for each point along side 1, the leftmost
+        # and rightmost indices on side 2 to search.
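+        # Worked example (added comment, not original code): with
+        # n1 = n2 = 5 and 30% search in each direction, point i = 2 sits at
+        # percentile 0.5, so it searches indices floor(0.2 * 5) = 1 through
+        # ceil(0.8 * 5) = 4 on the other side.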
+        start_indices = np.floor(start_percentiles * n2)
+        stop_indices = np.ceil(stop_percentiles * n2)
+        # Truncate any indices pointing outside the range between 0 and
+        # n2 - 1
+        start_indices[start_indices < 0] = 0
+        stop_indices[stop_indices >= n2] = n2 - 1
+
+        # (np.int was removed from numpy; the builtin int is equivalent)
+        return start_indices.astype(int), stop_indices.astype(int)
+
+    #%%
+    @staticmethod
+    def h__getMatches(s1, s2,
+                      norm_x, norm_y,
+                      dx_across, dy_across, d_across,
+                      left_I, right_I):
+        """
+        For a given frame, for each point on side 1, find which point on
+        side 2 it pairs with.
+
+        Parameters
+        ---------------
+        s1: numpy array of shape (2,ki)
+            One side of the contour. ki is the number of points in frame i
+        s2: numpy array of shape (2,ji)
+            The other side of the contour. ji is the number of points in
+            frame i
+        norm_x:
+        norm_y:
+        dx_across:
+        dy_across:
+        d_across: 2d numpy array of shape (ki, ji)
+            A lookup table giving the distance from a point on one side
+            of the contour to any point on the other side.
+        left_I:
+        right_I:
+
+        Returns
+        ---------------
+        match_indices: numpy array of integers, of shape (ki) where
+            ki is the number of contour points in s1
+
+        """
+        n_s1 = s1.shape[1]
+        match_I = np.zeros(n_s1, dtype=int)
+        match_I[0] = 0
+        # The last point of s1 pairs with the last index of s2
+        match_I[-1] = s2.shape[1] - 1
+
+        dp_values = np.zeros(n_s1)
+        all_signs_used = np.zeros(n_s1)
+
+        # There is no need to do the first and last point
+        for I, (lb, rb) in enumerate(zip(left_I[1:-1], right_I[1:-1])):
+            I = I + 1
+            [abs_dp_value, dp_I, sign_used] = SkeletonCalculatorType1.\
+                h__getProjectionIndex(norm_x[I], norm_y[I],
+                                      dx_across[I, lb:rb],
+                                      dy_across[I, lb:rb],
+                                      lb,
+                                      d_across[I, lb:rb], 0)
+            all_signs_used[I] = sign_used
+            dp_values[I] = abs_dp_value
+            match_I[I] = dp_I
+
+        if not np.all(all_signs_used[1:-1] == all_signs_used[1]):
+            if np.sum(all_signs_used) > 0:
+                sign_use = 1
+                I_bad = utils.find(all_signs_used[1:-1] != 1) + 1
+            else:
+                I_bad = utils.find(all_signs_used[1:-1] != -1) + 1
+                sign_use = -1
+
+            for I in I_bad:
+                lb = left_I[I]
+                rb = right_I[I]
+                [abs_dp_value, dp_I, sign_used] = SkeletonCalculatorType1.\
+                    h__getProjectionIndex(norm_x[I], norm_y[I],
+                                          dx_across[I, lb:rb],
+                                          dy_across[I, lb:rb],
+                                          lb,
+                                          d_across[I, lb:rb], sign_use)
+                all_signs_used[I] = sign_used
+                dp_values[I] = abs_dp_value
+                match_I[I] = dp_I
+
+        return match_I
+
+    #%%
+    @staticmethod
+    def h__getProjectionIndex(vc_dx_ortho, vc_dy_ortho,
+                              dx_across_worm, dy_across_worm,
+                              left_I, d_across, sign_use):
+        """
+
+        Matlab code:
+        nvc_local = nvc(nvc_indices_use,:);
+
+        dx_across_worm = cur_point(1) - nvc_local(:,1);
+        dy_across_worm = cur_point(2) - nvc_local(:,2);
+
+        d_magnitude = sqrt(dx_across_worm.^2+dy_across_worm.^2);
+
+        dx_across_worm = dx_across_worm./d_magnitude;
+        dy_across_worm = dy_across_worm./d_magnitude;
+
+        """
+
+        # SPEED: Compute normalized distances for all pairs ...
+        # Might need to downsample
+
+        dp = dx_across_worm * vc_dx_ortho + dy_across_worm * vc_dy_ortho
+
+        # I'd like to not have to do this step, it has to do with
+        # the relationship between the vulva and non-vulva side. This
+        # should be consistent across the entire animal and could be
+        # passed in, unless the worm rolls.
+        sign_used = -1
+        if sign_use == 0 and np.sum(dp) > 0:
+            # Instead of multiplying by -1 we could hardcode the flip of
+            # the logic below (e.g. max instead of min, > vs <)
+            dp = -1 * dp
+            sign_used = 1
+        elif sign_use == 1:
+            dp = -1 * dp
+            sign_used = 1
+
+        # This is slow, presumably due to the memory allocation ...
+        #                    < right
+        # possible = [dp(1:end-1) < dp(2:end) false] & \
+        #                    < left
+        #            [false dp(2:end) < dp(1:end-1)]
+
+        # In Matlab:
+        # possible = (dp(2:end-1) < dp(3:end)) & (dp(2:end-1) < dp(1:end-2));
+        possible = (dp[1:-2] < dp[2:-1]) & (dp[1:-2] < dp[0:-3])
+
+        Ip = utils.find(possible)
+        if len(Ip) == 1:
+            dp_I = Ip + 1
+            dp_value = dp[dp_I]
+        elif len(Ip) > 1:
+            temp_I = np.argmin(d_across[Ip])
+            dp_I = Ip[temp_I] + 1
+            dp_value = dp[dp_I]
+        else:
+            dp_I = np.argmin(dp)
+            dp_value = dp[dp_I]
+
+        I = left_I + dp_I
+
+        return (dp_value, I, sign_used)
+
+    #%%
+    @staticmethod
+    def h__updateEndsByWalking(d_across, match_I1, s1, s2, END_S1_WALK_PCT):
+        """
+        Update ends by walking.
+
+        Parameters
+        ----------
+        d_across: 2d numpy array of shape (ki, ji)
+            A lookup table giving the distance from a point on one side
+            of the contour to any point on the other side.
+        match_I1: numpy array of shape (ki,)
+            current list of matches
+        s1: numpy array of shape (2,ki)
+            One side of the contour. ki is the number of points in frame i
+        s2: numpy array of shape (2,ji)
+            The other side of the contour. ji is the number of points in
+            frame i
+        END_S1_WALK_PCT: float
+            At what % to end the walk along the first contour
+
+        Returns
+        -------
+        (I_1, I_2): tuple of numpy arrays
+
+        """
+        n_s1 = s1.shape[1]
+        n_s2 = s2.shape[1]
+
+        end_s1_walk_I = np.ceil(n_s1 * END_S1_WALK_PCT)
+        end_s1_walk_I = end_s1_walk_I.astype(int)
+        end_s2_walk_I = 2 * end_s1_walk_I
+
+        p1_I, p2_I = SkeletonCalculatorType1.h__getPartnersViaWalk(
+            0, end_s1_walk_I,
+            0, end_s2_walk_I,
+            d_across,
+            s1, s2)
+
+        # Alter the matches somewhat
+        match_I1[p1_I] = p2_I
+
+        # Keep all our alterations
+        keep_mask = np.zeros(len(match_I1), dtype=bool)
+        keep_mask[p1_I] = True
+
+        # Add the analogous walk from the other end
+        end_s1_walk_backwards = n_s1 - end_s1_walk_I + 1
+        end_s2_walk_backwards = n_s2 - end_s2_walk_I + 1
+
+        p1_I, p2_I = SkeletonCalculatorType1.h__getPartnersViaWalk(
+            n_s1 - 1, end_s1_walk_backwards,
+            n_s2 - 1, end_s2_walk_backwards,
+            d_across,
+            s1, s2)
+
+        match_I1[p1_I] = p2_I
+        keep_mask[p1_I] = True
+
+        # Anything in between we'll use the projection approach
+        keep_mask[end_s1_walk_I + 1:end_s1_walk_backwards] = True
+
+        # Always keep ends
+        keep_mask[0] = True
+        keep_mask[-1] = True
+
+        match_I1[0] = 0
+        match_I1[-1] = n_s2 - 1
+
+        # This isn't perfect but it removes some back and forth behavior
+        # of the matching. We'd rather drop points and smooth
+        I_1 = np.flatnonzero(keep_mask)
+        I_2 = match_I1[keep_mask]
+
+        return (I_1, I_2)
+    #%%
+    @staticmethod
+    def h__getPartnersViaWalk(s1, e1, s2, e2, d, xy1, xy2):
+        """
+        Intro
+        -----
+        This is an implementation of:
+        https://github.com/JimHokanson/SegwormMatlabClasses/blob/master/
+        %2Bseg_worm/%2Bcv/skeletonize.m
+
+        In the SegWorm code this is the main algorithm for going from a
+        contour to a skeleton (and widths). However in this implementation
+        it only gets called for the head and tail. The original
+        implementation calls this code many times after dividing the worm
+        up into chunks based on finding large bend angles and the midpoint
+        of the worm.
+
+        Why used here
+        -------------
+        This is used at the ends rather than the projection, as the
+        projection is looking for a line that is orthogonal to the slope of
+        the contour that makes a nice bisection of the worm. However at the
+        ends this falls apart. Consider a worm that is slightly like a
+        diamond and how the lines that are orthogonal to the end points
+        form horrible lines for widths estimation.
+
+        Example contour of the head or tail with a bit of the body:
+
+              /\
+             /\ \
+            /  \ \
+            |   \ |
+            |    \|      # Bad
+            |------|     # Good
+
+        The hyphen line is orthogonal to the left contour and provides a
+        good estimate of a line that is orthogonal to the skeleton.
+
+        The inner diagonal line is orthogonal to the left contour but is
+        a poor choice for a line that is orthogonal to the skeleton.
+
+        Algorithm
+        ---------
+        More on this can be found in Ev Yemini's thesis. The basic idea is
+        we start with pairs on both sides of the contour and ask whether or
+        not each point on one side of the contour should partner with its
+        current point or the next point on the other side, given the widths
+        between them.
+
+        Parameters
+        ----------
+        s1: start index for side 1
+        e1: end index for side 1 (inclusive)
+        s2: start index for side 2
+        e2: end index for side 2 (inclusive)
+        d: distance from I1 to I2 is d(I1,I2)
+
+        Returns
+        -------
+        (p1_I, p2_I) tuple
+            p1_I: numpy array
+                Each element represents one of a pair of indices that
+                belong together. In other words p1_I[i] goes with p2_I[i]
+
+        """
+
+        # TODO: remove hardcode, base on max of e1-s1+1
+        p1_I = np.zeros(200, dtype=int)
+        p2_I = np.zeros(200, dtype=int)
+
+        c1 = s1  # Current 1 index
+        c2 = s2  # Current 2 index
+        cur_p_I = -1  # Current pair index
+
+        while c1 != e1 and c2 != e2:
+            cur_p_I += 1
+
+            # We are either going up or down based on which end we are
+            # starting from (beginning or end)
+            if e1 < s1:
+                next1 = c1 - 1
+                next2 = c2 - 1
+            else:
+                next1 = c1 + 1
+                next2 = c2 + 1
+
+            # Vectors to the next point along each side
+            v_n1c1 = xy1[:, next1] - xy1[:, c1]
+            v_n2c2 = xy2[:, next2] - xy2[:, c2]
+
+            d_n1n2 = d[next1, next2]
+            d_n1c2 = d[next1, c2]
+            d_n2c1 = d[c1, next2]
+
+            if d_n1c2 == d_n2c1 or (d_n1n2 <= d_n1c2 and d_n1n2 <= d_n2c1):
+                # Advance along both contours
+                p1_I[cur_p_I] = next1
+                p2_I[cur_p_I] = next2
+
+                c1 = next1
+                c2 = next2
+
+            elif np.sum((v_n1c1 * v_n2c2) > 0):
+                # Contours go in similar directions:
+                # follow the smallest width
+                if d_n1c2 < d_n2c1:
+                    # Consume the smaller distance, then move the base of
+                    # the vector further forward
+                    p1_I[cur_p_I] = next1
+                    p2_I[cur_p_I] = c2
+
+                    # This bit always confuses me:
+                    #  c1  n1
+                    #
+                    #
+                    #  c2  x  x  x  n2
+                    #
+                    # Advance c1 so that d_n2_to_c1 is smaller next time
+                    c1 = next1
+                else:
+                    p1_I[cur_p_I] = c1
+                    p2_I[cur_p_I] = next2
+                    c2 = next2
+            else:
+                # The first pair has no previous width
+                if cur_p_I == 0:
+                    prev_width = 0
+                else:
+                    prev_width = d[p1_I[cur_p_I - 1], p2_I[cur_p_I - 1]]
+
+                if (d_n1c2 > prev_width and d_n2c1 > prev_width):
+                    p1_I[cur_p_I] = next1
+                    p2_I[cur_p_I] = next2
+
+                    c1 = next1
+                    c2 = next2
+                elif d_n1c2 < d_n2c1:
+                    p1_I[cur_p_I] = next1
+                    p2_I[cur_p_I] = c2
+                    c1 = next1
+                else:
+                    p1_I[cur_p_I] = c1
+                    p2_I[cur_p_I] = next2
+                    c2 = next2
+
+        # Keep only the cur_p_I + 1 pairs that were filled in
+        p1_I = p1_I[:cur_p_I + 1]
+        p2_I = p2_I[:cur_p_I + 1]
+
+        return (p1_I, p2_I)
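+
+# Illustrative usage sketch (added comment, not part of the original
+# module). Assuming two hypothetical lists of per-frame (2, ki) contour
+# arrays, 'ventral' and 'dorsal', the calculator could be driven as:
+#
+#   widths, skeleton = \
+#       SkeletonCalculatorType1.compute_skeleton_and_widths(ventral, dorsal)
+#   skeleton[0].shape   # (2, k0): midline of the first frame
+#   widths[0].shape     # (k0,):   contour-to-contour width per point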
+Metadata (i.e. not frame-by-frame information) + +The following attributes are used in downstream processing: + +fps: + - several features + +frame_code: + - 1, 105, 106 is used in posture.coils + - 2 is used in locomotion.turns + +ventral_mode: + Needed to sign (give + or -) several features: + - locomotion.velocity, the motion direction. + - The amplitude and frequency of foraging.bends + - path.curvature + +""" +import os +import numpy as np +import pandas as pd + +from .. import config + + +class VideoInfo(object): + """ + Metadata associated with a a worm video. + + video_name + fps + height + width + microns_per_pixel_x + microns_per_pixel_y + fourcc + length_in_seconds + length_in_frames + + frame_code : numpy array of codes for each frame of the video + frame_code_info : Descriptions of the frame codes, lazy-loaded from csv + ventral_mode : int + The ventral side mode: + 0 = unknown + 1 = clockwise + 2 = anticlockwise + video_type: in ['Schafer Lab', 'Not specified'] + + This can also be used as a base class for a new team that might need + different annotations on their videos. + + """ + + def __init__(self, video_name='', fps=None, + height=None, width=None, + microns_per_pixel_x=None, + microns_per_pixel_y=None, + fourcc=None, + length_in_seconds=None, + length_in_frames=None): + + # Mandatory + if fps is None: + self.fps = config.DEFAULT_FPS + else: + self.fps = fps + self.video_name = video_name + + # Optional (i.e. not used in future processing?) + # TODO: use this info for pixel-to-micron scaling + self.height = height + self.width = width + self.microns_per_pixel_x = microns_per_pixel_x + self.microns_per_pixel_y = microns_per_pixel_y + # The "four-character code" + # (https://en.wikipedia.org/wiki/FourCC) + # "One of the most well-known uses of FourCCs is to identify the + # video codec used in AVI files." + # - Michael Currie + self.fourcc = fourcc + + # TODO: We'll have to do some integrity checks since + # length_in_frames = len(skeleton[0,0,:]) = len(contour[0,0,:]) and + # length_in_frames = fps * length_in_seconds + self.length_in_seconds = length_in_seconds + self.length_in_frames = length_in_frames + + # DEBUG: (Note from @MichaelCurrie:) + # This should be set by the normalized worm file, since each + # worm subjected to an experiment is manually examined to find the + # vulva so the ventral mode can be determined. Here we just set + # the ventral mode to a default value as a stopgap measure + self.ventral_mode = config.DEFAULT_VENTRAL_MODE + self.video_type = 'Not specified' + + def set_ventral_mode(self, ventral_side): + ''' + Set the ventral side mode. Valid options are "clockwise", "anticlockwise" and "unknown". + ''' + if ventral_side == 'clockwise': + self.ventral_mode = 1 + elif ventral_side == 'anticlockwise': + self.ventral_mode = 2 + elif ventral_side == 'unknown': + self.ventral_mode = 0 + else: + raise ValueError('{} is not a recognizable ventral_mode.'.format(ventral_side)) + + + @staticmethod + def sniff_video_properties(file_path): + """ + A utility method to find a video's resolution, frame rate, codec, etc + in case this isn't passed to us and we need to populate it here. + + """ + # TODO + pass + + @property + def is_stage_movement(self): + """ + Returns a mask for all frames with frame code == 2, that is, + with a stage movement. + """ + return self.frame_code == 2 + + @property + def frame_code_info(self): + """ + Frame code descriptions + + I'd like to make this a static property but I don't think + that's possible. 
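+
+        (A hedged aside: Python has no built-in "static property". One
+        conventional workaround, not used in this codebase, would be a
+        module-level loader cached with functools.lru_cache; the lazy
+        instance attribute used below achieves the same one-time load,
+        once per object rather than once per class.)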
+ + """ + try: + return self._frame_code_info + except AttributeError: + self.load_frame_code_info() + return self._frame_code_info + + def load_frame_code_info(self): + """ + Load the frame code descriptions + + """ + # Obtain this computer's path to + # open-worm-analysis-toolbox\documentation\frame_codes.csv + cur_file_folder = os.path.dirname(__file__) + package_path = os.path.abspath(os.path.dirname(cur_file_folder)) + frame_codes_path = os.path.join(package_path, + 'documentation', + 'frame_codes.csv') + + # Load frame code information + self._frame_code_info = pd.read_csv(frame_codes_path, + delimiter=';', + quotechar="'") + # Convert the 'Frame Codes' column, which is all int, to int. + self._frame_code_info = \ + self._frame_code_info.convert_objects(convert_numeric=True) + + @property + def is_segmented(self): + """ + Returns a 1-d boolean numpy array of whether + or not, frame-by-frame, the given frame was segmented + + """ + return self.frame_code == 1 + + @property + def segmentation_status(self): + """ + Deprecated in favour of using self.frame_code directly. + + A numpy array of characters 's', 'm', 'd', 'f', where: + s = Segmented (aka frame code 1) + m = Stage movement (aka frame code 2) + d = Dropped frame (aka frame code 3) + f = Segmentation failed (aka frame codes 100+) + + """ + try: + return self._segmentation_status + except AttributeError: + s = self.frame_code == 1 + m = self.frame_code == 2 + d = self.frame_code == 3 + + self._segmentation_status = np.empty(self.num_frames, dtype='= all_indices.size: + return all_indices + else: + return all_indices[0:num_indices_to_return] + else: + return all_indices + + +def separated_peaks(x, dist, use_max, value_cutoff): + """ + Find the peaks (either minimum or maximum) in an array. + The peaks must be separated by, at least, the given distance. + + Note that outputs are not sorted. + + + Parameters + --------------------------------------- + x: numpy array + The values to be searched for peaks + + dist + The minimum distance between peaks + + use_max: boolean + True: find the maximum peaks + False: find the minimum peaks + + chainCodeLengths + The chain-code length at each index; + if empty, the array indices are used instead + + + Returns + --------------------------------------- + peaks + The maximum peaks + + indices + The indices for the peaks + + + Notes + --------------------------------------- + Formerly seg_worm.util.maxPeaksDist + i.e. [PEAKS INDICES] = seg_worm.util.maxPeaksDist \ + (x, dist,use_max,value_cutoff,*chain_code_lengths) + i.e. https://github.com/JimHokanson/SegwormMatlabClasses/ ... + blob/master/%2Bseg_worm/%2Butil/maxPeaksDist.m + + Used in seg_worm.feature_helpers.posture.getAmplitudeAndWavelength + Used in locomotion_bends.py + + See also MINPEAKSDIST, COMPUTECHAINCODELENGTHS + + """ + + chain_code_lengths = colon(1, 1, x.size) + + # Is the vector larger than the search window? + winSize = 2 * dist + 1 + if chain_code_lengths[-1] < winSize: + temp_I = np.argmax(x) + return (x[temp_I], temp_I) + + # xt - "x for testing" in some places in the code below + # it will be quicker (and/or easier) to assume that we want the largest + # value. 
By negating the data we can look for maxima (which will tell us + # where the minima are) + if not use_max: + xt = -1 * x + else: + xt = x + + # NOTE: I added left/right neighbor comparisions which really helped with + # the fft ..., a point can't be a peak if it is smaller than either of its + # neighbors + + np_true = np.ones(1, dtype=np.bool) + if use_max: + # %> left > right + # Matlab version: + # could_be_a_peak = x > value_cutoff & [true x(2:end) > x(1:end-1)] & + # [x(1:end-1) > x(2:end) true]; + + # greater than values to the left + could_be_a_peak = ( + x > value_cutoff) & np.concatenate( + (np_true, x[ + 1:] > x[ + 0:-1])) + # could_be_a_peak = np.logical_and( + # x > value_cutoff, np.concatenate((np.ones(1, dtype=np.bool), x[1:] > + # x[0:-1]))) + + # greater than values to the right + could_be_a_peak = could_be_a_peak & np.concatenate( + (x[0:-1] > x[1:], np_true)) + + # could_be_a_peak = np.logical_and( + # could_be_a_peak, np.concatenate((x[0:-1] > x[1:], np.ones(1, + # dtype=np.bool)))) + + I1 = could_be_a_peak.nonzero()[0] + I2 = np.argsort(-1 * x[I1]) # -1 => we want largest first + I = I1[I2] + else: + #raise Exception("Not yet implemented") + # pdb.set_trace() + # could_be_a_peak = x < value_cutoff & [true x(2:end) < x(1:end-1)] & [x(1:end-1) < x(2:end) true]; + #I1 = find(could_be_a_peak); + #[~,I2] = sort(x(I1)); + #I = I1(I2); + + could_be_a_peak = ( + x < value_cutoff) & np.concatenate( + (np_true, x[ + 1:] < x[ + 0:-1])) + could_be_a_peak = could_be_a_peak & np.concatenate( + (x[0:-1] < x[1:], np_true)) + + I1 = could_be_a_peak.nonzero()[0] + I2 = np.argsort(x[I1]) + I = I1[I2] + + n_points = x.size + + # This code would need to be fixed if real distances + # are input ... + too_close = dist - 1 + + temp_I = colon(0, 1, n_points - 1) + start_I = temp_I - too_close # Note, separated by dist is ok + # This sets the off limits area, so we go in by 1 + end_I = temp_I + too_close + + start_I[start_I < 0] = 0 + end_I[end_I > n_points] = n_points + + is_peak_mask = np.zeros(n_points, dtype=bool) + # A peak and thus can not be used as a peak + + for cur_index in I: + # NOTE: This gets updated in the loop so we can't just iterate + # over these values + if could_be_a_peak[cur_index]: + # NOTE: Even if a point isn't the local max, it is greater + # than anything that is by it that is currently not taken + # (because of sorting), so it prevents these points + # from undergoing the expensive search of determining + # whether they are the min or max within their + # else from being used, so we might as well mark those indices + # within it's distance as taken as well + temp_indices = slice(int(start_I[cur_index]), int(end_I[cur_index])) + could_be_a_peak[temp_indices] = False + + # This line is really slow ... + # It would be better to precompute the max within a window + # for all windows ... 
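+            # Illustrative sketch of the test below (made-up values, not
+            # from real data): with xt = [0, 5, 3, 9] and a window slice
+            # covering indices 1..3, index 1 is rejected because
+            # np.max(xt[1:4]) is 9 != xt[1], while index 3 passes since
+            # np.max(xt[1:4]) == xt[3].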
+ is_peak_mask[cur_index] = np.max(xt[temp_indices]) == xt[cur_index] + + indices = is_peak_mask.nonzero()[0] + peaks = x[indices] + + return (peaks, indices) + + +def colon(r1, inc, r2): + """ + Matlab's colon operator, althought it doesn't although inc is required + + """ + + s = np.sign(inc) + + if s == 0: + return_value = np.zeros(1) + elif s == 1: + n = ((r2 - r1) + 2 * np.spacing(r2 - r1)) // inc + return_value = np.linspace(r1, r1 + inc * n, n + 1) + else: # s == -1: + # NOTE: I think this is slightly off as we start on the wrong end + # r1 should be exact, not r2 + n = ((r1 - r2) + 2 * np.spacing(r1 - r2)) // np.abs(inc) + temp = np.linspace(r2, r2 + np.abs(inc) * n, n + 1) + return_value = temp[::-1] + + # If the start and steps are whole numbers, we should cast as int + if(np.equal(np.mod(r1, 1), 0) and + np.equal(np.mod(s, 1), 0) and + np.equal(np.mod(r2, 1), 0)): + return return_value.astype(int) + else: + return return_value + + +def print_object(obj): + """ + Goal is to eventually mimic Matlab's default display behavior for objects + + Example output from Matlab + + morphology: [1x1 seg_worm.features.morphology] + posture: [1x1 seg_worm.features.posture] + locomotion: [1x1 seg_worm.features.locomotion] + path: [1x1 seg_worm.features.path] + info: [1x1 seg_worm.info] + + #TODO: For ndarrays we should implement size displays instead of length + #TODO: The @property hack doesn't work for @property values from parent + classes, I would need to look at __bases__ + """ + + # TODO - have some way of indicating nested function and not doing fancy + # print for nested objects ... + + MAX_WIDTH = 70 + + dict_local = obj.__dict__ + + key_names = [k for k in dict_local] + + try: + # TODO: Also include __bases__ + names_of_prop_methods = [ + name for name, value in vars( + obj.__class__).items() if isinstance( + value, property)] + prop_code_ok = True + except: + prop_code_ok = False + + is_prop = [False] * len(key_names) + if prop_code_ok: + is_prop += [True] * len(names_of_prop_methods) + key_names += names_of_prop_methods + + key_lengths = [len(x) for x in key_names] + + if len(key_lengths) == 0: + return "" + + max_key_length = max(key_lengths) + key_padding = [max_key_length - x for x in key_lengths] + + max_leadin_length = max_key_length + 2 + max_value_length = MAX_WIDTH - max_leadin_length + + lead_strings = [' ' * x + y + ': ' for x, y in zip(key_padding, key_names)] + + # TODO: Alphabatize the results ???? + # Could pass in as a option + # TODO: It might be better to test for built in types + # Class::Bio.Entrez.Parser.DictionaryElement + # => show actual dictionary, not what is above + + value_strings = [] + for key, is_prop_local in zip(key_names, is_prop): + if is_prop_local: + temp_str = '@property method' + else: + run_extra_code = False + value = dict_local[key] + if hasattr(value, '__dict__'): + try: # Not sure how to test for classes :/ + class_name = value.__class__.__name__ + module_name = inspect.getmodule(value).__name__ + temp_str = 'Class::' + module_name + '.' + class_name + except: + run_extra_code = True + else: + run_extra_code = True + + if run_extra_code: + # TODO: Change length to shape if available + if isinstance(value, list) and len(value) > max_value_length: + len_value = len(value) + temp_str = 'Type::List, Len %d' % len_value + else: + # Perhaps we want str instead? 
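+                    # (Illustrative: for v = np.array([1, 2]), str(v)
+                    # gives '[1 2]' whereas repr(v) gives 'array([1, 2])'.)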
+ # Changed from repr to str because things Python was not + # happy with lists of numpy arrays + temp_str = str(value) + if len(temp_str) > max_value_length: + #type_str = str(type(value)) + #type_str = type_str[7:-2] + try: + len_value = len(value) + except: + len_value = 1 + temp_str = str.format( + 'Type::{}, Len: {}', type(value).__name__, len_value) + + value_strings.append(temp_str) + + final_str = '' + for cur_lead_str, cur_value in zip(lead_strings, value_strings): + final_str += (cur_lead_str + cur_value + '\n') + + return final_str + + +def write_to_CSV(data_dict, filename): + """ + Writes data to a CSV file, by saving it to the directory os.getcwd() + + Parameters + --------------------------------------- + data_dict: a dictionary of 1-dim ndarrays of dtype=float + What is to be written to the file. data.keys() provide the headers, + and each column in turn is provided by the value for that key + filename: string + Name of file to be saved (not including the '.csv' part of the name) + + """ + csv_file = open(filename + '.csv', 'w') + writer = csv.writer(csv_file, lineterminator='\n') + + # The first row of the file is the keys + writer.writerow(list(data_dict.keys())) + + # Find the maximum number of rows across all our columns: + max_rows = max([len(x) for x in list(data_dict.values())]) + + # Combine all the dictionary entries so we can write them + # row-by-row. + columns_to_write = [] + for column_key in data_dict.keys(): + column = list(data_dict[column_key]) + # Create a mask that shows True for any unused "rows" + m = np.concatenate([np.zeros(len(column), dtype=bool), + np.ones(max_rows - len(column), dtype=bool)]) + # Create a masked array of size max_rows with unused entries masked + column_masked = np.ma.array(np.resize(column, max_rows), mask=m) + # Convert the masked array to an ndarray with the masked values + # changed to NaNs + column_masked = column_masked.filled(np.NaN) + # Append this ndarray to our list + columns_to_write.append(column_masked) + + # Combine each column's entries into an ndarray + data_ndarray = np.vstack(columns_to_write) + + # We need the transpose so the individual data lists become transposed + # to columns + data_ndarray = data_ndarray.transpose() + + # We need in the form of nested sequences to satisfy csv.writer + rows_to_write = data_ndarray.tolist() + + for row in rows_to_write: + writer.writerow(list(row)) + + csv_file.close() + + +def interpolate_with_threshold(array, + threshold=None, + make_copy=True, + extrapolate=False): + """ + Linearly interpolate a numpy array along one dimension but only + for missing data n frames from a valid data point. That is, + if there are too many contiguous missing data points, none of + those points get interpolated. + + + Parameters + --------------------------------------- + array: 1-dimensional numpy array + The array to be interpolated + + threshold: int + The maximum size of a contiguous set of missing data points + that gets interpolated. Sets larger than this are left as NaNs. + If threshold is set to NaN then all points are interpolated. + + make_copy: bool + If True, do not modify the array parameter + If False, interpolate the array parameter "in place" + Either way, return a reference to the interpolated array + + extrapolate: bool + If True, extrapolate linearly to the beginning and end of the array + if there are NaNs on either end. 
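+
+    (To make the threshold rule concrete: with threshold=3, a run of up
+    to three consecutive NaNs is interpolated, while a run of four or
+    more is left as NaN. This is an illustration of the rule stated
+    above, not additional behaviour.)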
+ + Returns + --------------------------------------- + numpy array with the values interpolated + + + Usage Example + --------------------------------------- + # example array + a = np.array([10, 12, 15, np.NaN, 17, \ + np.NaN, np.NaN, np.NaN, -5], dtype='float') + + a2 = interpolate_with_threshold(a, 5) + + print(a) + print(a2) + + + Notes + --------------------------------------- + TODO: Extrapolation currently not implemented. Perhaps try + http://stackoverflow.com/questions/2745329/ + + """ + + """ + # (SKIP THIS, THIS IS FOR THE N-DIMENSIONAL CASE WHICH WE + # HAVE NOT IMPLEMENTED YET) + # Check that any frames with NaN in at least one dimension must + # have it in all: + frames_with_at_least_one_NaN = np.all(np.isnan(array), frame_dimension) + frames_with_no_NaNs = np.all(~np.isnan(array), frame_dimension) + # check that each frame is either True for one of these arrays or + # the other but not both. + assert(np.logical_xor(frames_with_at_least_one_NaN, frames_with_no_NaNs)) + frame_dropped = frames_with_at_least_one_NaN + + """ + + if make_copy: + # Use a new array so we don't modify the original array passed to us + new_array = np.copy(array) + else: + new_array = array + + if(threshold == 0): # everything gets left as NaN + return new_array + + # Say array = [10, 12, 15, nan, 17, nan, nan, nan, -5] + # Then np.isnan(array) = + # [False, False, False, True, False True, True, True, False] + # Let's obtain the "x-coordinates" of the NaN entries. + # e.g. [3, 5, 6, 7] + x = np.flatnonzero(np.isnan(new_array)) + + # (If we weren't using a threshold and just interpolating all NaNs, + # we could skip the next four lines.) + if(threshold is not None): + # Group these together using a fancy trick from + # http://stackoverflow.com/questions/2154249/, since + # the lambda function x:x[0]-x[1] on an enumerated list will + # group consecutive integers together + # e.g. [[(0, 3)], [(1, 5), (2, 6), (3, 7)]] + x_grouped = [ + list(group) for key, + group in groupby( + enumerate(x), + lambda i:i[0] - + i[1])] + + # We want to know the first element from each "run", and the run's length + # e.g. [(3, 1), (5, 3)] + x_runs = [(i[0][1], len(i)) for i in x_grouped] + + # We need only interpolate on runs of length <= threshold + # e.g. if threshold = 2, then we have only [(3, 1)] + x_runs = [i for i in x_runs if i[1] <= threshold] + + # now expand the remaining runs + # e.g. if threshold was 5, then x_runs would be [(3,1), (5,3)] so + # x would be [3, 5, 6, 7] + # this give us the x-coordinates of the values to be interpolated: + + if x_runs: + x = np.concatenate([(i[0] + list(range(i[1]))) for i in x_runs]) + else: + # consider th case that there where not valid groups remaining to + # interpolate + return new_array + + # The x-coordinates of the data points, must be increasing. + xp = np.flatnonzero(~np.isnan(new_array)) + # The y-coordinates of the data points, same length as xp + yp = array[~np.isnan(new_array)] + + if extrapolate: + # TODO + # :/ Might need to use scipy + # also, careful of the "left" and "right" settings below + pass + + # Place the interpolated values into the array + # the "left" and "right" here mean that we want to leave NaNs in place + # if the array begins and/or ends with a sequence of NaNs (i.e. don't + # try to extrapolate) + new_array[x] = np.interp(x, xp, yp, left=np.NaN, right=np.NaN) + + return new_array + + +def interpolate_with_threshold_2D(array, threshold=None, extrapolate=False): + """ + Interpolate two-dimensional data along the second axis. 
Each "row" + is treated as a separate interpolation. So if the first axis has 4 + rows of n frames in the second axis, we are interpolating 4 times. + + Parameters + --------------------------------------- + x_old : [m x n_frames] + The array to be interpolated along the second axis + + threshold: int (Optional) + Number of contiguous frames above which no interpolation is done + If none specified, all NaN frames are interpolated + + extrapolate: bool (Optional) + If yes, values are extrapolated to the start and end, along the second + axis (the axis being interpolated) + + Notes + --------------------------------------- + This could be optimized with a specialized function for the case + when all the NaN entries line up along the first dimension. We'd + only need to calculate the mask once rather than m times. + + """ + new_array = array.copy() + + # NOTE: This version is a bit weird because the size of y is not 1d + for i1 in range(np.shape(array)[0]): + new_array[i1, :] = interpolate_with_threshold(array[i1, :], + threshold, + make_copy=True, + extrapolate=extrapolate) + + return new_array + + +def gausswin(L, alpha=2.5): + """ + An N-point Gaussian window with alpha proportional to the + reciprocal of the standard deviation. The width of the window + is inversely related to the value of alpha. A larger value of + alpha produces a more narrow window. + + + Parameters + ---------------------------- + L : int + alpha : float + Defaults to 2.5 + + Returns + ---------------------------- + + + Notes + ---------------------------- + TODO: I am ignoring some corner cases, for example: + #L - negative, error + #L = 0 + #w => empty + #L = 1 + #w = 1 + + Equivalent of Matlab's gausswin function. + + """ + + N = L - 1 + n = np.arange(0, N + 1) - N / 2 + w = np.exp(-(1 / 2) * (alpha * n / (N / 2)) ** 2) + + return w + + +def get_nested_h5_field(h, fields, resolve_value=True, is_matrix=False): + """ + Meant to be a replacement for _extract_time_from_disk + + Attributes + ---------- + h : HDF5 group + fields : list or string + + """ + + if not isinstance(fields, list): + fields = [fields] # string to list + + for key in fields: + h = h[key] + + if not resolve_value: + return h + + temp = h.value + + if is_matrix: + return temp + + if temp.shape[0] > temp.shape[1]: + wtf = temp[:, 0] + else: + wtf = temp[0, :] + + return wtf + + +def _extract_time_from_disk(parent_ref, name, is_matrix=False): + """ + This is for handling Matlab save vs Python save when we get to that point. 
+ """ + + temp = parent_ref[name].value + + if is_matrix: + wtf = temp + else: + # Assuming vector, need to fix for eigenvectors + if temp.shape[0] > temp.shape[1]: + wtf = temp[:, 0] + else: + wtf = temp[0, :] + + return wtf + + +def filter_non_numeric(data): + """ + Filter a numpy array, removing entries that are either Inf or NaN + + Parameters + ------------------ + data: scalar, or a numpy array + + Returns + ------------------ + numpy array + + Notes + ------------------ + Formerly function mask = h__filterData(data) + + """ + if isinstance(data, np.ndarray): + return data[~get_non_numeric_mask(data)] + else: + if data == np.Inf or data == np.NaN: + return None + else: + return data + + +def get_non_numeric_mask(data): + """ + Obtain a mask for the data numpy array that shows True if + the element of data is either Inf or Nan + + Parameters + ------------------ + data: numpy array + + Returns + ------------------ + boolean numpy array of same size as data + + Notes + ------------------ + Formerly function mask = h__getFilterMask(data) + + """ + try: # DEBUG: remove late + return np.isinf(data) | np.isnan(data) + except TypeError: # DEBUG: remove late + print("uh oh") # DEBUG: remove late + + +def timing_function(): + """ + There's a better timing function available in Python 3.3+ + Otherwise use the old one. + TODO: This could be a static analysis at the top of the module + + """ + if sys.version_info[0] >= 3 and sys.version_info[1] >= 3: + return time.monotonic() + else: + return time.time() + + +def compare_is_equal(x, y, variable_name, tol=1e-6): + """ + This code is meant to implement the functions that actually compare + data between two different instances without knowing anything about + what they are comparing (i.e. just looking at the numbers) + + e.g. this can be used for features comparison. + + """ + if np.isnan(x) and np.isnan(y): + return True + elif np.logical_or(np.isnan(x), np.isnan(y)): + print('Values not equal: %s' % variable_name) + + return False + elif np.abs(x - y) <= tol: + return True + else: + print('Values not equal: %s' % variable_name) + + return False + + +def correlation(x, y, variable_name, high_corr_value=0.99, + merge_nans=False, pct_data_agreement_threshold=0.9): + """ + Compare two numpy arrays using a tolerance threshold + + Parameters + ---------------- + x: numpy array + y: numpy array + variable_name: str + The name that will be displayed for this variable in error messages + high_corr_value: float + The threshold below which an error will be thrown. Default 0.999. + merge_nans: bool + Default False. + + Returns + ---------------- + bool + + NOTE: For now everything is printed; eventually it would be nice + to optionally print things. 
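+
+    Usage sketch (illustrative values; assumes numpy imported as np):
+
+        x = np.linspace(0, 1, 100)
+        y = x + 1e-6 * np.random.randn(100)
+        correlation(x, y, 'my_feature')  # True, correlation ~ 1.0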
+ + """ + return_value = False + + if not isinstance(x, type(y)): + print('Type mismatch %s vs %s: %s' % (type(x), type(y), + variable_name)) + elif x.shape != y.shape: + print('Shape mismatch %s vs %s: %s' % (str(x.shape), str(y.shape), + variable_name)) + else: + np.reshape(x, x.size) + np.reshape(y, y.size) + + x_nans = np.isnan(x) + y_nans = np.isnan(y) + + if merge_nans: + pct_data_agreement = np.sum( + ~x_nans & ~y_nans) / np.sum(~x_nans | ~y_nans) + if pct_data_agreement < pct_data_agreement_threshold: + # TODO: Add on the pct_data_agreement value + print( + 'Mismatch of NaN values per frame is too high: %s' % + variable_name) + return False + else: + keep_mask = ~(x_nans | y_nans) + #keep_mask = ~np.logical_or(x_nans, y_nans) + xn = x[keep_mask] + yn = y[keep_mask] + else: + # Test for equality of nans + if not np.array_equal(x_nans, y_nans): + # TODO: throw in which element is the first that differs ... + print('Nan mask mismatch: %s' % variable_name) + return False + else: + xn = x[~np.isnan(x)] # xn -> x without NaNs or x no NaN -> xn + yn = y[~np.isnan(y)] + + if (xn.size == 0) and (yn.size == 0): + return_value = True + elif (xn.size == 1) and (yn.size == 1): + # Can't take correlation coefficient with single values + return_value = True + elif xn.shape != yn.shape: + print('Shape mismatch after NaN filter: %s' % variable_name) + else: + c = np.corrcoef(xn, yn) + is_good = c[1, 0] > high_corr_value + if not is_good: + print('Corr value too low for %s: %0.3f' % + (variable_name, c[1, 0])) + return_value = is_good + + return return_value + + +def compare_attributes(obj1, obj2, attribute_list, high_corr_value=0.999, + merge_nans_list=None): + """ + Compare all attributes in attribute_list belonging to obj + + Parameters + ------------- + obj1, obj2: objects + Should have the attributes given in attribute_list + attribute_list: list of strings + A list of the attributes to compare + high_corr_value: float + The threshold below which an error will be thrown. Default 0.999. + merge_nans_list: list of strings + Attributes to merge_nans for. Default None. + + Returns + ------------ + bool + True if comparison passed on all attributes. False otherwise. + + """ + if merge_nans_list is None: + merge_nans_list = [] + + is_equal = True + for attribute in attribute_list: + merge_nans = attribute in merge_nans_list + attrib_equal = correlation(getattr(obj1, attribute), + getattr(obj2, attribute), + attribute, high_corr_value, merge_nans) + if not attrib_equal: + is_equal = False + + # Return True only if all attributes are correlating + return is_equal + + +class ElementTimer(object): + + """ + This class is meant to be called in the following way by code that is + processing a feature. 
+ + timer = utils.ElementTimer + timer.tic() + # Run the feature processing code, or some other code + timer.toc('name of feature being processed') + + #TODO: Consider + + """ + + def __init__(self): + self.names = [] + self.times = [] + + def tic(self): + self.start_time = timing_function() + + def toc(self, name): + elapsed_time = timing_function() - self.start_time + self.times.append(elapsed_time) + self.names.append(name) + return elapsed_time + + # def get_time(self,name): + # return self.times[self.names.index(name)] + + def __repr__(self): + return print_object(self) + + def summarize(self): + """ + This can be called to display each logged function and how long it + took to run + """ + for (name, finish_time) in zip(self.names, self.times): + print('%s: %0.3fs' % (name, finish_time)) + + +def round_to_odd(num): + """ + Round a number to the nearest odd number. + + The only ambiguous case is that of a whole even number, since it's + equally close to its higher and lower neighbour. + In this case, go with the higher (rightmost) one on the number line. + (NOT the highest in absolute value) + + e.g. round_to_odd(4) returns 5 + round_to_odd(2) returns 3 + round_to_odd(-2) returns -1 + round_to_odd(1.5) returns 1 + round_to_odd(0.5) returns 1 + + Parameters + ------------- + num: float + The number to be rounded + + Returns + ------------- + An integer + The nearest odd number to num + + """ + num = np.floor(num) + if num % 2 == 0: + num = num + 1 + + #ensure this value is an int + return int(num) + + +def compute_normal_vectors(curve, clockwise_orientation=True): + """ + Compute normal vectors for a given curve in two dimensions. + (i.e. Obtain vectors of direction 1 perpendicular to the gradient) + + Parameters + --------------- + curve: numpy array of shape (2,k) + This is for a given curve, where k is the number of points + on the curve. + clockwise_orientation: bool + Whether to rotate the gradient vectors clockwise 90 degrees or + counter-clockwise 90 degrees. + + Returns + --------------- + numpy array of shape (k,), numpy array of shape (k,) + x-coordinate of normal vector, y-coordinate of normal vector + + """ + dx = np.gradient(curve[0, :]) + dy = np.gradient(curve[1, :]) + + # Take the perpendicular of each of these gradient vectors by applying + # a 90 degree rotation. + # curve_gradient should have shape (2,k) + if clockwise_orientation: + # (This should give us -1 for the projection) + curve_gradient = [dy, -dx] + else: + # (This should give us 1 for the projection) + curve_gradient = [-dy, dx] + + # Find the magnitude of the gradient (shape (k,)) + curve_gradient_magnitude = np.linalg.norm(curve_gradient, axis=0) + + # Scale each coordinate down so that we have a vector of length 1 + # This numpy array should have shape (2,k) + normal_vector = curve_gradient / curve_gradient_magnitude + + # Split up our return value into x and y coordinates + return normal_vector[0, :], normal_vector[1, :] + + +def compute_q_values(pvalues, + vlambda=None, pi0_method="smoother", + robust=False, smooth_df=3, + smooth_log_pi0=False, pi0=None): + """ + Compute false discovery rate (qvalues) qvalues after the method by + Storey et al. (2002). + Paper link: http://www.genomine.org/papers/directfdr.pdf + + The Python code derives from the R implementation at + http://genomics.princeton.edu/storeylab/qvalue/linux.html. 
+ + Code from http://genomic-association-tester.googlecode.com/hg-history/ + 991fdb4e3208324c39d57a1eca20b3a8f83602fd/gat/Stats.py + + Comments from Matlab equivalent function, mafdr: + + Original Matlab code: _, q_t_all = mafdr(p_t) + http://www.mathworks.com/help/bioinfo/ref/mafdr.html + FDR = mafdr(PValues) estimates a positive FDR (pFDR) value for each + value in PValues, a column vector or DataMatrix object containing + p-values for each feature (for example, gene) in a data set, using + the procedure introduced by Storey, 2002. FDR is a column vector or + a DataMatrix object containing positive FDR (pFDR) values. + [FDR, Q] = mafdr(PValues) also returns a q-value for each p-value + in PValues, using the procedure introduced by Storey, 2002. Q is a + column vector containing measures of hypothesis testing error for + each observation in PValues. + + """ + + if min(pvalues) < 0 or max(pvalues) > 1: + raise ValueError("p-values out of range") + + m = len(pvalues) + pvalues = np.array(pvalues, dtype=np.float) + + if vlambda is None: + vlambda = np.arange(0, 0.95, 0.05) + + if pi0 is None: + if isinstance(vlambda, float): + vlambda = (vlambda,) + + if len(vlambda) > 1 and len(vlambda) < 4: + raise ValueError(" if length of vlambda greater than 1, you " + "need at least 4 values.") + + if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1): + raise ValueError("vlambda must be within [0, 1).") + + # estimate pi0 + if len(vlambda) == 1: + vlambda = vlambda[0] + if vlambda < 0 or vlambda >= 1: + raise ValueError("vlambda must be within [0, 1).") + + pi0 = (np.mean([x >= vlambda for x in pvalues]) + / (1.0 - vlambda)) + pi0 = min(pi0, 1.0) + else: + + pi0 = np.zeros(len(vlambda), np.float) + + for i in range(len(vlambda)): + pi0[i] = (np.mean([x >= vlambda[i] for x in pvalues]) + / (1.0 - vlambda[i])) + + if pi0_method == "smoother": + if smooth_log_pi0: + pi0 = np.log(pi0) + tck = sp.interpolate.splrep(vlambda, pi0, k=smooth_df, + s=10000) + pi0 = sp.interpolate.splev(max(vlambda), tck) + + if smooth_log_pi0: + pi0 = np.exp(pi0) + + elif pi0_method == "bootstrap": + minpi0 = min(pi0) + + mse = np.zeros(len(vlambda), np.float) + pi0_boot = np.zeros(len(vlambda), np.float) + + for i in range(100): + # sample pvalues + idx_boot = np.random.random_integers(0, m - 1, m) + pvalues_boot = pvalues[idx_boot] + + for x in range(len(vlambda)): + # compute number of pvalues larger than lambda[x] + pi0_boot[x] = (np.mean(pvalues_boot > vlambda[x]) / + (1.0 - vlambda[x])) + mse += (pi0_boot - minpi0) ** 2 + pi0 = min(pi0[mse == min(mse)]) + else: + raise ValueError("'pi0_method' must be one of 'smoother' or " + "'bootstrap'.") + + pi0 = min(pi0, 1.0) + + if pi0 <= 0: + raise ValueError("The estimated pi0 <= 0 (%f). Check that you have " + "valid p-values or use another vlambda method." % pi0) + + # Compute qvalues + #-------------------------------------- + idx = np.argsort(pvalues) + # Monotonically decreasing bins, so that bins[i-1] > x >= bins[i] + bins = np.unique(pvalues)[::-1] + + # v[i] = number of observations less than or equal to pvalue[i] + # Could this be done more elegantly? 
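+    # A worked micro-example of the ranking trick below (made-up values):
+    # pvalues = [0.1, 0.2, 0.2, 0.9] gives bins = [0.9, 0.2, 0.1], so
+    # val2bin = len(bins) - np.digitize(pvalues, bins) = [1, 2, 2, 3],
+    # i.e. each p-value's 1-based rank among the unique values, ascending.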
+ val2bin = len(bins) - np.digitize(pvalues, bins) + v = np.zeros(m, dtype=np.int) + lastbin = None + for x in range(m - 1, -1, -1): + bin = val2bin[idx[x]] + if bin != lastbin: + c = x + v[idx[x]] = c + 1 + lastbin = bin + + qvalues = pvalues * pi0 * m / v + if robust: + qvalues /= (1.0 - (1.0 - pvalues)**m) + + # Bound qvalues by 1 and make them monotonic + qvalues[idx[m - 1]] = min(qvalues[idx[m - 1]], 1.0) + for i in range(m - 2, -1, -1): + qvalues[idx[i]] = min(min(qvalues[idx[i]], qvalues[idx[i + 1]]), 1.0) + + return qvalues + + +def get_files_of_a_type(root_path, file_extension='.mat'): + """ + Recursively traverses from root_path to find all files ending + in a given extension (e.g. '.mat') + + Parameters + ----------------------- + root_path: string + The absolute path to start searching from + file_extension: string of length 3 or 4 + The extension of the files we are looking for + e.g. '.mat' (or 'mat') + + Returns + ----------------------- + filepaths_found: list + The full paths to files ending in the desired extension + + """ + if file_extension[0] != '.': + file_extension = '.' + file_extension + + assert(len(file_extension) == 4) + + filepaths_found = [] + for root, dirs, files in os.walk(root_path): + current_files = [f for f in files if f[-4:] == file_extension] + for f in current_files: + filepaths_found.append(os.path.join(root, f)) + + return filepaths_found diff --git a/tierpsy/features/open_worm_analysis_toolbox/version.py b/tierpsy/features/open_worm_analysis_toolbox/version.py new file mode 100755 index 00000000..8b870931 --- /dev/null +++ b/tierpsy/features/open_worm_analysis_toolbox/version.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__version__ = '3.0.0' + diff --git a/tierpsy/features/tierpsy_features/__init__.py b/tierpsy/features/tierpsy_features/__init__.py new file mode 100755 index 00000000..b0b68bee --- /dev/null +++ b/tierpsy/features/tierpsy_features/__init__.py @@ -0,0 +1,9 @@ +# # -*- coding: utf-8 -*- +import os + +from .version import __version__ + +from .velocities import get_velocity_features +from .postures import get_morphology_features, get_posture_features +from .smooth import SmoothedWorm +from .features import * \ No newline at end of file diff --git a/tierpsy/features/tierpsy_features/curvatures.py b/tierpsy/features/tierpsy_features/curvatures.py new file mode 100755 index 00000000..73f6e47e --- /dev/null +++ b/tierpsy/features/tierpsy_features/curvatures.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Sep 7 16:59:23 2017 + +@author: ajaver +""" + +import numpy as np +import warnings +import pandas as pd + +from scipy.interpolate import UnivariateSpline +from scipy.signal import savgol_filter + +from .helper import nanunwrap, DataPartition +from .postures import get_length + +curvature_columns = [ + 'curvature_head', + 'curvature_hips', + 'curvature_midbody', + 'curvature_neck', + 'curvature_tail', + 'curvature_mean_head', + 'curvature_mean_neck', + 'curvature_mean_midbody', + 'curvature_mean_hips', + 'curvature_mean_tail', + 'curvature_std_head', + 'curvature_std_neck', + 'curvature_std_midbody', + 'curvature_std_hips', + 'curvature_std_tail' + ] + +def _curvature_angles(skeletons, window_length = None, lengths=None): + if window_length is None: + window_length = 7 + + points_window = int(round(window_length/2)) + + def _tangent_angles(skels, points_window): + '''this is a vectorize version to calculate the angles between segments + segment_size points from each side of 
a center point.
+        '''
+        s_center = skels[:, points_window:-points_window, :]  # center points
+        s_left = skels[:, :-2*points_window, :]  # left side points
+        s_right = skels[:, 2*points_window:, :]  # right side points
+
+        d_left = s_left - s_center
+        d_right = s_center - s_right
+
+        # arctan2 expects the y, x order of arguments
+        ang_l = np.arctan2(d_left[..., 1], d_left[..., 0])
+        ang_r = np.arctan2(d_right[..., 1], d_right[..., 0])
+
+        with warnings.catch_warnings():
+            # I am unwrapping in one dimension first
+            warnings.simplefilter("ignore")
+            ang = np.unwrap(ang_r - ang_l, axis=1)
+
+            for ii in range(ang.shape[1]):
+                ang[:, ii] = nanunwrap(ang[:, ii])
+        return ang
+
+    if lengths is None:
+        # calculate the length if it is not given
+        lengths = get_length(skeletons)
+
+    # The number of segments is the number of vertices minus 1
+    n_segments = skeletons.shape[1] - 1
+
+    # This is the fraction of the length the angle is calculated on
+    length_frac = 2*(points_window - 1)/(n_segments - 1)
+    segment_length = length_frac*lengths
+    segment_angles = _tangent_angles(skeletons, points_window)
+
+    curvature = segment_angles/segment_length[:, None]
+
+    return curvature
+
+
+def _curvature_savgol(skeletons, window_length=None, length=None):
+    '''
+    Calculate the curvature using Savitzky-Golay derivative filters. This
+    method is slower and can fail badly if the fit does not work, so it is
+    only used for testing.
+    '''
+
+    if window_length is None:
+        window_length = 7
+
+    def _fitted_curvature(skel):
+        if np.any(np.isnan(skel)):
+            return np.full(skel.shape[0], np.nan)
+
+        x = skel[:, 0]
+        y = skel[:, 1]
+
+        x_d = savgol_filter(x, window_length=window_length, polyorder=3, deriv=1)
+        y_d = savgol_filter(y, window_length=window_length, polyorder=3, deriv=1)
+        x_dd = savgol_filter(x, window_length=window_length, polyorder=3, deriv=2)
+        y_dd = savgol_filter(y, window_length=window_length, polyorder=3, deriv=2)
+        curvature = _curvature_fun(x_d, y_d, x_dd, y_dd)
+        return curvature
+
+    curvatures_fit = np.array([_fitted_curvature(skel) for skel in skeletons])
+    return curvatures_fit
+
+
+def _curvature_spline(skeletons, points_window=None, length=None):
+    '''
+    Calculate the curvature using univariate splines. This method is slower
+    and can fail badly if the fit does not work, so it is only used for
+    testing.
+    '''
+
+    def _spline_curvature(skel):
+        if np.any(np.isnan(skel)):
+            return np.full(skel.shape[0], np.nan)
+
+        x = skel[:, 0]
+        y = skel[:, 1]
+        n = np.arange(x.size)
+
+        fx = UnivariateSpline(n, x, k=5)
+        fy = UnivariateSpline(n, y, k=5)
+
+        x_d = fx.derivative(1)(n)
+        x_dd = fx.derivative(2)(n)
+        y_d = fy.derivative(1)(n)
+        y_dd = fy.derivative(2)(n)
+
+        curvature = _curvature_fun(x_d, y_d, x_dd, y_dd)
+        return curvature
+
+    curvatures_fit = np.array([_spline_curvature(skel) for skel in skeletons])
+    return curvatures_fit
+
+#%%
+def _curvature_fun(x_d, y_d, x_dd, y_dd):
+    return (x_d*y_dd - y_d*x_dd)/(x_d*x_d + y_d*y_d)**1.5
+
+def _gradient_windowed(X, points_window, axis):
+    '''
+    Calculate the gradient over an arbitrary window. A larger window makes
+    this procedure less noisy than the numpy native gradient.
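+
+    (Illustration of the stencil, assuming points_window=1: the half-window
+    is w_s = 2, so away from the edges grad[i] = (X[i+2] - X[i-2]) / 4;
+    the edges are edge-padded and divided by a linearly ramped, smaller
+    denominator.)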
+ ''' + w_s = 2*points_window + + #I use slices to deal with arbritary dimenssions + #https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html + n_axis_ini = max(0, axis) + n_axis_fin = max(0, X.ndim-axis-1) + + right_slice = [slice(None, None, None)]*n_axis_ini + [slice(None, -w_s, None)] + right_slice = tuple(right_slice) + + left_slice = [slice(None, None, None)]*n_axis_ini + [slice(w_s, None, None)] + left_slice = tuple(left_slice) + + right_pad = [(0,0)]*n_axis_ini + [(w_s, 0)] + [(0,0)]*n_axis_fin + left_pad = [(0,0)]*n_axis_ini + [(0, w_s)] + [(0,0)]*n_axis_fin + + right_side = np.pad(X[right_slice], right_pad, 'edge') + left_side = np.pad(X[left_slice], left_pad, 'edge') + + ramp = np.full(X.shape[axis]-2*w_s, w_s*2) + + ramp = np.pad(ramp, pad_width = (w_s, w_s), mode='linear_ramp', end_values = w_s) + #ramp = np.pad(ramp, pad_width = (w_s, w_s), mode='constant', constant_values = np.nan) + ramp_slice = [None]*n_axis_ini + [slice(None, None, None)] + [None]*n_axis_fin + ramp_slice = tuple(ramp_slice) + + grad = (left_side - right_side) / ramp[ramp_slice] #divide it by the time window + + return grad + +def curvature_grad(curve, points_window=None, axis=1, is_nan_border=True): + ''' + Calculate the curvature using the gradient using differences similar to numpy grad + + x1, x2, x3 + + grad(x2) = (x3-x1)/2 + + ''' + + #The last element must be the coordinates + assert curve.shape[-1] == 2 + assert axis != curve.ndim - 1 + + if points_window is None: + points_window = 1 + + if curve.shape[0] <= points_window*4: + return np.full((curve.shape[0], curve.shape[1]), np.nan) + + d = _gradient_windowed(curve, points_window, axis=axis) + dd = _gradient_windowed(d, points_window, axis=axis) + + gx = d[..., 0] + gy = d[..., 1] + ggx = dd[..., 0] + ggy = dd[..., 1] + + curvature_r = _curvature_fun(gx, gy, ggx, ggy) + if is_nan_border: + #I cannot really trust in the border gradient + w_s = 4*points_window + n_axis_ini = max(0, axis) + right_slice = [slice(None, None, None)]*n_axis_ini + [slice(None, w_s, None)] + right_slice = tuple(right_slice) + + left_slice = [slice(None, None, None)]*n_axis_ini + [slice(-w_s, None, None)] + left_slice = tuple(left_slice) + + curvature_r[right_slice] = np.nan + curvature_r[left_slice] = np.nan + + return curvature_r +#%% + +def get_curvature_features(skeletons, method = 'grad', points_window=None): + curvature_funcs = { + 'angle' : _curvature_angles, + 'spline' : _curvature_spline, + 'savgol' : _curvature_savgol, + 'grad' : curvature_grad + } + + + assert method in curvature_funcs + + if method == 'angle': + segments_ind_dflt = { + 'head' : 0, + 'neck' : 0.25, + 'midbody' : 0.5, + 'hips' : 0.75, + 'tail' : 1., + } + else: + segments_ind_dflt = { + 'head' : 5/48, + 'neck' : 15/48, + 'midbody' : 24/48, + 'hips' : 33/48, + 'tail' : 44/48, + } + + curvatures = curvature_funcs[method](skeletons, points_window) + max_angle_index = curvatures.shape[-1]-1 + segments_ind = {k:int(round(x*max_angle_index)) for k,x in segments_ind_dflt.items()} + + curv_dict = {'curvature_' + x :curvatures[:, ind] for x,ind in segments_ind.items()} + + #standard deviation of the curvature around the segments (seems to be usefull in classification) + p_obj = DataPartition(list(segments_ind_dflt.keys()), n_segments = skeletons.shape[1]) + + #i need to use nan because the curvature at the ends is not defined + curv_std = p_obj.apply_partitions(curvatures, func=np.nanstd) + for key, val in curv_std.items(): + curv_dict['curvature_std_' + key] = val + + #i need to use nan because the 
curvature at the ends is not defined + curv_mean = p_obj.apply_partitions(curvatures, func=np.nanmean) + for key, val in curv_mean.items(): + curv_dict['curvature_mean_' + key] = val + + data = pd.DataFrame.from_dict(curv_dict) + + return data + +#%% + +if __name__ == '__main__': + import matplotlib.pylab as plt + + R = 1 + + ang = np.linspace(-np.pi, np.pi, 50) + curve = np.array([np.cos(ang), np.sin(ang)]).T*R + curvature = curvature_grad(curve, axis=0) + + plt.figure() + plt.subplot(1,2,1) + plt.plot(curve[:, 0], curve[:, 1], '.-') + plt.axis('equal') + + plt.subplot(1,2,2) + plt.plot(curvature) + + k = 1/R + plt.ylim(k - k/2, k + k/2) + diff --git a/tierpsy/features/tierpsy_features/events.py b/tierpsy/features/tierpsy_features/events.py new file mode 100755 index 00000000..9d33ad21 --- /dev/null +++ b/tierpsy/features/tierpsy_features/events.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" + +@author: ajaver +""" + +import numpy as np +import pandas as pd + +event_columns = ['motion_mode', 'food_region', 'turn'] +durations_columns = ['event_type', 'region', + 'duration', 'timestamp_initial', + 'timestamp_final', 'edge_flag'] +event_region_labels = { + 'motion_mode': {-1:'backward', 1:'forward', 0:'paused'}, + 'food_region': {-1:'outside', 1:'inside', 0:'edge'}, + 'turn': {1:'inter', 0:'intra'} + } + +assert set(event_region_labels.keys()).issubset(event_columns) + +#%% +def _get_pulses_indexes(light_on, min_window_size=0, is_pad = True): + ''' + Get the start and end of a given pulse. + ''' + + if is_pad: + + light_on = np.pad(light_on, (1,1), 'constant', constant_values = False) + + switches = np.diff(light_on.astype(np.int)) + turn_on, = np.where(switches==1) + turn_off, = np.where(switches==-1) + + if is_pad: + turn_on -= 1 + turn_off -= 1 + turn_on = np.clip(turn_on, 0, light_on.size-3) + turn_off = np.clip(turn_off, 0, light_on.size-3) + + + assert turn_on.size == turn_off.size + + delP = turn_off - turn_on + + good = delP > min_window_size + + return turn_on[good], turn_off[good] + +#%% +def _find_turns(worm_data, + fps, + d_ratio_th = (0.15, 0.075), + ang_v_th = (0.75, 0.35), + interp_window_s = 0.5 + ): + #check the necessary columns are in the dataframe + assert set(('head_tail_distance', 'major_axis', 'angular_velocity')).issubset(set(worm_data.columns)) + + #adjust the interpolation window in frames + w_interp = int(fps*interp_window_s) + w_interp = w_interp if w_interp%2 == 1 else w_interp+1 + + try: + #get the ratio of this mesurements + #the cubic interpolation is important to detect this feature + d_ratio = 1-(worm_data['head_tail_distance']/worm_data['major_axis']) + d_ratio = d_ratio.rolling(window = w_interp).min().interpolate(method='cubic') + with np.errstate(invalid='ignore'): + ang_velocity = worm_data['angular_velocity'].abs() + ang_velocity = ang_velocity.rolling(window = w_interp).max().interpolate(method='cubic') + except ValueError: + #there was an error in the interpolation + return [np.full(worm_data.shape[0], np.nan) for _ in range(3)] + + + #find candidate turns that satisfy at the same time the higher threshold + turns_vec_ini = (d_ratio>d_ratio_th[0]) & (ang_velocity>ang_v_th[0]) + + #refine the estimates with the lower threshold in each vector independently + d_ration_candidates = _get_pulses_indexes(d_ratio>d_ratio_th[1]) + d_ration_r = [x for x in zip(*d_ration_candidates) if np.any(turns_vec_ini[x[0]:x[1]+1])] + + ang_v_candidates = _get_pulses_indexes(ang_velocity>ang_v_th[1]) + ang_v_r = [x for x in 
zip(*ang_v_candidates) if np.any(turns_vec_ini[x[0]:x[1]+1])] + + #combine the results into a final vector + turns_vec = np.zeros_like(turns_vec_ini) + for x in d_ration_r + ang_v_r: + turns_vec[x[0]:x[1]+1] = True + + return turns_vec, d_ratio, ang_velocity + +#%% +def _range_vec(vec, th): + ''' + flag a vector depending on the threshold, th + -1 if the value is below -th + 1 if the value is above th + 0 if it is between -th and th + ''' + flags = np.zeros(vec.size) + _out = vec < -th + _in = vec > th + flags[_out] = -1 + flags[_in] = 1 + return flags + +def _flag_regions(vec, central_th, extrema_th, smooth_window, min_frame_range): + ''' + Flag a frames into lower (-1), central (0) and higher (1) regions. + + The strategy is + 1) Smooth the timeseries by smoothed window + 2) Find frames that are certainly lower or higher using extrema_th + 3) Find regions that are between (-central_th, central_th) and + and last more than min_zero_window. This regions are certainly + central regions. + 4) If a region was not identified as central, but contains + frames labeled with a given extrema, label the whole region + with the corresponding extrema. + ''' + vv = pd.Series(vec).fillna(method='ffill').fillna(method='bfill') + smoothed_vec = vv.rolling(window=smooth_window,center=True).mean() + + + + paused_f = (smoothed_vec > -central_th) & (smoothed_vec < central_th) + turn_on, turn_off = _get_pulses_indexes(paused_f, min_frame_range) + inter_pulses = zip([0] + list(turn_off), list(turn_on) + [paused_f.size-1]) + + + flag_modes = _range_vec(smoothed_vec, extrema_th) + + for ini, fin in inter_pulses: + dd = np.unique(flag_modes[ini:fin+1]) + dd = [x for x in dd if x != 0] + if len(dd) == 1: + flag_modes[ini:fin+1] = dd[0] + elif len(dd) > 1: + kk = flag_modes[ini:fin+1] + kk[kk==0] = np.nan + kk = pd.Series(kk).fillna(method='ffill').fillna(method='bfill') + flag_modes[ini:fin+1] = kk + return flag_modes + +def _get_vec_durations(event_vec): + durations_list = [] + for e_id in np.unique(event_vec): + ini_e, fin_e = _get_pulses_indexes(event_vec == e_id, is_pad = True) + event_durations = fin_e - ini_e + + #flag if the event is on the vector edge or not + edge_flag = np.zeros_like(fin_e) + edge_flag[ini_e <= 0] = -1 + edge_flag[fin_e >= event_vec.size-1] = 1 + + event_ids = np.full(event_durations.shape, e_id) + durations_list.append(np.stack((event_ids, event_durations, ini_e, fin_e, edge_flag)).T) + + cols = ['region', 'duration', 'timestamp_initial', 'timestamp_final', 'edge_flag'] + event_durations_df = pd.DataFrame(np.concatenate(durations_list), columns = cols) + return event_durations_df + +def get_event_durations_w(events_df, fps): + event_durations_list = [] + for col in events_df: + if not col in ['timestamp', 'worm_index']: + dd = _get_vec_durations(events_df[col].values) + dd.insert(0, 'event_type', col) + event_durations_list.append(dd) + + if len(event_durations_list) == 0: + event_durations_df = pd.DataFrame() + else: + + event_durations_df = pd.concat(event_durations_list, ignore_index=True) + event_durations_df['duration'] /= fps + #shift timestamps to match the real initial time + first_t = events_df['timestamp'].min() + event_durations_df['timestamp_initial'] += first_t + event_durations_df['timestamp_final'] += first_t + + + return event_durations_df + + +def get_events(df, fps, worm_length = None, _is_debug=False): + + #initialize data + smooth_window_s = 0.5 + min_paused_win_speed_s = 1/3 + + if worm_length is None: + assert 'length' in df + worm_length = df['length'].median() + + 
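+    # The paused/moving split below uses a two-threshold (hysteresis-like)
+    # band: speeds within 2.5% of the worm length count as pause candidates
+    # and only speeds beyond 5% are flagged as definite motion; see
+    # _flag_regions. (This summarises the constants defined below; it does
+    # not add behaviour.)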
+ df = df.sort_values(by='timestamp') + + w_size = int(round(fps*smooth_window_s)) + smooth_window = w_size if w_size % 2 == 1 else w_size + 1 + + #WORM MOTION EVENTS + dd = [x for x in ['worm_index', 'timestamp'] if x in df] + events_df = pd.DataFrame(df[dd]) + if 'speed' in df: + speed = df['speed'].values + pause_th_lower = worm_length*0.025 + pause_th_higher = worm_length*0.05 + min_paused_win_speed = fps/min_paused_win_speed_s + + motion_mode = _flag_regions(speed, + pause_th_lower, + pause_th_higher, + smooth_window, + min_paused_win_speed + ) + events_df['motion_mode'] = motion_mode + + #FOOD EDGE EVENTS + if 'dist_from_food_edge' in df: + dist_from_food_edge = df['dist_from_food_edge'].values + edge_offset_lower = worm_length/2 + edge_offset_higher = worm_length + min_paused_win_food_s = 1 + + min_paused_win_food = fps/min_paused_win_food_s + food_region = _flag_regions(dist_from_food_edge, + edge_offset_lower, + edge_offset_higher, + smooth_window, + min_paused_win_food + ) + events_df['food_region'] = food_region + + #TURN EVENT + if set(('head_tail_distance', 'major_axis', 'angular_velocity')).issubset(set(df.columns)): + turn_vector, _, _ = _find_turns(df, fps) + events_df['turn'] = turn_vector.astype(np.float32) + + + if _is_debug: + plt.figure() + plt.plot(speed) + plt.plot(motion_mode*pause_th_higher) + + plt.figure() + plt.plot(dist_from_food_edge) + plt.plot(food_region*edge_offset_lower) + + return events_df + +#%% +def _get_event_stats(event_durations, n_worms_estimate, total_time): + ''' + Get the event statistics using the event durations table. + ''' + if event_durations.size == 0: + return pd.Series() + + all_events_time = event_durations.groupby('event_type').agg({'duration':'sum'})['duration'] + event_g = event_durations.groupby(['event_type', 'region']) + event_stats = [] + + valid_regions = [x for x in event_region_labels.keys() if x in all_events_time] + + for event_type in valid_regions: + region_dict = event_region_labels[event_type] + for region_id, region_name in region_dict.items(): + stat_prefix = event_type + '_' + region_name + try: + dat = event_g.get_group((event_type, region_id)) + duration = dat['duration'].values + edge_flag = dat['edge_flag'].values + except: + duration = np.zeros(1) + edge_flag = np.zeros(0) + + stat_name = stat_prefix + '_duration_50th' + stat_val = np.nanmedian(duration) + event_stats.append((stat_val, stat_name)) + + stat_name = stat_prefix + '_fraction' + stat_val = np.nansum(duration)/all_events_time[event_type] + event_stats.append((stat_val, stat_name)) + + stat_name = stat_prefix + '_frequency' + # calculate total events excluding events that started before the beginig of the trajectory + total_events = (edge_flag != -1).sum() + stat_val = total_events/n_worms_estimate/total_time + event_stats.append((stat_val, stat_name)) + + event_stats_s = pd.Series(*list(zip(*event_stats))) + return event_stats_s + +#%% +def get_event_durations(timeseries_data, fps): + + dd = ['worm_index', 'timestamp'] + event_columns + dd = [x for x in dd if x in timeseries_data] + events_df = timeseries_data[dd] + + event_durations = [] + for worm_index, dat in events_df.groupby('worm_index'): + dur = get_event_durations_w(dat, fps) + dur['worm_index'] = worm_index + event_durations.append(dur) + + if event_durations: + event_durations = pd.concat(event_durations, ignore_index=True) + return event_durations + else: + return pd.DataFrame() + + +def get_event_stats(timeseries_data, fps, n_worms_estimate): + event_durations = 
get_event_durations(timeseries_data, fps) + + total_time = (timeseries_data['timestamp'].max() - timeseries_data['timestamp'].min())/fps + event_stats_s = _get_event_stats(event_durations, n_worms_estimate, total_time) + return event_stats_s +#%% + +if __name__ == '__main__': + from tierpsy.helper.params import read_fps + import matplotlib.pylab as plt + import os + import glob + + + dname = '/Volumes/behavgenom_archive$/Solveig/Results/' + fnames = glob.glob(os.path.join(dname, 'Experiment8', '**', '*_featuresN.hdf5'), recursive = True) + + for ifname, fname in enumerate(fnames): + print(ifname+1, len(fnames)) + with pd.HDFStore(fname, 'r') as fid: + if '/provenance_tracking/FEAT_TIERPSY' in fid: + timeseries_data = fid['/timeseries_features'] + + trajectories_data = fid['/trajectories_data'] + good = trajectories_data['skeleton_id']>=0 + trajectories_data = trajectories_data[good] + else: + continue + break + + #%% + fps = read_fps(fname) + for worm_index in [2]:#, 69, 431, 437, 608]: + worm_data = timeseries_data[timeseries_data['worm_index']==worm_index] + worm_length = worm_data['length'].median() + + events_df = get_events(worm_data, fps, _is_debug=True) + #get event durations + event_durations_df = get_event_durations_w(events_df, fps) + + + #%% + from tierpsy_features.helper import get_n_worms_estimate + + n_worms_estimate = get_n_worms_estimate(timeseries_data['timestamp']) + get_event_stats(events_df, fps, n_worms_estimate) + #%% + from tierpsy.analysis.ske_create.helperIterROI import getROIfromInd + turns_vec, d_ratio, ang_velocity = _find_turns(worm_data, fps) + xx = worm_data['timestamp'].values + + plt.figure(figsize=(25,5)) + plt.plot(xx, ang_velocity) + plt.plot(xx, d_ratio) + plt.plot(xx, turns_vec) + #plt.ylim((0.7, 1.1)) + plt.xlim((xx[0], xx[-1])) + + + dd = _get_pulses_indexes(turns_vec, min_window_size=fps//2) + pulse_ranges = list(zip(*dd)) + + + masked_file = fname.replace('_featuresN', '') + for p in pulse_ranges: + + dd = worm_data.loc[worm_data.index[p[0]:p[1]+1]] + timestamps = dd['timestamp'][::12] + plt.figure(figsize=(50, 5)) + for ii, tt in enumerate(timestamps): + _, img, _ = getROIfromInd(masked_file, trajectories_data, tt, worm_index) + + plt.subplot(1, timestamps.size, ii+1) + plt.imshow(img, cmap='gray', interpolation='none') + plt.axis('off') + \ No newline at end of file diff --git a/tierpsy/features/tierpsy_features/extras/master_eigen_worms_N2.mat b/tierpsy/features/tierpsy_features/extras/master_eigen_worms_N2.mat new file mode 100755 index 00000000..b02482d5 Binary files /dev/null and b/tierpsy/features/tierpsy_features/extras/master_eigen_worms_N2.mat differ diff --git a/tierpsy/features/tierpsy_features/extras/pca_components.npy b/tierpsy/features/tierpsy_features/extras/pca_components.npy new file mode 100755 index 00000000..79d73c3f Binary files /dev/null and b/tierpsy/features/tierpsy_features/extras/pca_components.npy differ diff --git a/tierpsy/features/tierpsy_features/features.py b/tierpsy/features/tierpsy_features/features.py new file mode 100755 index 00000000..7149f475 --- /dev/null +++ b/tierpsy/features/tierpsy_features/features.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Aug 22 22:01:03 2017 + +@author: ajaver +""" +import pandas as pd +import numpy as np +import warnings + +from .helper import get_delta_in_frames, add_derivatives + +from .velocities import get_velocity_features, velocities_columns +from .postures import get_morphology_features, morphology_columns, \ 
+get_posture_features, posture_columns, posture_aux
+
+from .curvatures import get_curvature_features, curvature_columns
+from .food import get_cnt_feats, food_columns
+from .path import get_path_curvatures, path_curvature_columns, path_curvature_columns_aux
+
+from .events import get_events, event_columns
+
+#all time series features
+timeseries_feats_no_dev_columns = velocities_columns + morphology_columns + posture_columns + \
+    curvature_columns + food_columns + path_curvature_columns
+
+#add derivative columns
+timeseries_feats_columns = timeseries_feats_no_dev_columns + ['d_' + x for x in timeseries_feats_no_dev_columns]
+
+#add all the auxiliary columns
+aux_columns = posture_aux + path_curvature_columns_aux
+timeseries_all_columns = (timeseries_feats_columns + event_columns + aux_columns)
+
+
+#add ventral features
+ventral_signed_columns = ['relative_to_body_speed_midbody']
+ventral_signed_columns += path_curvature_columns + curvature_columns
+ventral_signed_columns += [x for x in velocities_columns if 'angular_velocity' in x]
+ventral_signed_columns += [x for x in posture_columns if 'eigen_projection' in x]
+ventral_signed_columns = ventral_signed_columns + ['d_' + x for x in ventral_signed_columns]
+
+#all the ventral_signed_columns must be in timeseries_feats_columns
+assert len(set(ventral_signed_columns) - set(timeseries_feats_columns)) == 0
+
+valid_ventral_side = ('', 'clockwise', 'anticlockwise', 'unknown')
+
+def get_timeseries_features(skeletons,
+                            widths = None,
+                            dorsal_contours = None,
+                            ventral_contours = None,
+                            fps = 1,
+                            derivate_delta_time = 1/3,
+                            ventral_side = '',
+                            timestamp = None,
+                            food_cnt = None,
+                            is_smooth_food_cnt = False,
+                            ):
+    '''
+    skeletons -> n_frames x n_segments x 2
+    widths -> n_frames x n_segments
+    dorsal_contours -> n_frames x n_segments x 2
+    ventral_contours -> n_frames x n_segments x 2
+    derivate_delta_time -> delta time in seconds used to calculate derivatives (including velocity)
+    '''
+
+    assert ventral_side in valid_ventral_side
+
+    derivate_delta_frames = get_delta_in_frames(derivate_delta_time, fps)
+
+    feat_morph = get_morphology_features(skeletons, widths, dorsal_contours, ventral_contours)
+    feat_posture = get_posture_features(skeletons)
+
+    #join morphology and posture features, avoiding duplicated columns (e.g. length)
+    cols_to_use = [x for x in feat_posture.columns if x not in feat_morph]
+
+    features_df = feat_morph.join(feat_posture[cols_to_use])
+
+    curvatures = get_curvature_features(skeletons)
+    features_df = features_df.join(curvatures)
+
+    velocities = get_velocity_features(skeletons, derivate_delta_frames, fps)
+    if velocities is not None:
+        features_df = features_df.join(velocities)
+
+    if food_cnt is not None:
+        food = get_cnt_feats(skeletons,
+                             food_cnt,
+                             is_smooth_food_cnt
+                             )
+        features_df = features_df.join(food)
+
+
+    path_curvatures, path_coords = get_path_curvatures(skeletons)
+    features_df = features_df.join(path_curvatures)
+    features_df = features_df.join(path_coords)
+
+
+    if timestamp is None:
+        timestamp = np.arange(features_df.shape[0], dtype=np.int32)
+        warnings.warn('`timestamp` was not given. I will assign an arbitrary one.')
+
+    features_df['timestamp'] = timestamp
+
+    events_df = get_events(features_df, fps)
+
+    dd = [x for x in events_df if x in event_columns]
+    features_df = features_df.join(events_df[dd])
+
+    #add the derivatives
+    features_df = add_derivatives(features_df,
+                                  timeseries_feats_no_dev_columns,
+                                  derivate_delta_frames,
+                                  fps)
+
+    #correct the sign of the ventral-side dependent features
+    if ventral_side == 'clockwise':
+        features_df[ventral_signed_columns] *= -1
+
+
+    #add any missing column
+    all_columns = ['timestamp'] + timeseries_all_columns
+    df = pd.DataFrame([], columns = timeseries_all_columns)
+    features_df = pd.concat((df, features_df), ignore_index=True, sort=False)
+
+    features_df = features_df[all_columns]
+
+    return features_df
\ No newline at end of file
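For orientation, a sketch of how this entry point is meant to be called. Everything here is illustrative: the random-walk "skeletons" are a placeholder (real inputs come from the `_skeletons.hdf5` files), and the import path assumes the package location added by this diff.

```
import numpy as np
from tierpsy.features.tierpsy_features.features import get_timeseries_features

# 500 frames of a 49-point, 2D skeleton in microns (synthetic toy data)
skeletons = np.cumsum(np.random.randn(500, 49, 2), axis=0)

feats = get_timeseries_features(skeletons, fps=25, derivate_delta_time=1/3)
# one row per frame; features that need widths/contours/food stay as NaN
print(feats[['timestamp', 'length', 'speed', 'd_speed']].head())
```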
diff --git a/tierpsy/features/tierpsy_features/food.py b/tierpsy/features/tierpsy_features/food.py
new file mode 100755
index 00000000..8a25b4ee
--- /dev/null
+++ b/tierpsy/features/tierpsy_features/food.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jul 18 16:55:14 2017
+
+@author: ajaver
+"""
+import pandas as pd
+import numpy as np
+import matplotlib.path as mplPath
+from scipy.interpolate import interp1d
+from scipy.signal import savgol_filter
+from .velocities import _h_segment_position
+
+food_columns = ['orientation_food_edge', 'dist_from_food_edge']
+
+#%%
+def _is_valid_cnt(x):
+    return x is not None and \
+           x.size >= 2 and \
+           x.ndim == 2 and \
+           x.shape[1] == 2
+
+def _h_smooth_cnt(food_cnt, resampling_N = 1000, smooth_window=None, _is_debug=False):
+    if smooth_window is None:
+        smooth_window = resampling_N//20
+
+    if not _is_valid_cnt(food_cnt):
+        #invalid contour array, return it unchanged
+        return food_cnt
+
+    smooth_window = smooth_window if smooth_window%2 == 1 else smooth_window+1
+    # calculate the cumulative length for each segment in the curve
+    dx = np.diff(food_cnt[:, 0])
+    dy = np.diff(food_cnt[:, 1])
+    dr = np.sqrt(dx * dx + dy * dy)
+    lengths = np.cumsum(dr)
+    lengths = np.hstack((0, lengths))  # add the first point
+    tot_length = lengths[-1]
+    fx = interp1d(lengths, food_cnt[:, 0])
+    fy = interp1d(lengths, food_cnt[:, 1])
+    subLengths = np.linspace(0 + np.finfo(float).eps, tot_length, resampling_N)
+
+    rx = fx(subLengths)
+    ry = fy(subLengths)
+
+    pol_degree = 3
+    rx = savgol_filter(rx, smooth_window, pol_degree, mode='wrap')
+    ry = savgol_filter(ry, smooth_window, pol_degree, mode='wrap')
+
+    food_cnt_s = np.stack((rx, ry), axis=1)
+
+    if _is_debug:
+        import matplotlib.pylab as plt
+        plt.figure()
+        plt.plot(food_cnt[:, 0], food_cnt[:, 1], '.-')
+        plt.plot(food_cnt_s[:, 0], food_cnt_s[:, 1], '.-')
+        plt.axis('equal')
+        plt.title('smoothed contour')
+
+    return food_cnt_s
+
+#%%
+def _h_get_unit_vec(x):
+    return x/np.linalg.norm(x, axis=1)[:, np.newaxis]
+#%%
+def get_cnt_feats(skeletons,
+                  food_cnt,
+                  is_smooth_cnt = True,
+                  _is_debug = False):
+
+    if is_smooth_cnt:
+        food_cnt = _h_smooth_cnt(food_cnt)
+    #%%
+    worm_coords, orientation_v = _h_segment_position(skeletons, partition = 'body')
+
+    rr = np.linalg.norm(worm_coords[:, None, :] - food_cnt[None, ...], axis=2)
+    cnt_ind = np.argmin(rr, axis=1)
+    dist_from_cnt = np.array([x[i] for i, x in zip(cnt_ind, rr)])
+    bbPath = mplPath.Path(food_cnt)
+    outside = ~bbPath.contains_points(worm_coords)
+    dist_from_cnt[outside] = -dist_from_cnt[outside]
+    worm_u = _h_get_unit_vec(orientation_v)
+    #%%
+    top = cnt_ind + 1
+    top[top >= food_cnt.shape[0]] -= food_cnt.shape[0]  #fix any overflow index
+    bot = cnt_ind - 1  #no correction needed, negative indexing wraps around
+
+    #I am using the normal vector so the orientation can be calculated between -90 and 90,
+    #positive if the worm is pointing towards the food center and negative if it is pointing outwards
+    food_u = _h_get_unit_vec(food_cnt[top] - food_cnt[bot])
+    R = np.array([[0, 1], [-1, 0]])
+    food_u = (np.dot(R, food_u.T)).T
+
+    dot_prod = np.sum(food_u*worm_u, axis=1)
+
+    with np.errstate(invalid='ignore'):
+        orientation_food_cnt = 90 - np.arccos(dot_prod)*180/np.pi
+
+    #%%
+    dd = np.array([orientation_food_cnt, dist_from_cnt]).T
+    food_df = pd.DataFrame(dd, columns = food_columns)
+    #%%
+    if _is_debug:
+        import matplotlib.pylab as plt
+        plt.figure(figsize=(24, 12))
+
+        plt.subplot(2, 2, 2)
+        plt.plot(orientation_food_cnt)
+        plt.title('Orientation with respect to the food contour')
+
+        plt.subplot(2, 2, 4)
+        plt.plot(dist_from_cnt)
+        plt.title('Distance from the food contour')
+
+        plt.subplot(1, 2, 1)
+        plt.plot(food_cnt[:, 0], food_cnt[:, 1])
+        plt.plot(worm_coords[:, 0], worm_coords[:, 1], '.')
+        plt.plot(food_cnt[cnt_ind, 0], food_cnt[cnt_ind, 1], 'r.')
+        plt.axis('equal')
+
+
+    return food_df
+
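A toy check of the sign convention implemented above; the unit-square "food patch" and the constant skeletons are invented for illustration, and the import path assumes the package location added by this diff.

```
import numpy as np
from tierpsy.features.tierpsy_features.food import get_cnt_feats

square = np.array([[0., 0.], [1., 0.], [1., 1.], [0., 1.]])  # toy food contour
inside = np.full((1, 49, 2), 0.5)   # worm sitting at the centre of the patch
outside = np.full((1, 49, 2), 2.0)  # worm away from the patch

# dist_from_food_edge is positive inside the contour and negative outside
# (orientation_food_edge is nan here: this degenerate toy worm has no head-tail axis)
print(get_cnt_feats(inside, square, is_smooth_cnt=False)['dist_from_food_edge'])   # > 0
print(get_cnt_feats(outside, square, is_smooth_cnt=False)['dist_from_food_edge'])  # < 0
```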
diff --git a/tierpsy/features/tierpsy_features/helper.py b/tierpsy/features/tierpsy_features/helper.py
new file mode 100755
index 00000000..ec4ad246
--- /dev/null
+++ b/tierpsy/features/tierpsy_features/helper.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 22 22:36:52 2017
+
+@author: ajaver
+"""
+import pandas as pd
+import numpy as np
+import numba
+import math
+import os
+import tables
+
+extras_dir = os.path.join(os.path.dirname(__file__), 'extras')
+
+def load_OW_eigen_projections():
+    eigen_projection_file = os.path.join(extras_dir, 'master_eigen_worms_N2.mat')
+    assert os.path.exists(eigen_projection_file)
+    with tables.File(eigen_projection_file, 'r') as fid:
+        eigen_worms = fid.get_node('/eigenWorms')[:]
+        eigen_worms = eigen_worms.T
+    return eigen_worms
+
+def load_eigen_projections(n_projections = 7):
+    eigen_projection_file = os.path.join(extras_dir, 'pca_components.npy')
+    if not os.path.exists(eigen_projection_file):
+        raise FileNotFoundError('The file {} does not exist. I cannot start tierpsy features.'.format(eigen_projection_file))
+    eigen_worms = np.load(eigen_projection_file)[:n_projections]
+    return eigen_worms
+
+
+@numba.jit
+def fillfnan(arr):
+    '''
+    fill forward nan values (propagate the last valid value).
+    I define this function so I do not have to call a pandas DataFrame.
+    '''
+    out = arr.copy()
+    for idx in range(1, out.shape[0]):
+        if np.isnan(out[idx]):
+            out[idx] = out[idx - 1]
+    return out
+
+@numba.jit
+def fillbnan(arr):
+    '''
+    fill backward nan values (propagate the next valid value).
+    I define this function so I do not have to call a pandas DataFrame.
+    '''
+    out = arr.copy()
+    for idx in range(out.shape[0]-1)[::-1]:
+        if np.isnan(out[idx]):
+            out[idx] = out[idx+1]
+    return out
+
+@numba.jit
+def nanunwrap(x):
+    '''correct for phase changes (np.unwrap) in a vector that contains nan values
+    '''
+    bad = np.isnan(x)
+    x = fillfnan(x)
+    x = fillbnan(x)
+    x = np.unwrap(x)
+    x[bad] = np.nan
+    return x
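A quick illustration of why `nanunwrap` exists (toy numbers, not from real data): it removes the jumps across the ±π discontinuity that plain `np.unwrap` cannot handle when frames are missing, while keeping the gaps as nan.

```
import numpy as np
from tierpsy.features.tierpsy_features.helper import nanunwrap

# angles crossing the +/-pi discontinuity, with a missing frame in the middle
theta = np.array([3.0, 3.1, np.nan, -3.1, -3.0])
print(nanunwrap(theta))
# -> approx [3.0, 3.1, nan, 3.18, 3.28]: the wrap-around jump is removed
#    and the nan stays in place instead of breaking the unwrapping
```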
+
+def get_n_worms_estimate(frame_numbers, percentile = 99):
+    '''
+    Estimate the number of worms using the frame_numbers vector of the timeseries table.
+    '''
+    n_per_frame = frame_numbers.value_counts()
+    n_per_frame = n_per_frame.values
+    if len(n_per_frame) > 0:
+        n_worms_estimate = np.percentile(n_per_frame, percentile)
+    else:
+        n_worms_estimate = 0
+    return n_worms_estimate
+
+
+def get_delta_in_frames(delta_time, fps):
+    '''Convert a delta time in seconds into frames. Make sure it is at least one.'''
+    return max(1, int(round(fps*delta_time)))
+
+def add_derivatives(feats, cols2deriv, delta_frames, fps):
+    '''
+    Calculate the derivatives of timeseries features, and add the columns to the original dataframe.
+    '''
+    val_cols = [x for x in cols2deriv if x in feats]
+
+    feats = feats.sort_values(by='timestamp')
+
+    df_ts = feats[val_cols].copy()
+    df_ts.columns = ['d_' + x for x in df_ts.columns]
+
+    m_o, m_f = math.floor(delta_frames/2), math.ceil(delta_frames/2)
+
+    vf = df_ts.iloc[delta_frames:].values
+    vo = df_ts.iloc[:-delta_frames].values
+    vv = (vf - vo)/(delta_frames/fps)
+
+    #if the series was too short to calculate the derivative, leave the columns as nan
+    if vv.size > 0:
+        df_ts.loc[:] = np.nan
+        df_ts.iloc[m_o:-m_f] = vv
+
+    feats = pd.concat([feats, df_ts], axis=1)
+    return feats
+
+class DataPartition():
+    def __init__(self, partitions=None, n_segments=49):
+
+        #the upper limits are one more than the real limit so I can do A[ini:fin]
+        partitions_dflt = {'head': (0, 8),
+                           'neck': (8, 16),
+                           'midbody': (16, 33),
+                           'hips': (33, 41),
+                           'tail': (41, 49),
+                           'head_tip': (0, 3),
+                           'head_base': (5, 8),
+                           'tail_base': (41, 44),
+                           'tail_tip': (46, 49),
+                           'all': (0, 49),
+                           #'hh' : (0, 16),
+                           #'tt' : (33, 49),
+                           'body': (8, 41),
+                           }
+
+        if partitions is None:
+            partitions = partitions_dflt
+        else:
+            partitions = {p:partitions_dflt[p] for p in partitions}
+
+
+        if n_segments != 49:
+            r_fun = lambda x : int(round(x/49*n_segments))
+            for key in partitions:
+                partitions[key] = tuple(map(r_fun, partitions[key]))
+
+        self.n_segments = n_segments
+        self.partitions = partitions
+
+    def apply(self, data, partition, func, segment_axis=1):
+        assert self.n_segments == data.shape[segment_axis]
+        assert partition in self.partitions
+
+        ini, fin = self.partitions[partition]
+        sub_data = np.take(data, np.arange(ini, fin), axis=segment_axis)
+        d_transform = func(sub_data, axis=segment_axis)
+
+        return d_transform
+
+    def apply_partitions(self, data, func, segment_axis=1):
+        return {p:self.apply(data, p, func, segment_axis=segment_axis) for p in self.partitions}
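A small usage sketch of `DataPartition`, which the velocity, posture, and path modules below all rely on; the zero array is a toy stand-in for a real skeleton stack, and the import path assumes the package location added by this diff.

```
import numpy as np
from tierpsy.features.tierpsy_features.helper import DataPartition

# average the (x, y) coordinates over the midbody segments of a
# 10-frame, 49-point skeleton stack (axis 1 indexes the segments)
skeletons = np.zeros((10, 49, 2))
p_obj = DataPartition(['midbody'], n_segments=49)
midbody_coords = p_obj.apply(skeletons, 'midbody', func=np.mean)
print(midbody_coords.shape)  # (10, 2): one averaged point per frame
```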
diff --git a/tierpsy/features/tierpsy_features/path.py b/tierpsy/features/tierpsy_features/path.py
new file mode 100755
index 00000000..1c45f2aa
--- /dev/null
+++ b/tierpsy/features/tierpsy_features/path.py
@@ -0,0 +1,368 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 29 15:54:00 2017
+
+@author: ajaver
+"""
+import numpy as np
+import cv2
+import pandas as pd
+from scipy.interpolate import interp1d
+
+from .curvatures import curvature_grad
+from .postures import get_length
+from .helper import DataPartition, get_n_worms_estimate
+
+path_curvature_columns = ['path_curvature_body',
+                          'path_curvature_tail',
+                          'path_curvature_midbody',
+                          'path_curvature_head'
+                          ]
+
+path_curvature_columns_aux = ['coord_x_body', 'coord_y_body',
+                              'coord_x_tail', 'coord_y_tail',
+                              'coord_x_midbody', 'coord_y_midbody',
+                              'coord_x_head', 'coord_y_head'
+                              ]
+
+DFLT_ARGS = dict(
+    path_step = 11,
+    path_grad_window = 5,
+    clip_val_body_lengths = 20,
+    bin_size_microns = 250,
+    bin_size_body_lengths = 0.25
+    )
+
+#%%
+def _h_path_curvature(skeletons,
+                      body_length = None,
+                      partition_str = 'body',
+                      path_step = DFLT_ARGS['path_step'],
+                      path_grad_window = DFLT_ARGS['path_grad_window'],
+                      _is_debug = False):
+
+    if body_length is None:
+        #calculate the length if it is not given
+        body_length = np.nanmedian(get_length(skeletons))
+    #clip_val = clip_val_body_lengths/body_length
+
+    p_obj = DataPartition(n_segments=skeletons.shape[1])
+    body_coords = p_obj.apply(skeletons, partition_str, func=np.mean)
+
+    xx = body_coords[:, 0]
+    yy = body_coords[:, 1]
+    tt = np.arange(body_coords.shape[0])
+
+    #empty array return (tt is an integer array, so use np.full to get a float nan vector)
+    if body_coords.size == 0 or np.all(np.isnan(body_coords)):
+        return np.full(tt.shape, np.nan), body_coords
+
+    #interpolate nan values
+    good = ~np.isnan(xx)
+
+    x_i = xx[good]
+    y_i = yy[good]
+    t_i = tt[good]
+
+    t_i = np.hstack([-1, t_i, body_coords.shape[0]])
+    x_i = np.hstack([x_i[0], x_i, x_i[-1]])
+    y_i = np.hstack([y_i[0], y_i, y_i[-1]])
+
+    fx = interp1d(t_i, x_i)
+    fy = interp1d(t_i, y_i)
+
+    xx_i = fx(tt)
+    yy_i = fy(tt)
+
+    # calculate the cumulative length for each segment in the curve
+    dx = np.diff(xx_i)
+    dy = np.diff(yy_i)
+    dr = np.sqrt(dx * dx + dy * dy)
+
+    lengths = np.cumsum(dr)
+    lengths = np.hstack((0, lengths))
+
+    fx = interp1d(lengths, xx_i)
+    fy = interp1d(lengths, yy_i)
+    ft = interp1d(lengths, tt)
+
+    sub_lengths = np.arange(lengths[0], lengths[-1], path_step)
+
+    #there is not enough data to calculate the curvature
+    if len(sub_lengths) <= 4*path_grad_window:
+        return np.full(skeletons.shape[0], np.nan), body_coords
+
+    xs = fx(sub_lengths)
+    ys = fy(sub_lengths)
+    ts = ft(sub_lengths)
+
+    curve = np.vstack((xs, ys)).T
+    curvature_r = curvature_grad(curve,
+                                 points_window = path_grad_window,
+                                 axis=0,
+                                 is_nan_border=False)
+
+    #clip values to remove regions with extremely large curvatures (typically short reversals)
+    #curvature_r = np.clip(curvature_r, -clip_val, clip_val)
+
+    ts_i = np.hstack((-1, ts, tt[-1] + 1))
+    c_i = np.hstack((curvature_r[0], curvature_r, curvature_r[-1]))
+    curvature_t = interp1d(ts_i, c_i)(tt)
+
+    if _is_debug:
+        import matplotlib.pylab as plt
+        from matplotlib.collections import LineCollection
+        #path_curvature[np.isnan(worm_features['speed'])] = np.nan
+        #path_curvature = np.clip(curvature_t, -0.02, 0.02)
+        path_curvature = curvature_t
+
+        curv_range = (np.nanmin(path_curvature), np.nanmax(path_curvature))
+
+        points = body_coords.reshape(-1, 1, 2)
+        segments = np.concatenate([points[:-1], points[1:]], axis=1)
+        lc = LineCollection(segments,
+                            cmap = plt.get_cmap('plasma'),
+                            norm = plt.Normalize(*curv_range))
+        lc.set_array(path_curvature)
+        lc.set_linewidth(2)
+
+        plt.figure(figsize=(20, 5))
+
+        plt.subplot(1, 2, 1)
+        plt.gca().add_collection(lc)
+
+        plt.xlim(3000, 11000)
+        plt.ylim(3000, 11000)
+        plt.axis('equal')
+
+        plt.subplot(1, 2, 2)
+        plt.plot(path_curvature)
+
+    return curvature_t, body_coords
+
+def get_path_curvatures(skeletons, **argkws):
+    path_curvatures = []
+    path_coords = []
+
+    body_length = np.nanmedian(get_length(skeletons))
+
+    for partition_str in ['body', 'tail', 'midbody', 'head']:
+
+        path_curv, coords = \
+            _h_path_curvature(skeletons,
+                              body_length,
+                              partition_str = partition_str,
+                              **argkws
+                              )
+
+        path_curvatures.append(('path_curvature_' + partition_str, path_curv))
+
+        path_coords.append(('coord_x_' + partition_str, coords[..., 0]))
+        path_coords.append(('coord_y_' + partition_str, coords[..., 1]))
+
+    cols, dat = zip(*path_curvatures)
+
+    path_curvatures_df = pd.DataFrame(np.array(dat).T, columns=cols)
+
+    cols, dat = zip(*path_coords)
+    path_coords_df = pd.DataFrame(np.array(dat).T, columns=cols)
+    return path_curvatures_df, path_coords_df
+
+def _test_plot_cnts_maps(ventral_contour, dorsal_contour):
+    import matplotlib.pylab as plt
+    pix2microns = 10
+
+    x_min = np.nanmin(ventral_contour[:, :, 0])
+    x_max = np.nanmax(ventral_contour[:, :, 0])
+
+    y_min = 
np.nanmin(dorsal_contour[:, :, 1]) + y_max = np.nanmax(dorsal_contour[:, :, 1]) + + + rx = int(round((x_max - x_min)/pix2microns)) + ry = int(round((y_max - y_min)/pix2microns)) + + size_counts = (rx + 1, ry + 1) + + partitions_dflt = {'head': (0, 8), + 'neck': (8, 16), + 'midbody': (16, 33), + 'hips': (33, 41), + 'tail': (41, 49), + 'all': (0, 49), + 'body': (8, 41) + } + + all_cnts = {} + for part, rr in partitions_dflt.items(): + + p_vc = ventral_contour[:, rr[0]:rr[1], :].astype(np.float32) + p_dc = dorsal_contour[:, rr[0]:rr[1], :].astype(np.float32) + h = np.hstack((p_vc[:, ], p_dc[:, ::-1, :], p_vc[:, 0, :][:, None, :])) + + + cnts = [np.round((x-np.array((x_min, y_min))[None, :])/pix2microns) for x in h] + + + counts = np.zeros(size_counts, np.float32) + + for ii, cnt in enumerate(cnts): + if np.any(np.isnan(cnt)): + continue + cc = np.zeros(size_counts, np.float32) + cc = cv2.drawContours(cc, [cnt[:, None, :].astype(np.int)], contourIdx=-1, thickness=-1, color=1) + counts += cc + + plt.figure() + plt.imshow(counts, interpolation='none') + plt.title(part) + + all_cnts[part] = counts + + print(part) + + +#%% +def _get_path_coverage_feats(timeseries_data, bin_size_microns): + + #find the columns that correspond to curvature_coords + cols = [x for x in timeseries_data if x in path_curvature_columns_aux] + path_coords_df = timeseries_data[cols] + + + + bin_vals = ((path_coords_df - path_coords_df.mean())/bin_size_microns).round() + try: + bin_vals = bin_vals.fillna(method='ffill').fillna(method='bfill').astype(np.int) + except ValueError: + #likely full of nan's return empty + return {} + + path_coverage_feats = {} + for b_part in set(x.rpartition('_')[-1] for x in bin_vals.columns): + dat = bin_vals[['coord_x_' + b_part,'coord_y_' + b_part]] + dat.columns = ['X', 'Y'] + gg = dat.groupby(["X", "Y"]) + + #here i am counting the number of times any worm enter to a given grid + grid_counts = gg.size().reset_index(name="Counts") + #cc = pd.crosstab(dat['X'], dat['Y']) + + #now i want to assign a label to each grid each (worm_index, timestamp) + ind_bins = np.full(dat.shape[0], -1) + for ii, (k, vals) in enumerate(gg): + ind_bins[vals.index] = ii + df = timeseries_data[['worm_index']].copy() + df['ind_bins'] = ind_bins + + #now i want to see the duration a given worm spend in each grid + grid_durations = [] + for w, vec in df.groupby('worm_index'): + xx = vec['ind_bins'].values + xr = np.insert(xx[1:], xx.size-1, -1) + + b_flags = xr!=xx + #b_id = xx[b_flags] + b_s = np.diff(np.insert(np.where(b_flags)[0], 0, -1)) + grid_durations.append(b_s) + + if grid_durations: + grid_durations = np.concatenate(grid_durations) + else: + grid_durations = np.zeros(0) + + path_coverage_feats[b_part] = (grid_counts, grid_durations) + + + return path_coverage_feats + +def get_path_extent_stats(timeseries_data, fps, is_normalized = False): + + if is_normalized: + body_length = timeseries_data['length'].median() + bin_size_microns = DFLT_ARGS['bin_size_body_lengths']*body_length + area_per_grid = 1 + is_norm_str = '_norm' + else: + bin_size_microns = DFLT_ARGS['bin_size_microns'] + is_norm_str = '' + area_per_grid = bin_size_microns**2 + + path_coverage_feats = _get_path_coverage_feats(timeseries_data, bin_size_microns) + + Q = [50, 95] + + grid_stats = [] + for b_part, (grid_counts, grid_durations) in path_coverage_feats.items(): + if grid_durations.size > 0: + grid_transit_time = np.percentile(grid_durations, Q)/fps + else: + grid_transit_time = (np.nan, np.nan) + + if grid_counts['Counts'].size > 0: + 
path_coverage = grid_counts['Counts'].size*area_per_grid + path_density = np.percentile(grid_counts['Counts'], Q)/grid_counts['Counts'].sum() + else: + path_coverage = np.nan + path_density = (np.nan, np.nan) + + posfix = b_part + is_norm_str + grid_stats += [ + (path_coverage, 'path_coverage_' + posfix), + (path_density[0], 'path_density_{}_{}th'.format(posfix, Q[0])), + (path_density[1], 'path_density_{}_{}th'.format(posfix, Q[1])), + (grid_transit_time[0], 'path_transit_time_{}_{}th'.format(posfix, Q[0])), + (grid_transit_time[1], 'path_transit_time_{}_{}th'.format(posfix, Q[1])), + ] + + grid_stats_s = pd.Series(*list(zip(*grid_stats))) + return grid_stats_s + +#%% +if __name__ == '__main__': + import os + import tables + + + #%% + #_test_plot_cnts_maps(ventral_contour, dorsal_contour) + base_dir = '/Users/ajaver/OneDrive - Imperial College London/tierpsy_features/test_data/multiworm' + skeletons_file = os.path.join(base_dir, 'MY23_worms5_food1-10_Set4_Pos5_Ch4_29062017_140148_skeletons.hdf5') + features_file = skeletons_file.replace('_skeletons.hdf5', '_featuresN.hdf5') + + #features_file = '/Users/ajaver/OneDrive - Imperial College London/tierpsy_features/test_data/multiworm/MY16_worms5_food1-10_Set5_Pos4_Ch1_02062017_131004_featuresN.hdf5' + features_file = '/Users/ajaver/OneDrive - Imperial College London/tierpsy_features/test_data/multiworm/170817_matdeve_exp7co1_12_Set0_Pos0_Ch1_17082017_140001_featuresN.hdf5' + + with pd.HDFStore(features_file, 'r') as fid: + blob_features = fid['/blob_features'] + trajectories_data = fid['/trajectories_data'] + timeseries_data = fid['/timeseries_data'] + + fps = fid.get_storer('/trajectories_data').attrs['fps'] + good = trajectories_data['skeleton_id']>=0 + trajectories_data = trajectories_data[good] + blob_features = blob_features[good] + + if False: + + trajectories_data_g = trajectories_data.groupby('worm_index_joined') + + + for worm_index in trajectories_data_g.groups.keys(): + worm_index = 4#695 + worm_data = trajectories_data_g.get_group(worm_index) + skel_id = worm_data['skeleton_id'].values + with tables.File(features_file, 'r') as fid: + skeletons = fid.get_node('/coordinates/skeletons')[skel_id, :, :] + worm_features = timeseries_data.loc[skel_id] + + path_curvatures_df, path_coords_df = get_path_curvatures(skeletons, _is_debug=True) + break + #%% + get_path_extent_stats(timeseries_data) + + \ No newline at end of file diff --git a/tierpsy/features/tierpsy_features/postures.py b/tierpsy/features/tierpsy_features/postures.py new file mode 100755 index 00000000..1430a7d9 --- /dev/null +++ b/tierpsy/features/tierpsy_features/postures.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Aug 22 22:01:03 2017 + +@author: ajaver +""" +import numpy as np +import tables +import warnings +import cv2 +import pandas as pd +from collections import OrderedDict + +from .helper import DataPartition, load_eigen_projections + +morphology_columns = ['length', 'area', 'width_head_base', 'width_midbody', 'width_tail_base'] + +posture_columns = ['quirkiness', 'major_axis', + 'minor_axis', 'eigen_projection_1', 'eigen_projection_2', + 'eigen_projection_3', 'eigen_projection_4', 'eigen_projection_5', + 'eigen_projection_6', 'eigen_projection_7'] + +posture_aux = ['head_tail_distance'] + +#%% Morphology Features +def get_widths(widths): + partitions = ('head_base', 'midbody', 'tail_base') + p_obj = DataPartition(partitions, n_segments=widths.shape[1]) + + with warnings.catch_warnings(): + #I am unwraping in one dimension 
first
+        warnings.simplefilter("ignore")
+        segment_widths = {p:p_obj.apply(widths, p, func=np.median) for p in partitions}
+
+    return segment_widths
+
+def _signed_areas(cnt_side1, cnt_side2):
+    '''calculate the contour area using the shoelace method; the sign indicates the contour orientation.'''
+    assert cnt_side1.shape == cnt_side2.shape
+    if cnt_side1.ndim == 2:
+        # if the input is two-dimensional (a single skeleton),
+        # add an extra dimension to be compatible with the rest of the code
+        cnt_side1 = cnt_side1[None, ...]
+        cnt_side2 = cnt_side2[None, ...]
+
+    contour = np.hstack((cnt_side1, cnt_side2[:, ::-1, :]))
+    signed_area = np.sum(
+        contour[:, :-1, 0] * contour[:, 1:, 1] -
+        contour[:, 1:, 0] * contour[:, :-1, 1],
+        axis=1) / 2
+
+    assert signed_area.size == contour.shape[0]
+    return signed_area
+
+def get_area(cnt_side1, cnt_side2):
+    with np.errstate(invalid='ignore'):
+        area = np.abs(_signed_areas(cnt_side1, cnt_side2))
+    return area
+
+def get_length(skeletons):
+    '''
+    Calculate length using the skeletons
+    '''
+    delta_coords = np.diff(skeletons, axis=1)
+    segment_sizes = np.linalg.norm(delta_coords, axis=2)
+    w_length = np.sum(segment_sizes, axis=1)
+    return w_length
+
+
+def get_morphology_features(skeletons,
+                            widths = None,
+                            dorsal_contours = None,
+                            ventral_contours = None):
+
+    data = OrderedDict()
+
+    lengths = get_length(skeletons)
+    data['length'] = lengths
+
+    areas = None
+    if ventral_contours is not None and dorsal_contours is not None:
+        areas = get_area(ventral_contours, dorsal_contours)
+        data['area'] = areas
+        #data['area_length_ratio'] = areas/lengths
+
+    if widths is not None:
+        widths_seg = get_widths(widths)
+        #data['width_length_ratio'] = widths_seg['midbody']/lengths
+        for p in widths_seg:
+            data['width_' + p] = widths_seg[p]
+
+    data = pd.DataFrame.from_dict(data)
+    return data
+
+
+#%%
+def _angles(skeletons):
+    dd = np.diff(skeletons, axis=1)
+    angles = np.arctan2(dd[..., 0], dd[..., 1])
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        angles = np.unwrap(angles, axis=1)
+
+    mean_angles = np.mean(angles, axis=1)
+    angles -= mean_angles[:, None]
+
+    return angles, mean_angles
+
+def get_eigen_projections(skeletons):
+    eigen_worms = load_eigen_projections()
+    angles, _ = _angles(skeletons)
+    eigen_projections = np.dot(eigen_worms, angles.T)
+    eigen_projections = np.rollaxis(eigen_projections, -1, 0)
+    return eigen_projections
+
+#%%
+def get_quirkiness(skeletons):
+    bad = np.isnan(skeletons[:, 0, 0])
+
+    dd = [cv2.minAreaRect(x) for x in skeletons.astype(np.float32)]
+    dd = [(L, W) if L > W else (W, L) for _, (L, W), _ in dd]
+    L, W = list(map(np.array, zip(*dd)))
+    L[bad] = np.nan
+    W[bad] = np.nan
+    quirkiness = np.sqrt(1 - W**2 / L**2)
+
+    return quirkiness, L, W
+
+def get_head_tail_dist(skeletons):
+    return np.linalg.norm(skeletons[:, 0, :] - skeletons[:, -1, :], axis=1)
+#%%
+def get_posture_features(skeletons):
+
+    head_tail_dist = get_head_tail_dist(skeletons)
+    quirkiness, major_axis, minor_axis = get_quirkiness(skeletons)
+
+    #I prefer to explicitly recalculate the lengths, just so I do not have to pass the length information
+    eigen_projections = get_eigen_projections(skeletons)
+
+    #repack into an ordered dictionary
+    data = OrderedDict(
+        [
+            ('head_tail_distance', head_tail_dist),
+            ('quirkiness', quirkiness),
+            ('major_axis', major_axis),
+            ('minor_axis', minor_axis)
+        ]
+        )
+
+    for n in range(eigen_projections.shape[1]):
+        data['eigen_projection_' + str(n+1)] = eigen_projections[:, n]
+
+    data = pd.DataFrame.from_dict(data)
+    return data
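A toy check of the quirkiness definition just added, which behaves like an eccentricity computed from the minimum-area bounding box: a perfectly straight worm approaches 1. The straight-line "skeleton" and import path are assumptions for illustration.

```
import numpy as np
from tierpsy.features.tierpsy_features.postures import get_quirkiness

# one frame of a perfectly straight, horizontal 49-point skeleton
skel = np.zeros((1, 49, 2))
skel[0, :, 0] = np.linspace(0, 100, 49)  # x runs along the worm, y stays 0

q, L, W = get_quirkiness(skel)
print(q, L, W)  # W ~ 0, so quirkiness = sqrt(1 - W^2/L^2) is ~1
```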
+
+#%%
+if __name__ == '__main__':
+    data = np.load('worm_example_small_W1.npz')
+
+    skeletons = data['skeleton']
+    dorsal_contours = data['dorsal_contour']
+    ventral_contours = data['ventral_contour']
+    widths = data['widths']
+
+    feat_morph = get_morphology_features(skeletons, widths, dorsal_contours, ventral_contours)
+    feat_posture = get_posture_features(skeletons)
+
+    #I am still missing the velocity and path features but it should look like this
+    cols_to_use = [x for x in feat_posture.columns if x not in feat_morph]  #avoid duplicate length
+
+    features = feat_morph.join(feat_posture[cols_to_use])
\ No newline at end of file
diff --git a/tierpsy/features/tierpsy_features/smooth.py b/tierpsy/features/tierpsy_features/smooth.py
new file mode 100755
index 00000000..c0b1f22c
--- /dev/null
+++ b/tierpsy/features/tierpsy_features/smooth.py
@@ -0,0 +1,380 @@
+# -*- coding: utf-8 -*-
+"""
+This module defines the SmoothedWorm class
+
+"""
+
+import numpy as np
+from scipy.interpolate import interp1d
+from scipy.signal import savgol_filter
+
+
+def _h_resample_curve(curve, resampling_N=49, widths=None):
+    '''Resample curve to have resampling_N equidistant segments.
+    I give width as an optional parameter since I want to use the
+    same interpolation as with the skeletons.
+
+    I calculate the length here indirectly.
+    '''
+
+    # calculate the cumulative length for each segment in the curve
+    dx = np.diff(curve[:, 0])
+    dy = np.diff(curve[:, 1])
+    dr = np.sqrt(dx * dx + dy * dy)
+
+    lengths = np.cumsum(dr)
+    lengths = np.hstack((0, lengths))  # add the first point
+    tot_length = lengths[-1]
+
+    # verify the array lengths
+    if len(lengths) < 2 or len(curve) < 2:
+        return None, None, None
+
+    fx = interp1d(lengths, curve[:, 0])
+    fy = interp1d(lengths, curve[:, 1])
+
+    subLengths = np.linspace(0 + np.finfo(float).eps, tot_length, resampling_N)
+
+    # I add the epsilon because otherwise the interpolation will produce nan
+    # for zero
+    try:
+        resampled_curve = np.zeros((resampling_N, 2))
+        resampled_curve[:, 0] = fx(subLengths)
+        resampled_curve[:, 1] = fy(subLengths)
+        if widths is not None:
+            fw = interp1d(lengths, widths)
+            widths = fw(subLengths)
+    except ValueError:
+        resampled_curve = np.full((resampling_N, 2), np.nan)
+        widths = np.full(resampling_N, np.nan)
+
+    return resampled_curve, tot_length, widths
+
+
+def _h_smooth_curve(curve, window=5, pol_degree=3):
+    '''smooth curves using the savgol_filter'''
+
+    if curve.shape[0] < window:
+        # nothing to do here, return an array of nan
+        return np.full_like(curve, np.nan)
+
+    # consider the case of one (widths) or two dimensions (skeletons, contours)
+    if curve.ndim == 1:
+        smoothed_curve = savgol_filter(curve, window, pol_degree)
+    else:
+        smoothed_curve = np.zeros_like(curve)
+        for nn in range(curve.shape[1]):
+            smoothed_curve[:, nn] = savgol_filter(
+                curve[:, nn], window, pol_degree)
+
+    return smoothed_curve
+
+def get_group_borders(index_o, pad_val = False):
+
+    #add a pad value at the edges to capture any block that touches the borders
+    index = np.hstack([pad_val, index_o, pad_val])
+    switches = np.diff(index.astype(np.int))
+    turn_on, = np.where(switches==1)
+    turn_off, = np.where(switches==-1)
+    assert turn_off.size == turn_on.size
+
+    #fin is the last index of each block of ones
+    ind_ranges = [(ini, fin - 1) for ini, fin in zip(turn_on, turn_off)]
+    return ind_ranges
+
+def _h_fill_small_gaps(index_o, max_gap_size):
+    ind_ranges = get_group_borders(index_o)
+    #keep only the blocks larger than max_gap_size; the smaller gaps are "filled"
+    ind_ranges = [(ini, fin) for ini, fin in ind_ranges if fin - ini + 1 > max_gap_size]
+
+    index_filled = np.zeros_like(index_o)
+    for ini, fin in ind_ranges:
+        index_filled[ini:fin+1] = True
+
+    return index_filled
+#%%
+
+class SmoothedWorm():
+    """
+    Encapsulates the notion of a worm's elementary measurements, scaled
+    (i.e. 
"normalized") to 49 points along the length of the worm. + """ + + def __init__(self, + skeleton, + widths = None, + ventral_contour = None, + dorsal_contour = None, + skel_smooth_window = None, + coords_smooth_window = None, + frames_to_interpolate = None, + gap_to_interp = 0 + ): + """ + I assume data is evenly distributed in time, and missing frames are nan. + """ + + + + + self.ventral_contour = ventral_contour + self.dorsal_contour = dorsal_contour + self.skeleton = skeleton + self.widths = widths + self._h_validate_dims() + + self.pol_degree = 3 + self.gap_to_interp = gap_to_interp + + skel_smooth_window = self._h_fix_smooth(skel_smooth_window) + coords_smooth_window = self._h_fix_smooth(coords_smooth_window) + + self._smooth_coords(frames_to_interpolate, s_win = coords_smooth_window) + self._smooth_skeletons(s_win = skel_smooth_window) + self._resample_coords() + self._h_validate_dims() + + + def _h_validate_dims(self): + #validate dimenssions + n_frames, n_segments, n_dims = self.skeleton.shape + assert n_dims == 2 + if self.ventral_contour is not None: + assert self.dorsal_contour is not None + assert self.ventral_contour.shape == (n_frames, n_segments, n_dims) + assert self.ventral_contour.shape == self.dorsal_contour.shape + + if self.widths is not None: + #TODO I might be able to calculate the widths if the dorsal and ventral contour are given + assert self.widths.shape == (n_frames, n_segments) + + + def _h_fix_smooth(self, smooth_window): + if smooth_window is None: + return smooth_window + + if smooth_window <= self.pol_degree: + #if the smoot window is too small do not smooth + return None + + if smooth_window % 2 == 0: + smooth_window += 1 + + return smooth_window + + + def _h_resample_coords(self, A, W = None): + #I am adding the W as width, in the case of skeletons, + #I want to interpolate the widths using the same spacing + L = np.full(A.shape[0], np.nan) + for ii in range(A.shape[0]): + w = None if W is None else W[ii] + + A[ii], L[ii], w = \ + _h_resample_curve(A[ii], A.shape[1], w) + + if not w is None: + W[ii] = w + + return A, L, W + + def _resample_coords(self): + + self.skeleton, self.length, self.widths = \ + self._h_resample_coords(self.skeleton, W = self.widths) + + if self.dorsal_contour is not None: + self.ventral_contour, _, _ = \ + self._h_resample_coords(self.ventral_contour) + self.dorsal_contour, _, _ = \ + self._h_resample_coords(self.dorsal_contour) + + + def _h_smooth_skeletons(self, curves, s_win, pol_degree=3): + if curves is not None: + for ii in range(curves.shape[0]): + if not np.any(np.isnan(curves[ii])): + curves[ii] = _h_smooth_curve( + curves[ii], + window = s_win, + pol_degree = self.pol_degree + ) + return curves + + def _smooth_skeletons(self, s_win): + if s_win is None: + return + self.skeleton = self._h_smooth_skeletons(self.skeleton, s_win) + self.widths = self._h_smooth_skeletons(self.widths, s_win) + self.ventral_contour = self._h_smooth_skeletons(self.ventral_contour, s_win) + self.dorsal_contour = self._h_smooth_skeletons(self.dorsal_contour, s_win) + + def _h_interp_and_smooth(self, x, y, x_pred, s_win): + f = interp1d(x, y) + y_interp = f(x_pred) + + if (s_win is None) or (y_interp.size <= s_win): + return y_interp + + y_smooth = savgol_filter(y_interp, s_win, self.pol_degree) + return y_smooth + + + def _h_smooth_coords(self, + dat_o, + s_win, + good_frames_index, + frames_to_interpolate, + frames_to_nan): + ''' + Interpolate coordinates for each segment + ''' + + if dat_o is None: + return dat_o + + dat_all = 
dat_o[good_frames_index] + if dat_all.shape[0] <= 2: + #not enough data to smooth + return dat_all + + new_shape = (frames_to_interpolate.size, dat_o.shape[1], dat_o.shape[2]) + dat_all_s = np.full(new_shape, np.nan) + + #add data in the borders to be able to interpolate within those regions + tt = np.hstack([-1, good_frames_index, dat_o.shape[0]]) + for i_seg in range(dat_all.shape[1]): + for i_coord in range(2): + c = dat_all[:, i_seg, i_coord] + c = np.hstack([c[0], c, c[-1]]) + + c_s = self._h_interp_and_smooth(tt, c, frames_to_interpolate, s_win) + dat_all_s[:, i_seg, i_coord] = c_s + + dat_all_s[frames_to_nan, :, :] = np.nan + return dat_all_s + + def _smooth_coords(self, frames_to_interpolate, s_win): + + if frames_to_interpolate is None: + frames_to_interpolate = np.arange(self.skeleton.shape[0]) + + bad = np.isnan(self.skeleton[:, 0, 0]) + good_frames_index, = np.where(~bad) + + #get indexes of nan's after removing small gaps and interpolating + bad_filled = _h_fill_small_gaps(bad, self.gap_to_interp) + f = interp1d(np.arange(bad_filled.size), bad_filled) + frames_to_nan = np.ceil(f(frames_to_interpolate)).astype(np.bool) + assert frames_to_interpolate.size == frames_to_nan.size + + #interpolate all the fields + args = (s_win, good_frames_index, frames_to_interpolate, frames_to_nan) + self.skeleton = self._h_smooth_coords(self.skeleton, *args) + self.ventral_contour = self._h_smooth_coords(self.ventral_contour, *args) + self.dorsal_contour = self._h_smooth_coords(self.dorsal_contour, *args) + + + +if __name__ == '__main__': + ''' + Code for testing... + ''' + from tierpsy.analysis.feat_create.obtainFeaturesHelper import WormFromTable + from tierpsy.analysis.feat_create.obtainFeatures import getGoodTrajIndexes + from tierpsy.helper.misc import RESERVED_EXT + from tierpsy.helper.params import read_fps + + import os + + if False: + #use if if you want to get the file names + import glob + import fnmatch + + exts = [''] + exts = ['*'+ext+'.hdf5' for ext in exts] + + mask_dir = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/Arantza/MaskedVideos/**/' + #mask_dir = '/Users/ajaver/OneDrive - Imperial College London/tests/join/' + fnames = glob.glob(os.path.join(mask_dir, '*.hdf5')) + fnames = [x for x in fnames if any(fnmatch.fnmatch(x, ext) for ext in exts)] + fnames = [x for x in fnames if not any(x.endswith(ext) for ext in RESERVED_EXT)] + + + #save_dir = '/Users/ajaver/OneDrive - Imperial College London/smooth_examples' + save_dir = './' + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + +# mask_video = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/Arantza/MaskedVideos/control_pulse/pkd2_5min_Ch1_11052017_121414.hdf5' +# save_prefix = 'worm_example.npz' +# is_WT2 = False + +# mask_video = '/Volumes/behavgenom_archive$/single_worm/finished/WT/N2/food_OP50/XX/30m_wait/clockwise/N2 on food L_2011_03_29__17_02_06___8___14.hdf5' +# save_prefix = 'worm_example_big_W{}.npz' +# is_WT2 = True + +# mask_video = '/Volumes/behavgenom_archive$/single_worm/finished/WT/N2/food_OP50/XX/30m_wait/anticlockwise/N2 on food R_2009_09_04__10_59_59___8___5.hdf5' +# save_prefix = 'worm_example_small_W{}.npz' +# is_WT2 = True + + mask_video = '/Volumes/behavgenom_archive$/Lidia/MaskedVideos/Optogenetics-day1/AQ3071-ATR_Set1_Ch1_18072017_191322.hdf5' + is_WT2 = False + + skeletons_file = mask_video.replace('MaskedVideos','Results').replace('.hdf5', '_skeletons.hdf5') + #%% + import pandas as pd + with pd.HDFStore(skeletons_file, 'r') as fid: + trajectories_data = 
fid['/trajectories_data'] + trajectories_data[trajectories_data['worm_index_joined'] == 2] + + #%% + fps = read_fps(skeletons_file) + coords_smooth_window = int(np.round(fps/3)) + if coords_smooth_window <= 3: + coords_smooth_window = None + + good_traj_index, worm_index_type = getGoodTrajIndexes(skeletons_file) + for iw, worm_index in enumerate(good_traj_index): + print(iw, len(good_traj_index)) + worm = WormFromTable(skeletons_file, + worm_index, + worm_index_type=worm_index_type + ) + if is_WT2: worm.correct_schafer_worm() + + wormN = SmoothedWorm( + worm.skeleton, + worm.widths, + worm.ventral_contour, + worm.dorsal_contour, + skel_smooth_window = 5, + coords_smooth_window = coords_smooth_window, + gap_to_interp = 5 + ) + + +# save_file = os.path.join(save_dir, save_prefix.format(worm_index)) +# np.savez_compressed(save_file, +# skeleton=wormN.skeleton, +# ventral_contour=wormN.ventral_contour, +# dorsal_contour=wormN.dorsal_contour, +# widths = wormN.widths +# ) +# +# +# +# break +# #%% +# import matplotlib.pyplot as plt +# plt.figure() +# plt.subplot(4,1,1) +# plt.plot(wormN.skeleton[: ,0,0]) +# plt.subplot(4,1,2) +# plt.plot(wormN.skeleton[: ,0,1]) +# plt.subplot(2,1,2) +# plt.plot(wormN.skeleton[: ,0,0], wormN.skeleton[: ,0,1]) +# plt.axis('equal') + \ No newline at end of file diff --git a/tierpsy/features/tierpsy_features/summary_stats.py b/tierpsy/features/tierpsy_features/summary_stats.py new file mode 100755 index 00000000..ff20330c --- /dev/null +++ b/tierpsy/features/tierpsy_features/summary_stats.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Oct 2 14:24:25 2017 + +@author: ajaver +""" + + +from .helper import get_n_worms_estimate, get_delta_in_frames, add_derivatives +from .events import get_event_stats, event_region_labels +from .path import get_path_extent_stats +from .features import timeseries_feats_columns, \ +ventral_signed_columns, path_curvature_columns, curvature_columns + +import pandas as pd +import numpy as np + +index_colums = ['worm_index', 'timestamp'] + +blob_feats_columns = ['blob_area', + 'blob_perimeter', + 'blob_box_length', + 'blob_box_width', + 'blob_quirkiness', + 'blob_compactness', + 'blob_solidity', + 'blob_hu0', + 'blob_hu1', + 'blob_hu2', + 'blob_hu3', + 'blob_hu4', + 'blob_hu5', + 'blob_hu6' + ] + +#get the ratios to be normalized +feats2normalize = { + 'L' : [ + 'head_tail_distance', + 'major_axis', + 'minor_axis', + 'dist_from_food_edge', + 'length', + 'width_head_base', + 'width_midbody', + 'width_tail_base' + ], + '1/L' : path_curvature_columns + curvature_columns, + 'L^2' : ['area'] +} +feats2normalize['L'] += [x for x in timeseries_feats_columns if 'radial_velocity' in x] +feats2normalize['L'] += [x for x in timeseries_feats_columns if 'speed' in x] + +#add derivatives and make sure there are not duplicates +for k,dat in feats2normalize.items(): + dfeats = ['d_' + x for x in dat if not x.startswith('d_')] + feats2normalize[k] = list(set(dat) ^ set(dfeats)) + +def _normalize_by_w_length(timeseries_data, feats2norm): + ''' + Normalize features by body length. This is far from being the most efficient solution, but it is the easier to implement. 
+ ''' + def _get_conversion_vec(units_t, median_length_vec): + '''helper function to find how to make the conversion''' + if units_t == 'L': + conversion_vec = 1/median_length_vec + elif units_t == '1/L': + conversion_vec = median_length_vec + elif units_t == 'L^2': + conversion_vec = median_length_vec**2 + return conversion_vec + + timeseries_data = timeseries_data.copy() + + median_length = timeseries_data.groupby('worm_index').agg({'length':'median'}) + median_length_vec = timeseries_data['worm_index'].map(median_length['length']) + + changed_feats_l = [] + for units_t, feats in feats2norm.items(): + feats_f = [x for x in timeseries_data if any(x.startswith(f) for f in feats)] + conversion_vec = _get_conversion_vec(units_t, median_length_vec) + for f in feats_f: + timeseries_data[f] *= conversion_vec + changed_feats_l += feats_f + + changed_feats = {x: x + '_norm' for x in changed_feats_l} + timeseries_data = timeseries_data.rename(columns = changed_feats) + + return timeseries_data, changed_feats + +def get_df_quantiles(df, + feats2check = timeseries_feats_columns, + subdivision_dict = {'food_region':['orientation_food_edge']}, + feats2norm = feats2normalize, + feats2abs = ventral_signed_columns, + is_remove_subdivided = True, + is_abs_ventral = True, + is_normalize = False + ): + ''' + Get quantile statistics for all the features given by `feats2check`. + In the features in `feats2abs` we are going to use only the absolute. This is to + deal with worms with unknown dorsal/ventral orientation. + ''' + q_vals = (0.1, 0.5, 0.9) #percentiles to calculate + iqr_limits = (0.25, 0.75) # range of percentiles used for the interquantile distance + valid_q = q_vals + iqr_limits + + df = df.copy() #like this i can modify directoy the df without long lasting consequences + + #filter features to be abs + def _filter_ventral_features(feats2check):#%% + valid_f = [x for x in feats2check if any(x.startswith(f) for f in feats2abs)] + return valid_f + + #filter default columns in case they are not present + feats2check = [x for x in feats2check if x in df] + + #filter default columns in case they are not present. Same for the subdivision dictionary. 
+    subdivision_dict_r = {}
+    for e_subdivide, feats2subdivide in subdivision_dict.items():
+        ff = [x for x in feats2check if x in feats2subdivide]
+        if e_subdivide in df and ff:
+            subdivision_dict_r[e_subdivide] = ff
+    subdivision_dict = subdivision_dict_r
+
+
+    #subdivide a feature using the event features
+    subdivided_df = _get_subdivided_features(df, subdivision_dict = subdivision_dict)
+    df = df.join(subdivided_df)
+    feats2check += subdivided_df.columns.tolist()
+    if is_remove_subdivided:
+        #remove the original (un-subdivided) columns
+        feats2subdivide = sum(subdivision_dict.values(), [])
+        df = df[[x for x in df if not x in feats2subdivide]]
+        feats2check = [x for x in feats2check if x not in feats2subdivide]
+
+    #add normalized features
+    if is_normalize:
+        df, changed_feats = _normalize_by_w_length(df, feats2norm = feats2norm)
+        feats2check = [x if not x in changed_feats else changed_feats[x] for x in feats2check]
+
+    #take the absolute value of the features signed by the ventral/dorsal orientation
+    if is_abs_ventral:
+        feats2abs = _filter_ventral_features(feats2check)
+        #find features that match ventral_signed_columns
+        if feats2abs:
+            #normalize
+            if df.size > 0:
+                df[feats2abs] = df[feats2abs].abs()
+            #change name
+            df.columns = [x + '_abs' if x in feats2abs else x for x in df.columns]
+            feats2check = [x + '_abs' if x in feats2abs else x for x in feats2check]
+
+    #calculate the quantiles
+    feat_mean = df[feats2check].quantile(valid_q)
+
+    #build the feature names
+    dat = []
+    for q in q_vals:
+        q_dat = feat_mean.loc[q]
+        q_str = '_{}th'.format(int(round(q*100)))
+        for feat, val in q_dat.iteritems():
+            dat.append((val, feat + q_str))
+
+    IQR = feat_mean.loc[0.75] - feat_mean.loc[0.25]
+    dat += [(val, feat + '_IQR') for feat, val in IQR.iteritems()]
+
+    feat_mean_s = pd.Series(*list(zip(*dat)))
+    return feat_mean_s
+
+
+def _get_subdivided_features(timeseries_data, subdivision_dict):
+    '''
+    subdivision_dict = {event_v1: [feature_v1, feature_v2, ...], event_v2: [feature_vn, ...], ...}
+
+    event_vector = [-1, -1, 0, 0, 1, 1]
+    feature_vector = [1, 3, 4, 5, 6, 6]
+
+    new_vectors ->
+    [1, 3, nan, nan, nan, nan]
+    [nan, nan, 4, 5, nan, nan]
+    [nan, nan, nan, nan, 6, 6]
+    '''
+
+    #assert all the subdivision keys are known events
+    assert all(x in event_region_labels.keys() for x in subdivision_dict)
+
+    event_type_link = {
+        'food_region' : '_in_',
+        'motion_mode' : '_w_'
+        }
+    subdivided_data = []
+    for e_col, timeseries_cols in subdivision_dict.items():
+        e_data = timeseries_data[e_col].values
+
+        if e_col in event_type_link:
+            str_l = event_type_link[e_col]
+        else:
+            str_l = '_'
+
+        for flag, label in event_region_labels[e_col].items():
+            _flag = e_data != flag
+
+            for f_col in timeseries_cols:
+                f_data = timeseries_data[f_col].values.copy()
+                f_data[_flag] = np.nan
+                new_name = f_col + str_l + label
+
+                subdivided_data.append((new_name, f_data))
+
+    if not subdivided_data:
+        #return an empty df if nothing was subdivided
+        return pd.DataFrame([])
+
+    columns, data = zip(*subdivided_data)
+    subdivided_df = pd.DataFrame(np.array(data).T, columns = columns)
+    subdivided_df.index = timeseries_data.index
+
+    return subdivided_df
+
+
+def process_blob_data(blob_features, derivate_delta_time, fps):
+    '''
+    Filter only the selected features and add derivatives
+    '''
+    assert not ((blob_features is None) and (derivate_delta_time is None))
+    assert all(x in blob_features for x in index_colums)
+
+    #add the blob prefix to the blob features if it is not present
+    filt_func = lambda x : (not x.startswith('blob_') and not (x in 
index_colums)) + blob_features.columns = ['blob_' + x if filt_func(x) else x for x in blob_features.columns ] + + #add blob derivatives + + derivate_delta_frames = get_delta_in_frames(derivate_delta_time, fps) + + blob_l = [] + for w_ind, blob_w in blob_features.groupby('worm_index'): + blob_w = add_derivatives(blob_w, blob_feats_columns, derivate_delta_frames, fps) + blob_l.append(blob_w) + + if blob_l: + blob_features = pd.concat(blob_l, axis=0) + #select only the valid columns + blob_feats_columns_d = blob_feats_columns + ['d_' + x for x in blob_feats_columns] + blob_cols = [x for x in blob_feats_columns_d if x in blob_features] + blob_features = blob_features[blob_cols] + else: + blob_features, blob_cols = pd.DataFrame([]), [] + + return blob_features, blob_cols + +def get_summary_stats(timeseries_data, + fps, + blob_features = None, + derivate_delta_time = None, + only_abs_ventral = False, + ): + if timeseries_data.size == 0: + return pd.DataFrame([]) + + ts_cols_all, v_sign_cols, feats2norm = timeseries_feats_columns, ventral_signed_columns, feats2normalize + ts_cols_norm = sum(feats2norm.values(), []) + + #summarize everything + exp_feats = [] + + ## event features + n_worms_estimate = get_n_worms_estimate(timeseries_data['timestamp']) + event_stats_s = get_event_stats(timeseries_data, fps , n_worms_estimate) + + ## timeseries features + + ##### simple + timeseries_stats_s = get_df_quantiles(timeseries_data, + feats2check = ts_cols_all, + feats2abs = v_sign_cols, + feats2norm = feats2norm, + is_normalize = False) + + path_grid_stats_s = get_path_extent_stats(timeseries_data, fps, is_normalized = False) + + feat_stats = pd.concat((timeseries_stats_s, path_grid_stats_s, event_stats_s)) + + exp_feats.append(feat_stats) + ##### normalized by worm length + timeseries_stats_n = get_df_quantiles(timeseries_data, + feats2check = ts_cols_norm, + feats2abs = v_sign_cols, + feats2norm = feats2norm, + is_normalize = True) + + path_grid_stats_n = get_path_extent_stats(timeseries_data, fps, is_normalized = True) + feat_stats_n = pd.concat((timeseries_stats_n, path_grid_stats_n)) + exp_feats.append(feat_stats_n) + + #add subdivisions + feat_stats_m_subdiv = get_df_quantiles(timeseries_data, + feats2check = ts_cols_all, + feats2abs = v_sign_cols, + feats2norm = feats2norm, + subdivision_dict = {'motion_mode' : ts_cols_all}, + is_abs_ventral = True) #i only calculate the subdivision abs or not abs + exp_feats.append(feat_stats_m_subdiv) + + if not only_abs_ventral: + ##### non-abs ventral signed features + feat_stats_v = get_df_quantiles(timeseries_data, + feats2check = v_sign_cols, + feats2abs = v_sign_cols, + feats2norm = feats2norm, + is_abs_ventral = False, + is_normalize = False) + + exp_feats.append(feat_stats_v) + + ##### non-abs and normalized ventral signed features + v_sign_cols_norm = list(set(v_sign_cols) & set(ts_cols_norm)) + feat_stats_v_n = get_df_quantiles(timeseries_data, + feats2check = v_sign_cols_norm, + feats2abs = v_sign_cols, + feats2norm = feats2norm, + is_abs_ventral = False, + is_normalize = True) + exp_feats.append(feat_stats_v_n) + + + #add subdivisions + feat_stats_m_subdiv_v = get_df_quantiles(timeseries_data, + feats2check = v_sign_cols, + feats2abs = v_sign_cols, + subdivision_dict = {'motion_mode' : ts_cols_all}, + is_abs_ventral = False, + is_normalize = False) #i only calculate the subdivision abs or not abs + + exp_feats.append(feat_stats_m_subdiv_v) + + + if blob_features is not None: + #I need to add the worm index and timesstamp before calculating the 
derivative + blob_features = pd.concat((timeseries_data[index_colums], blob_features), axis=1) + + blob_features, blob_cols = process_blob_data(blob_features, derivate_delta_time, fps) + #get blobstats + blob_stats = get_df_quantiles(blob_features, feats2check = blob_cols) + + blob_features['motion_mode'] = timeseries_data['motion_mode'] + blob_stats_m_subdiv = get_df_quantiles(blob_features, + feats2check = blob_cols, + subdivision_dict = {'motion_mode':blob_cols}, + is_abs_ventral = False) + exp_feats += [blob_stats, blob_stats_m_subdiv] + + exp_feats_df = pd.concat(exp_feats) + + assert not np.any(exp_feats_df.index.duplicated()) #If there are duplicated indexes there might be an error here + + return exp_feats_df + + +#%% +if __name__ == '__main__': + from tierpsy.helper.params import read_fps + #fname = '/Users/ajaver/OneDrive - Imperial College London/aggregation/N2_1_Ch1_29062017_182108_comp3_featuresN.hdf5' + #%% + + fname = '/Volumes/behavgenom_archive$/Avelino/screening/CeNDR/Results/CeNDR_Set1_020617/WN2002_worms10_food1-10_Set1_Pos4_Ch4_02062017_115723_featuresN.hdf5' + with pd.HDFStore(fname, 'r') as fid: + timeseries_data = fid['/timeseries_data'] + blob_features = fid['/blob_features'] + fps = read_fps(fname) + + feat_stats = get_summary_stats(timeseries_data, + fps, + blob_features, + 1/3, + only_abs_ventral = True + ) + + print(feat_stats) + \ No newline at end of file diff --git a/tierpsy/features/tierpsy_features/velocities.py b/tierpsy/features/tierpsy_features/velocities.py new file mode 100755 index 00000000..6c5d280d --- /dev/null +++ b/tierpsy/features/tierpsy_features/velocities.py @@ -0,0 +1,317 @@ +# -*- coding: utf-8 -*- +""" +This module defines the NormalizedWorm class + +""" +import numpy as np +import pandas as pd +from collections import OrderedDict + +import matplotlib.pylab as plt +from matplotlib import animation, patches + +from .helper import DataPartition, nanunwrap + +# i am including an excess of subdivisions with the hope to later reduce them +velocities_columns = ['speed', + 'angular_velocity', + 'relative_to_body_speed_midbody', + 'relative_to_body_radial_velocity_head_tip', + 'relative_to_body_angular_velocity_head_tip', + 'relative_to_body_radial_velocity_neck', + 'relative_to_body_angular_velocity_neck', + 'relative_to_body_radial_velocity_hips', + 'relative_to_body_angular_velocity_hips', + 'relative_to_body_radial_velocity_tail_tip', + 'relative_to_body_angular_velocity_tail_tip', + 'speed_neck', + 'angular_velocity_neck', + 'relative_to_neck_radial_velocity_head_tip', + 'relative_to_neck_angular_velocity_head_tip', + 'speed_head_base', + 'angular_velocity_head_base', + 'relative_to_head_base_radial_velocity_head_tip', + 'relative_to_head_base_angular_velocity_head_tip', + 'speed_hips', + 'angular_velocity_hips', + 'relative_to_hips_radial_velocity_tail_tip', + 'relative_to_hips_angular_velocity_tail_tip', + 'speed_tail_base', + 'angular_velocity_tail_base', + 'relative_to_tail_base_radial_velocity_tail_tip', + 'relative_to_tail_base_angular_velocity_tail_tip', + 'speed_midbody', + 'angular_velocity_midbody', + 'speed_head_tip', + 'angular_velocity_head_tip', + 'speed_tail_tip', + 'angular_velocity_tail_tip'] + +#%% features that are relative to specific body parts +relative_to_dict = {'body' : ('head_tip', 'neck', 'hips', 'tail_tip'), + 'neck' : ('head_tip',), + 'head_base' : ('head_tip',), + 'hips' : ('tail_tip',), + 'tail_base' : ('tail_tip',), + 'midbody' : [], + 'head_tip' : [], + 'tail_tip' : [], + } + +#%% +def 
_h_orientation_vector(x, axis=None): + return x[:, 0, :] - x[:, -1, :] + +def _h_get_velocity(x, delta_frames, fps): + if delta_frames < 1: + raise ValueError('Invalid number of delta frames %i' % delta_frames) + delta_time = delta_frames/fps + if x.shape[0] < delta_frames: + #not enough frames return empty array + return np.full_like(x, np.nan) + + v = (x[delta_frames:] - x[:-delta_frames])/delta_time + + #pad with nan so the vector match the original vectors + pad_w = [(int(np.floor(delta_frames/2)), int(np.ceil(delta_frames/2)))] + + #explicity add zero path if there are extra dimensions + if x.ndim > 1: + pad_w += [(0,0) for _ in range(x.ndim-1)] + + v = np.pad(v, + pad_w, + 'constant', + constant_values = np.nan) + + return v + +#%% +def _h_center_skeleton(skeletons, orientation, coords): + + Rsin = np.sin(orientation)[:, None] + Rcos = np.cos(orientation)[:, None] + + skel_c = skeletons - coords[:, None, :] + + skel_ang = np.zeros_like(skel_c) + skel_ang[:, :, 0] = skel_c[:, :, 0]*Rcos - skel_c[:, :, 1]*Rsin + skel_ang[:, :, 1] = skel_c[:, :, 0]*Rsin + skel_c[:, :, 1]*Rcos + + return skel_ang + +def _h_segment_position(skeletons, partition): + p_obj = DataPartition([partition], n_segments=skeletons.shape[1]) + coords = p_obj.apply(skeletons, partition, func=np.nanmean) + orientation_v = p_obj.apply(skeletons, partition, func=_h_orientation_vector) + return coords, orientation_v + +#%% +def get_velocity(skeletons, partition, delta_frames, fps): + coords, orientation_v = _h_segment_position(skeletons, partition = partition) + + nan_frames = np.isnan(coords[:, 0]) + + is_any_nan = np.any(nan_frames) + + if is_any_nan: + x = np.arange(coords.shape[0]) + xp = np.where(~nan_frames)[0] + if xp.size > 2: + #I only do this if there are actually some points to interpolate + for ii in range(coords.shape[1]): + coords[:, ii] = np.interp(x, xp, coords[xp, ii]) + orientation_v[:, ii] = np.interp(x, xp, orientation_v[xp, ii]) + + velocity = _h_get_velocity(coords, delta_frames, fps) + speed = np.linalg.norm(velocity, axis=1) + #I do not need to normalize the vectors because it will only add a constant factor, + #and I am only interested in the sign + s_sign = np.sign(np.sum(velocity*orientation_v, axis=1)) + signed_speed = speed *s_sign + + #let's change the vector to angles + orientation = np.arctan2(orientation_v[:, 0], orientation_v[:, 1]) + #wrap the angles so the change is continous no jump between np.pi and -np.pi + orientation = nanunwrap(orientation) + angular_velocity = _h_get_velocity(orientation, delta_frames, fps) + + centered_skeleton = _h_center_skeleton(skeletons, orientation, coords) + + if is_any_nan: + signed_speed[nan_frames] = np.nan + angular_velocity[nan_frames] = np.nan + + return signed_speed, angular_velocity, centered_skeleton + +#%% +def _h_relative_velocity(segment_coords, delta_frames, fps): + x = segment_coords[:, 0] + y = segment_coords[:, 1] + r = np.sqrt(x**2+y**2) + theta = nanunwrap(np.arctan2(y,x)) + + r_radial_velocity = _h_get_velocity(r, delta_frames, fps) + r_angular_velocity = _h_get_velocity(theta, delta_frames, fps) + + return r_radial_velocity, r_angular_velocity + + + +#%% +def get_relative_velocities(centered_skeleton, partitions, delta_frames, fps): + p_obj = DataPartition(partitions, n_segments=centered_skeleton.shape[1]) + + r_radial_velocities = {} + r_angular_velocities = {} + + + for p in partitions: + + + segment_coords = p_obj.apply(centered_skeleton, p, func=np.nanmean) + r_radial_velocity, r_angular_velocity = 
+
+#%%
+def _h_center_skeleton(skeletons, orientation, coords):
+
+    Rsin = np.sin(orientation)[:, None]
+    Rcos = np.cos(orientation)[:, None]
+
+    skel_c = skeletons - coords[:, None, :]
+
+    skel_ang = np.zeros_like(skel_c)
+    skel_ang[:, :, 0] = skel_c[:, :, 0]*Rcos - skel_c[:, :, 1]*Rsin
+    skel_ang[:, :, 1] = skel_c[:, :, 0]*Rsin + skel_c[:, :, 1]*Rcos
+
+    return skel_ang
+
+def _h_segment_position(skeletons, partition):
+    p_obj = DataPartition([partition], n_segments=skeletons.shape[1])
+    coords = p_obj.apply(skeletons, partition, func=np.nanmean)
+    orientation_v = p_obj.apply(skeletons, partition, func=_h_orientation_vector)
+    return coords, orientation_v
+
+#%%
+def get_velocity(skeletons, partition, delta_frames, fps):
+    coords, orientation_v = _h_segment_position(skeletons, partition = partition)
+
+    nan_frames = np.isnan(coords[:, 0])
+    is_any_nan = np.any(nan_frames)
+
+    if is_any_nan:
+        x = np.arange(coords.shape[0])
+        xp = np.where(~nan_frames)[0]
+        if xp.size > 2:
+            #only interpolate if there are actually some valid points to interpolate from
+            for ii in range(coords.shape[1]):
+                coords[:, ii] = np.interp(x, xp, coords[xp, ii])
+                orientation_v[:, ii] = np.interp(x, xp, orientation_v[xp, ii])
+
+    velocity = _h_get_velocity(coords, delta_frames, fps)
+    speed = np.linalg.norm(velocity, axis=1)
+    #there is no need to normalize the vectors: that would only add a constant
+    #factor, and only the sign of the projection matters here
+    s_sign = np.sign(np.sum(velocity*orientation_v, axis=1))
+    signed_speed = speed*s_sign
+
+    #convert the orientation vector to an angle
+    orientation = np.arctan2(orientation_v[:, 0], orientation_v[:, 1])
+    #unwrap the angles so the change is continuous, with no jump between np.pi and -np.pi
+    orientation = nanunwrap(orientation)
+    angular_velocity = _h_get_velocity(orientation, delta_frames, fps)
+
+    centered_skeleton = _h_center_skeleton(skeletons, orientation, coords)
+
+    if is_any_nan:
+        signed_speed[nan_frames] = np.nan
+        angular_velocity[nan_frames] = np.nan
+
+    return signed_speed, angular_velocity, centered_skeleton
+
+#%%
+def _h_relative_velocity(segment_coords, delta_frames, fps):
+    x = segment_coords[:, 0]
+    y = segment_coords[:, 1]
+    r = np.sqrt(x**2 + y**2)
+    theta = nanunwrap(np.arctan2(y, x))
+
+    r_radial_velocity = _h_get_velocity(r, delta_frames, fps)
+    r_angular_velocity = _h_get_velocity(theta, delta_frames, fps)
+
+    return r_radial_velocity, r_angular_velocity
+
+#%%
+def get_relative_velocities(centered_skeleton, partitions, delta_frames, fps):
+    p_obj = DataPartition(partitions, n_segments=centered_skeleton.shape[1])
+
+    r_radial_velocities = {}
+    r_angular_velocities = {}
+
+    for p in partitions:
+        segment_coords = p_obj.apply(centered_skeleton, p, func=np.nanmean)
+        r_radial_velocity, r_angular_velocity = _h_relative_velocity(segment_coords, delta_frames, fps)
+        r_radial_velocities[p] = r_radial_velocity
+        r_angular_velocities[p] = r_angular_velocity
+
+    return r_radial_velocities, r_angular_velocities
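+
+#a sketch of what _h_relative_velocity measures (values assumed just for
+#illustration): a segment circling the body centre at a constant radius,
+#segment_coords = np.c_[np.cos(w*t), np.sin(w*t)], gives r_radial_velocity ~ 0
+#and r_angular_velocity ~ w, while a segment moving radially away from the
+#centre gives r_angular_velocity ~ 0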
+
+def get_relative_speed_midbody(centered_skeleton, delta_frames, fps):
+    '''
+    This velocity measures how the midbody moves in relation to the body
+    central axis. It is hard to define this for the other parts without
+    getting too complicated.
+    '''
+    p_obj = DataPartition(['midbody'], n_segments=centered_skeleton.shape[1])
+    segment_coords = p_obj.apply(centered_skeleton, 'midbody', func=np.nanmean)
+    return _h_get_velocity(segment_coords[:, 0], delta_frames, fps)
+
+#%%
+def _h_ax_range(skel_a):
+    x_range = [np.nanmin(skel_a[..., 0]), np.nanmax(skel_a[..., 0])]
+    y_range = [np.nanmin(skel_a[..., 1]), np.nanmax(skel_a[..., 1])]
+
+    dx, dy = np.diff(x_range), np.diff(y_range)
+    if dx > dy:
+        y_range[1] = y_range[0] + dx
+    else:
+        x_range[1] = x_range[0] + dy
+
+    return (x_range, y_range)
+
+def animate_velocity(skel_a, ini_arrow, arrow_size, speed_v, ang_v):
+    x_range, y_range = _h_ax_range(skel_a)
+    fig = plt.figure(figsize = (15, 8))
+    ax = plt.subplot(1,2,1)
+    ax_speed = plt.subplot(2,2,2)
+    ax_ang_speed = plt.subplot(2,2,4)
+    ax.set_xlim(*x_range)
+    ax.set_ylim(*y_range)
+
+    line, = ax.plot([], [], lw=2)
+    head_p, = ax.plot([], [], 'o')
+    orient_arrow = patches.Arrow(*ini_arrow[0], *arrow_size[0], fc='k', ec='k')
+
+    ax_speed.plot(speed_v)
+    ax_ang_speed.plot(ang_v)
+
+    speed_p, = ax_speed.plot([], 'o')
+    ang_speed_p, = ax_ang_speed.plot([], 'o')
+
+    # animation function. This is called sequentially
+    def _animate(i):
+        #the arrow belongs to the enclosing function scope, not the module scope
+        nonlocal orient_arrow
+
+        x = skel_a[i, :, 0]
+        y = skel_a[i, :, 1]
+        line.set_data(x, y)
+        head_p.set_data(x[0], y[0])
+        if ax.patches:
+            ax.patches.remove(orient_arrow)
+        orient_arrow = patches.Arrow(*ini_arrow[i], *arrow_size[i], width=50, fc='k', ec='k')
+        ax.add_patch(orient_arrow)
+
+        speed_p.set_data(i, speed_v[i])
+        ang_speed_p.set_data(i, ang_v[i])
+        return (line, head_p, orient_arrow, speed_p, ang_speed_p)
+
+    # call the animator. blit=True means only re-draw the parts that have changed.
+    anim = animation.FuncAnimation(fig, _animate,
+                                   frames=skel_a.shape[0], interval=20, blit=True)
+    return anim
+
+#%%
+def get_velocity_features(skeletons, delta_frames, fps):
+    assert isinstance(delta_frames, int)
+
+    if skeletons.shape[0] < delta_frames:
+        return
+
+    def _process_part(part):
+        signed_speed, angular_velocity, centered_skeleton = get_velocity(skeletons, part, delta_frames, fps)
+
+        if part == 'body':
+            #speed without a prefix is the body speed
+            part_velocities = [('speed', signed_speed),
+                               ('angular_velocity', angular_velocity)]
+            #this is really a special case: the midbody moving like this <- | ->
+            relative_speed_midbody = get_relative_speed_midbody(centered_skeleton, delta_frames, fps)
+
+            #add the body signed speed and angular velocity. These values are very similar for the other parts
+            part_velocities.append(('relative_to_body_speed_midbody', relative_speed_midbody))
+        else:
+            #in the end this might only be calculated for the body, but a thorough test is needed first to be sure
+            part_velocities = [('speed_' + part, signed_speed),
+                               ('angular_velocity_' + part, angular_velocity)]
+
+        if part in relative_to_dict:
+            partitions = relative_to_dict[part]
+            r_radial_velocities, r_angular_velocities = get_relative_velocities(centered_skeleton,
+                                                                                partitions,
+                                                                                delta_frames,
+                                                                                fps)
+            #pack into a dictionary
+            for p in partitions:
+                k_r = 'relative_to_{}_radial_velocity_{}'.format(part, p)
+                part_velocities.append((k_r, r_radial_velocities[p]))
+
+                k_a = 'relative_to_{}_angular_velocity_{}'.format(part, p)
+                part_velocities.append((k_a, r_angular_velocities[p]))
+
+        return part_velocities
+
+    #process all the parts
+    velocities = map(_process_part, relative_to_dict.keys())
+    #flatten the list of lists
+    velocities = sum(velocities, [])
+
+    #put all the data into a dataframe
+    velocities = pd.DataFrame(OrderedDict(velocities))
+
+    assert velocities.shape[0] == skeletons.shape[0]
+
+    return velocities
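+
+#for illustration (shapes assumed, as in the __main__ example below): given a
+#(n_frames, n_segments, 2) skeletons array,
+#    velocities = get_velocity_features(skeletons, delta_frames=8, fps=25)
+#returns a dataframe with n_frames rows whose columns should correspond to
+#velocities_columns defined at the top of this module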
+
+#%%
+if __name__ == '__main__':
+    #data = np.load('worm_example_small_W1.npz')
+    data = np.load('../notebooks/data/worm_example.npz')
+    skeletons = data['skeleton']
+
+    fps = 25
+    delta_time = 1/3 #delta time in seconds to calculate the velocity
+
+    delta_frames = max(1, int(round(fps*delta_time)))
+    velocities = get_velocity_features(skeletons, delta_frames, fps)
+
diff --git a/tierpsy/features/tierpsy_features/version.py b/tierpsy/features/tierpsy_features/version.py
new file mode 100755
index 00000000..41436057
--- /dev/null
+++ b/tierpsy/features/tierpsy_features/version.py
@@ -0,0 +1,13 @@
+# # -*- coding: utf-8 -*-
+__version__ = '0.1'
+
+try:
+    import os
+    import subprocess
+
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    command = ['git', 'rev-parse', 'HEAD']
+    sha = subprocess.check_output(command, cwd=cwd, stderr=subprocess.DEVNULL).decode('ascii').strip()
+    __version__ += '+' + sha[:7]
+except Exception:
+    pass
\ No newline at end of file
diff --git a/tierpsy/gui/HDF5VideoPlayer.py b/tierpsy/gui/HDF5VideoPlayer.py
index 435ad527..b4a85b03 100644
--- a/tierpsy/gui/HDF5VideoPlayer.py
+++ b/tierpsy/gui/HDF5VideoPlayer.py
@@ -8,6 +8,7 @@
 from PyQt5 import QtWidgets, QtCore, QtGui
 from PyQt5.QtCore import Qt
+from PyQt5.QtWidgets import QApplication
 
 
 def setChildrenFocusPolicy(obj, policy):
@@ -441,11 +442,13 @@ def closeEvent(self, event):
             self.fid.close()
         super(HDF5VideoPlayerGUI, self).closeEvent(event)
 
-if __name__ == '__main__':
-    print('hello!!')
-    app = QtWidgets.QApplication(sys.argv)
+def tierpsy_gui_simple():
+    app = QApplication(sys.argv)
     ui = HDF5VideoPlayerGUI()
     ui.show()
-    sys.exit(app.exec_())
+    app.exec_()
+
+if __name__ == '__main__':
+    sys.exit(tierpsy_gui_simple())
diff --git a/tierpsy/gui/SelectApp.py b/tierpsy/gui/SelectApp.py
index df2fdc44..be740a39 100644
--- a/tierpsy/gui/SelectApp.py
+++ b/tierpsy/gui/SelectApp.py
@@ -1,15 +1,17 @@
-
-from functools import partial
-from collections import OrderedDict
-
-from PyQt5.QtWidgets import QWidget, QApplication, QMainWindow, QPushButton, QVBoxLayout
-from PyQt5.QtCore import Qt
-
 import tierpsy
 from tierpsy.gui.GetMaskParams import GetMaskParams_GUI
 from tierpsy.gui.MWTrackerViewer import MWTrackerViewer_GUI
 from tierpsy.gui.Summarizer import Summarizer_GUI
 from tierpsy.gui.BatchProcessing import BatchProcessing_GUI
+
+from PyQt5.QtWidgets import QWidget, QApplication, QMainWindow, QPushButton, QVBoxLayout
+from PyQt5.QtCore import Qt
+
+import os
+import sys
+import stat
+from functools import partial
+from collections import OrderedDict
 
 dd = [('get_params', (GetMaskParams_GUI,"Set Parameters")),
@@ -54,12 +56,43 @@ def appCall(self, name):
         ui.show()
         ui.setAttribute(Qt.WA_DeleteOnClose)
 
+
+def _create_desktop_command():
+    #currently this only works on OSX...
+    if sys.platform == 'darwin':
+        if 'CONDA_DEFAULT_ENV' in os.environ:
+            act_str = 'source activate ' + os.environ['CONDA_DEFAULT_ENV']
+            source_cmd = os.path.join(os.environ['CONDA_PREFIX'], 'bin', 'tierpsy_gui')
+        else:
+            act_str = ''
+            source_cmd = os.path.join(os.path.dirname(__file__), 'tierpsy_gui')
+
+        if not os.path.exists(source_cmd):
+            script_name = os.path.join(os.path.dirname(__file__), '..', '..', 'scripts', 'tierpsy_gui.py')
+            script_name = os.path.realpath(script_name)
+            source_cmd = '{} {}'.format(sys.executable, script_name)
+        cmd = '\n'.join([act_str, source_cmd])
+
+        link_file = os.path.join(os.path.expanduser('~'), 'Desktop', 'tierpsy_gui.command')
+        with open(link_file, 'w') as fid:
+            fid.write(cmd)
+
+        #make the .command file executable
+        st = os.stat(link_file)
+        os.chmod(link_file, st.st_mode | stat.S_IEXEC)
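+
+#for illustration, the generated ~/Desktop/tierpsy_gui.command is expected to
+#contain two lines along these lines (environment name and path assumed here):
+#    source activate tierpsy
+#    /anaconda3/envs/tierpsy/bin/tierpsy_gui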
 
-if __name__ == '__main__':
-    import sys
+def tierpsy_gui():
+    _create_desktop_command()
     app = QApplication(sys.argv)
+
     ui = SelectApp()
     ui.show()
-    sys.exit(app.exec_())
+
+    app.exec_()
+
+
+if __name__ == '__main__':
+    tierpsy_gui()
\ No newline at end of file
diff --git a/tierpsy/helper/__init__.py b/tierpsy/helper/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tierpsy/processing/processMultipleFilesFun.py b/tierpsy/processing/processMultipleFilesFun.py
index 175a779c..a1848922 100644
--- a/tierpsy/processing/processMultipleFilesFun.py
+++ b/tierpsy/processing/processMultipleFilesFun.py
@@ -127,3 +127,7 @@ def processMultipleFilesFun(
         max_num_process = max_num_process,
         refresh_time = refresh_time,
         is_debug = is_debug)
+
+def tierpsy_process():
+    args = ProcessMultipleFilesParser().parse_args()
+    processMultipleFilesFun(**vars(args))
diff --git a/tierpsy/summary/__init__.py b/tierpsy/summary/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tierpsy/summary/helper.py b/tierpsy/summary/helper.py
index 0e9f1460..96be2060 100644
--- a/tierpsy/summary/helper.py
+++ b/tierpsy/summary/helper.py
@@ -5,7 +5,7 @@
 
 @author: avelinojaver
 """
-from tierpsy_features.summary_stats import get_n_worms_estimate
+from tierpsy.features.tierpsy_features.summary_stats import get_n_worms_estimate
 
 import random
 import math
diff --git a/tierpsy/summary/process_tierpsy.py b/tierpsy/summary/process_tierpsy.py
index 8e3df234..ed7aa14a 100644
--- a/tierpsy/summary/process_tierpsy.py
+++ b/tierpsy/summary/process_tierpsy.py
@@ -5,9 +5,9 @@
 
 @author: avelinojaver
 """
+from tierpsy.features.tierpsy_features.summary_stats import get_summary_stats
 from tierpsy.summary.helper import augment_data, add_trajectory_info
 from tierpsy.helper.params import read_fps
-from tierpsy_features.summary_stats import get_summary_stats
 from tierpsy.helper.misc import WLAB
 
 import pandas as pd
diff --git a/tierpsy/tests/__init__.py b/tierpsy/tests/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/tests/run_tests.py b/tierpsy/tests/run_tests.py
similarity index 58%
rename from tests/run_tests.py
rename to tierpsy/tests/run_tests.py
index 7cfb2427..33fca508 100644
--- a/tests/run_tests.py
+++ b/tierpsy/tests/run_tests.py
@@ -7,15 +7,16 @@
 import glob
 import json
 import argparse
+import tqdm
+import requests
+import math
+import zipfile
+import warnings
-
+DLFT_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 EXAMPLES_LINK="https://imperiallondon-my.sharepoint.com/personal/ajaver_ic_ac_uk/_layouts/15/guestaccess.aspx?guestaccesstoken=ldZ18fLY%2bzlu7XuO9mbKVdyiKoH4naiesqiLXWU4vGQ%3d&docid=0cec4e52f4ccf4d5b8bb3a737020fc12f&rev=1"
 
-def dowload_examples():
-    import tqdm
-    import requests
-    import math
-    import zipfile
+def download_files(data_dir):
     # Streaming, so we can iterate over the response.
     r = requests.get(EXAMPLES_LINK, stream=True)
@@ -24,9 +25,12 @@
     block_size = 1024
     wrote = 0
 
-    tmp_file = 'test_data.zip'
+    if not os.path.exists(data_dir):
+        os.makedirs(data_dir)
+
+    tmp_file = os.path.join(data_dir, 'test_data.zip')
     with open(tmp_file, 'wb') as f:
-        for data in tqdm.tqdm(r.iter_content(block_size), total=math.ceil(total_size//block_size) , unit='MB', unit_scale=True):
+        for data in tqdm.tqdm(r.iter_content(block_size), total=math.ceil(total_size/block_size), unit='kB', unit_scale=False):
             wrote = wrote + len(data)
             f.write(data)
     if total_size != 0 and wrote != total_size:
@@ -34,27 +38,33 @@
 
     with zipfile.ZipFile(tmp_file, "r") as zip_ref:
-        zip_ref.extractall("./")
-
+        zip_ref.extractall(data_dir)
     os.remove(tmp_file)
 
+    #the files are unzipped into data_dir/data/, so move them back up into data_dir
+    src_dir = os.path.join(data_dir, 'data')
+    files = os.listdir(src_dir)
+    for f in files:
+        shutil.move(os.path.join(src_dir, f), data_dir)
+    os.rmdir(src_dir)
+
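+    #after this step, data_dir is expected to contain one folder per test,
+    #e.g. (folder names match the test classes defined below):
+    #    data_dir/GECKO_VIDEOS/RawVideos/...
+    #    data_dir/AVI_VIDEOS/RawVideos/...
+    #    data_dir/WT2/RawVideos/...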
 
 class TestObj():
-    def __init__(self, examples_dir, script_dir):
+    def __init__(self, examples_dir, base_script):
         self.commands = []
-        self.process_script = os.path.join(script_dir, 'processMultipleFiles.py')
+        self.base_script = base_script
 
         self.main_dir = os.path.join(examples_dir, self.name)
         self.masked_files_dir = os.path.join(self.main_dir, 'MaskedVideos')
         self.raw_video_dir = os.path.join(self.main_dir, 'RawVideos')
         self.results_dir = os.path.join(self.main_dir, 'Results')
 
-    def add_command(self, args, dlft_script=''):
-        if not dlft_script:
-            dlft_script = self.process_script
+    def add_command(self, args, base_script=''):
+        if not base_script:
+            base_script = self.base_script
 
         if not '--is_debug' in args:
             args.append('--is_debug')
 
-        command = [sys.executable, dlft_script] + args
+        command = base_script + args
 
         cmd_dd = []
         for ii, x in enumerate(command):
@@ -93,12 +103,11 @@ def clean(self):
 
 class GECKO_VIDEOS(TestObj):
+    name = 'GECKO_VIDEOS'
+    description = 'Complete analysis of videos from Gecko .mjpg files.'
+
     def __init__(self, *args):
-        self.name = 'GECKO_VIDEOS'
-        self.description = 'Complete analysis from video from Gecko .mjpg files.'
         super().__init__(*args)
-
-
         args = [
             '--video_dir_root',
             self.raw_video_dir,
@@ -114,9 +123,10 @@ def __init__(self, *args):
         self.add_command(args)
 
 class AVI_VIDEOS(TestObj):
+    name = 'AVI_VIDEOS'
+    description = 'Complete analysis from .avi files.'
+
     def __init__(self, *args):
-        self.name = 'AVI_VIDEOS'
-        self.description = 'Complete analysis from .avi files.'
         super().__init__(*args)
 
         json_file = os.path.join(self.main_dir, 'AVI_VIDEOS.json')
@@ -133,9 +143,10 @@ def __init__(self, *args):
         self.add_command(args)
 
 class MANUAL_FEATS(TestObj):
+    name = 'MANUAL_FEATS'
+    description = 'Calculate features from manually joined trajectories.'
+
     def __init__(self, *args):
-        self.name = 'MANUAL_FEATS'
-        self.description = 'Calculate features from manually joined trajectories.'
         super().__init__(*args)
 
         args = [
@@ -153,9 +164,9 @@ def clean(self):
 
 class RIG_HDF5_VIDEOS(TestObj):
+    name = 'RIG_HDF5_VIDEOS'
+    description = 'Reformat hdf5 files produced by the Gecko plugin in the worm rig.'
     def __init__(self, *args):
-        self.name = 'RIG_HDF5_VIDEOS'
-        self.description = 'Reformat hdf5 file produced by the gecko plugin in the worm rig.'
         super().__init__(*args)
 
         args = [
@@ -172,9 +183,9 @@ def __init__(self, *args):
 
 class WT2(TestObj):
+    name = 'WT2'
+    description = "Worm Tracker 2.0 (Schafer's lab single worm)."
     def __init__(self, *args):
-        self.name = 'WT2'
-        self.description = "Worm Tracker 2.0 (Schafer's lab single worm)."
         super().__init__(*args)
 
         args = [
@@ -193,9 +204,9 @@ def __init__(self, *args):
 
 class WORM_MOTEL(TestObj):
+    name = 'WORM_MOTEL'
+    description = "Worm motel (background subtraction)."
     def __init__(self, *args):
-        self.name = 'WORM_MOTEL'
-        self.description = "Worm motel (background subtraction)."
         super().__init__(*args)
 
         args = [
@@ -212,39 +223,63 @@ def __init__(self, *args):
         ]
         self.add_command(args)
 
-if __name__ == '__main__':
-    #dowload_examples()
-    parser = argparse.ArgumentParser(description='Process some integers.')
-    parser.add_argument('n_tests', metavar='N', type=int, nargs='*',
-                        help='Number of tests to be done. If it is empty it will execute all the tests.')
-    parser.add_argument('--tests', dest='accumulate', action='store_const',
-                        const=sum, default=max,
-                        help='sum the integers (default: find the max)')
+def tierpsy_tests():
+    base_script = ['tierpsy_process']
+
+    _all_tests_obj = [
+        GECKO_VIDEOS,
+        AVI_VIDEOS,
+        MANUAL_FEATS,
+        RIG_HDF5_VIDEOS,
+        WT2,
+        WORM_MOTEL
+        ]
+    _available_tests = [x.name for x in _all_tests_obj]
+    _available_tests_str = ' '.join(_available_tests)
+    test_dict = dict(zip(_available_tests, _all_tests_obj))
+
+    help_test = 'Name(s) of the tests to be executed. The available tests are: {}'.format(_available_tests_str)
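+
+    #example invocations (assuming the tierpsy_tests console entry point is
+    #installed; paths here are just an example):
+    #    tierpsy_tests --download_files GECKO_VIDEOS
+    #    tierpsy_tests AVI_VIDEOS WT2 --data_dir /path/to/test/data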
+
+    parser = argparse.ArgumentParser(description='Tierpsy Tracker tests.')
+    parser.add_argument('tests',
+                        nargs='*',
+                        help=help_test)
+
+    parser.add_argument('--download_files',
+                        action='store_true',
+                        help='Download the test files before running the tests.')
+
+    parser.add_argument('--data_dir',
+                        default=DLFT_DATA_DIR,
+                        help='Directory where the test files are located or where they will be downloaded.'
+                        )
+
+    args = parser.parse_args()
+    data_dir = args.data_dir
-    n_tests = args.n_tests
-
-    root_dir = os.path.abspath(os.path.join(os.path.dirname(tierpsy.__file__), '..'))
-    examples_dir = os.path.join(root_dir, 'tests', 'data')
-    script_dir = os.path.join(root_dir, 'cmd_scripts')
-
-    all_tests_obj = [
-        GECKO_VIDEOS,
-        AVI_VIDEOS,
-        MANUAL_FEATS,
-        RIG_HDF5_VIDEOS,
-        WT2,
-        WORM_MOTEL
-    ]
-    all_tests = [obj(examples_dir, script_dir) for obj in all_tests_obj]
-
-    tests_ind = [x-1 for x in n_tests]
-    if tests_ind:
-        test_to_exec = [all_tests[x] for x in tests_ind]
-    else:
-        test_to_exec = all_tests #execute all tests
+
+    if args.download_files:
+        download_files(data_dir)
+
+    if not os.path.exists(data_dir):
+        print('The given --data_dir "{}" does not exist. Select a directory with valid test files or use --download_files.'.format(data_dir))
+        return
+
+    tests_given = args.tests
+    test2run = []
+    for tt in tests_given:
+        if tt in _available_tests:
+            test2run.append(tt)
+        else:
+            warnings.warn('Test "{}" is not a valid name, and it will be skipped. The valid tests are: {}'.format(tt, _available_tests_str))
 
-    for test in test_to_exec:
+    if not tests_given:
+        print("No tests given. Please specify at least one of the valid tests: {}.".format(_available_tests_str))
+        return
+
+    for test_name in test2run:
+        test = test_dict[test_name](args.data_dir, base_script)
         test.run()
diff --git a/tierpsy/version.py b/tierpsy/version.py
index e8c185bb..a2a51c22 100755
--- a/tierpsy/version.py
+++ b/tierpsy/version.py
@@ -1,17 +1,23 @@
 # # -*- coding: utf-8 -*-
-__version__ = '1.5.0'
+__version__ = '1.5.1-beta'
 
 try:
     import os
     import subprocess
 
-    cwd = os.path.dirname(os.path.abspath(__file__))
-    sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    command = ['git', 'rev-parse', 'HEAD']
+    sha = subprocess.check_output(command, cwd=cwd, stderr=subprocess.DEVNULL).decode('ascii').strip()
     __version__ += '+' + sha[:7]
+
 except Exception:
     pass
 
 '''
+1.5.1-beta
+- Create a conda package. This will be the new way to distribute the package.
+- Merge part of the code from tierpsy-features and open-worm-analysis-toolbox to remove them as dependencies.
+
 1.5.0
 - Bug corrections.