diff --git a/docs/source/database.rst b/docs/source/database.rst
new file mode 100644
index 000000000..2d5ba36de
--- /dev/null
+++ b/docs/source/database.rst
@@ -0,0 +1,7 @@
+********
+database
+********
+
+.. automodule:: jwql.database.database_interface
+    :members:
+    :undoc-members:
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 52c2fab9c..5e7a85965 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -19,6 +19,7 @@ API documentation
    :maxdepth: 1
    :caption: Contents:
 
+   database.rst
    logging.rst
    monitor_filesystem.rst
    monitor_mast.rst
diff --git a/environment.yml b/environment.yml
index 806364e60..23ed66152 100644
--- a/environment.yml
+++ b/environment.yml
@@ -14,6 +14,7 @@ dependencies:
 - numpy=1.14.0
 - numpydoc=0.8.0
 - postgresql=9.6.6
+- psycopg2=2.7.5
 - python=3.6.4
 - python-dateutil=2.6.1
 - pytest=3.4.2
diff --git a/jwql/database/__init__.py b/jwql/database/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/jwql/database/database_interface.py b/jwql/database/database_interface.py
new file mode 100644
index 000000000..591d00906
--- /dev/null
+++ b/jwql/database/database_interface.py
@@ -0,0 +1,171 @@
+"""
+A module to interact with the JWQL postgresql database ``jwqldb``
+
+The ``load_connection()`` function within this module allows the user
+to connect to the ``jwqldb`` database via the ``session``, ``base``,
+and ``engine`` objects (described below). The classes within serve as
+ORMs (Object-relational mappings) that define the individual tables of
+the relational database.
+
+The ``engine`` object serves as the low-level database API and perhaps
+most importantly contains dialects which allow the ``sqlalchemy``
+module to communicate with the database.
+
+The ``base`` object serves as a base class for class definitions. It
+produces ``Table`` objects and constructs ORMs.
+
+The ``session`` object manages operations on ORM-mapped objects, as
+constructed by the base. These operations include querying, for
+example.
+
+Authors
+-------
+    Joe Filippazzo, Johannes Sahlmann, Matthew Bourque
+
+"""
+
+from datetime import datetime
+
+import pandas as pd
+from sqlalchemy import Boolean
+from sqlalchemy import Column
+from sqlalchemy import create_engine
+from sqlalchemy import DateTime
+from sqlalchemy import Integer
+from sqlalchemy import MetaData
+from sqlalchemy import String
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm.query import Query
+
+from ..utils import utils
+
+
+SETTINGS = utils.get_config()
+
+
+# Monkey patch Query with a data_frame property
+@property
+def data_frame(self):
+    """Return the results of the query as a ``pandas.DataFrame``.
+
+    Because this is attached to ``Query`` as a property, it is
+    accessed without parentheses, i.e. ``query.data_frame``.
+    """
+    return pd.read_sql(self.statement, self.session.bind)
+
+
+Query.data_frame = data_frame
+
+
+def load_connection(connection_string):
+    """Return ``session``, ``base``, ``engine``, and ``metadata``
+    objects for connecting to the ``jwqldb`` database.
+
+    Create an ``engine`` using a given ``connection_string``. Create
+    a ``base`` class and ``session`` class from the ``engine``. Create
+    an instance of the ``session`` class. Return the ``session``,
+    ``base``, and ``engine`` instances, along with a new ``MetaData``
+    instance. This was adapted from the ``acsql`` repository.
+
+    Parameters
+    ----------
+    connection_string : str
+        A postgresql database connection string. The
+        connection string should take the form:
+        ``dialect+driver://username:password@host:port/database``
+
+    Returns
+    -------
+    session : session object
+        Provides a holding zone for all objects loaded or associated
+        with the database.
+    base : base object
+        Provides a base class for declarative class definitions.
+    engine : engine object
+        Provides a source of database connectivity and behavior.
+    meta : metadata object
+        A newly created, empty ``MetaData`` instance.
+
+    References
+    ----------
+    ``acsql``:
+        https://github.com/spacetelescope/acsql/blob/master/acsql/database/database_interface.py
+    """
+    engine = create_engine(connection_string, echo=False)
+    base = declarative_base(engine)
+    Session = sessionmaker(bind=engine)
+    session = Session()
+    meta = MetaData()
+
+    return session, base, engine, meta
+
+
+session, base, engine, meta = load_connection(SETTINGS['connection_string'])
+
+
+class Anomaly(base):
+    """ORM for the ``anomalies`` table.
+
+    Each record stores a filename, the date on which it was flagged,
+    and one boolean column per anomaly type.
+    """
+    # Name the table
+    __tablename__ = 'anomalies'
+
+    # Define the columns
+    id = Column(Integer, primary_key=True, nullable=False)
+    filename = Column(String, nullable=False)
+    # Pass the callable itself (not ``datetime.now()``) so that the
+    # timestamp is evaluated for each insert instead of once at import
+    flag_date = Column(DateTime, nullable=False, default=datetime.now)
+    bowtie = Column(Boolean, nullable=False, default=False)
+    snowball = Column(Boolean, nullable=False, default=False)
+    cosmic_ray_shower = Column(Boolean, nullable=False, default=False)
+    crosstalk = Column(Boolean, nullable=False, default=False)
+    cte_correction_error = Column(Boolean, nullable=False, default=False)
+    data_transfer_error = Column(Boolean, nullable=False, default=False)
+    detector_ghost = Column(Boolean, nullable=False, default=False)
+    diamond = Column(Boolean, nullable=False, default=False)
+    diffraction_spike = Column(Boolean, nullable=False, default=False)
+    dragon_breath = Column(Boolean, nullable=False, default=False)
+    earth_limb = Column(Boolean, nullable=False, default=False)
+    excessive_saturation = Column(Boolean, nullable=False, default=False)
+    figure8_ghost = Column(Boolean, nullable=False, default=False)
+    filter_ghost = Column(Boolean, nullable=False, default=False)
+    fringing = Column(Boolean, nullable=False, default=False)
+    guidestar_failure = Column(Boolean, nullable=False, default=False)
+    banding = Column(Boolean, nullable=False, default=False)
+    persistence = Column(Boolean, nullable=False, default=False)
+    prominent_blobs = Column(Boolean, nullable=False, default=False)
+    trail = Column(Boolean, nullable=False, default=False)
+    scattered_light = Column(Boolean, nullable=False, default=False)
+    other = Column(Boolean, nullable=False, default=False)
+
+    def __repr__(self):
+        """Return the canonical string representation of the object"""
+        # Get the anomaly columns that are True
+        a_list = [col for col in self.colnames
+                  if getattr(self, col) is True]
+
+        txt = ('Anomaly {0.id}: {0.filename} flagged at '
+               '{0.flag_date} for {1}').format(self, a_list)
+
+        return txt
+
+    @property
+    def colnames(self):
+        """A list of the names of the boolean (anomaly) columns in
+        this table, in the order in which they are defined"""
+        # Use the table definition rather than the instance
+        # ``__dict__``, which only holds the attributes that have
+        # been set or loaded on this particular instance
+        a_list = [col.name for col in self.__table__.columns
+                  if isinstance(col.type, Boolean)]
+
+        return a_list
+
+
+if __name__ == '__main__':
+
+    base.metadata.create_all(engine)
diff --git a/notebooks/database_anomoly_table.ipynb b/notebooks/database_anomoly_table.ipynb
new file mode 100644
index 000000000..6d922cf1e
--- /dev/null
+++ b/notebooks/database_anomoly_table.ipynb
@@ -0,0 +1,370 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Populating and querying the `anomalies` table in the `jwqldb`\n",
+    "This shows how to add records to the `anomalies` table in the `jwqldb`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Import\n",
+    "from jwql.database import database_interface as di"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here's some dummy records to populate the `anomalies` table in the database."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "di.session.add(di.Anomaly(filename='foo1', bowtie=True))\n", + "di.session.add(di.Anomaly(filename='foo2', bowtie=True, crosstalk=True))\n", + "di.session.add(di.Anomaly(filename='foo3', snowball=True))\n", + "di.session.add(di.Anomaly(filename='foo4', bowtie=True, snowball=True))\n", + "di.session.add(di.Anomaly(filename='foo5', crosstalk=True))\n", + "di.session.add(di.Anomaly(filename='foo6', crosstalk=True, snowball=True))\n", + "di.session.commit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can query the database for all records with bowtie anomalies." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bowties = di.session.query(di.Anomaly).filter(di.Anomaly.bowtie == True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I added a method to return the result as a `pandas.DataFrame` for kicks." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfilenameflag_datebowtiesnowballcosmic_ray_showercrosstalkcte_correction_errordata_transfer_errordetector_ghost...figure8_ghostfilter_ghostfringingguidestar_failurebandingpersistenceprominent_blobstrailscattered_lightother
01foo12018-08-06 10:45:11.916114TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
12foo12018-08-06 10:45:11.916114TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
23foo22018-08-06 10:45:11.916114TrueFalseFalseTrueFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
35foo42018-08-06 10:45:11.916114TrueTrueFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", + "

4 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " id filename flag_date bowtie snowball \\\n", + "0 1 foo1 2018-08-06 10:45:11.916114 True False \n", + "1 2 foo1 2018-08-06 10:45:11.916114 True False \n", + "2 3 foo2 2018-08-06 10:45:11.916114 True False \n", + "3 5 foo4 2018-08-06 10:45:11.916114 True True \n", + "\n", + " cosmic_ray_shower crosstalk cte_correction_error data_transfer_error \\\n", + "0 False False False False \n", + "1 False False False False \n", + "2 False True False False \n", + "3 False False False False \n", + "\n", + " detector_ghost ... figure8_ghost filter_ghost fringing \\\n", + "0 False ... False False False \n", + "1 False ... False False False \n", + "2 False ... False False False \n", + "3 False ... False False False \n", + "\n", + " guidestar_failure banding persistence prominent_blobs trail \\\n", + "0 False False False False False \n", + "1 False False False False False \n", + "2 False False False False False \n", + "3 False False False False False \n", + "\n", + " scattered_light other \n", + "0 False False \n", + "1 False False \n", + "2 False False \n", + "3 False False \n", + "\n", + "[4 rows x 25 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bowties.data_frame()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And a print statement for a quick summary." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Anomaly 3: foo2 flagged at 2018-08-06 10:45:11.916114 for ['crosstalk', 'bowtie']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bowties[2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's all the columns in the `anomalies` table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['data_transfer_error',\n", + " 'diffraction_spike',\n", + " 'crosstalk',\n", + " 'trail',\n", + " 'prominent_blobs',\n", + " 'bowtie',\n", + " 'scattered_light',\n", + " 'persistence',\n", + " 'figure8_ghost',\n", + " 'detector_ghost',\n", + " 'banding',\n", + " 'other',\n", + " 'guidestar_failure',\n", + " 'excessive_saturation',\n", + " 'snowball',\n", + " 'cosmic_ray_shower',\n", + " 'filter_ghost',\n", + " 'dragon_breath',\n", + " 'diamond',\n", + " 'earth_limb',\n", + " 'cte_correction_error',\n", + " 'fringing']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bowties[2].colnames" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [conda env:astroconda]", + "language": "python", + "name": "conda-env-astroconda-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}