"""
A module to interact with the JWQL postgresql database ``jwqldb``

The ``load_connection()`` function within this module allows the user
to connect to the ``jwqldb`` database via the ``session``, ``base``,
and ``engine`` objects (described below).  The classes within serve as
ORMs (Object-relational mappings) that define the individual tables of
the relational database.

The ``engine`` object serves as the low-level database API and perhaps
most importantly contains dialects which allows the ``sqlalchemy``
module to communicate with the database.

The ``base`` object serves as a base class for class definitions.  It
produces ``Table`` objects and constructs ORMs.

The ``session`` object manages operations on ORM-mapped objects, as
constructed by the base.  These operations include querying, for
example.

Authors
-------
    Joe Filippazzo, Johannes Sahlmann, Matthew Bourque

"""

from datetime import datetime

import pandas as pd
from sqlalchemy import Boolean
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import DateTime
from sqlalchemy import Integer
from sqlalchemy import MetaData
from sqlalchemy import String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.query import Query

from ..utils import utils


SETTINGS = utils.get_config()


# Monkey patch Query with a data_frame property
@property
def data_frame(self):
    """Return the results of the query as a ``pandas.DataFrame``.

    This is attached to ``sqlalchemy.orm.query.Query`` as a property
    (see the assignment below), so it is accessed without parentheses,
    e.g. ``session.query(Anomaly).data_frame``.
    """
    return pd.read_sql(self.statement, self.session.bind)


Query.data_frame = data_frame


def load_connection(connection_string):
    """Return ``session``, ``base``, ``engine``, and ``metadata``
    objects for connecting to the ``jwqldb`` database.

    Create an ``engine`` using a given ``connection_string``.  Create
    a ``base`` class and ``session`` class from the ``engine``.  Create
    an instance of the ``session`` class and a fresh ``MetaData``
    object.  Return the ``session``, ``base``, ``engine``, and
    ``meta`` instances.  This was adapted from the ``acsql``
    repository.

    Parameters
    ----------
    connection_string : str
        A postgresql database connection string.  The
        connection string should take the form:
        ``dialect+driver://username:password@host:port/database``

    Returns
    -------
    session : session object
        Provides a holding zone for all objects loaded or associated
        with the database.
    base : base object
        Provides a base class for declarative class definitions.
    engine : engine object
        Provides a source of database connectivity and behavior.
    meta : metadata object
        The connection metadata

    References
    ----------
    ``acsql``:
        https://github.com/spacetelescope/acsql/blob/master/acsql/database/database_interface.py
    """
    engine = create_engine(connection_string, echo=False)
    base = declarative_base(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    meta = MetaData()

    return session, base, engine, meta


# NOTE: the connection is opened at import time using the project config
session, base, engine, meta = load_connection(SETTINGS['connection_string'])


class Anomaly(base):
    """ORM for the anomalies table"""
    # Name the table
    __tablename__ = 'anomalies'

    # Define the columns
    id = Column(Integer, primary_key=True, nullable=False)
    filename = Column(String, nullable=False)
    # Pass the callable (not its result) so the timestamp is taken when
    # each record is inserted rather than once when the module is imported
    flag_date = Column(DateTime, nullable=False, default=datetime.now)
    bowtie = Column(Boolean, nullable=False, default=False)
    snowball = Column(Boolean, nullable=False, default=False)
    cosmic_ray_shower = Column(Boolean, nullable=False, default=False)
    crosstalk = Column(Boolean, nullable=False, default=False)
    cte_correction_error = Column(Boolean, nullable=False, default=False)
    data_transfer_error = Column(Boolean, nullable=False, default=False)
    detector_ghost = Column(Boolean, nullable=False, default=False)
    diamond = Column(Boolean, nullable=False, default=False)
    diffraction_spike = Column(Boolean, nullable=False, default=False)
    dragon_breath = Column(Boolean, nullable=False, default=False)
    earth_limb = Column(Boolean, nullable=False, default=False)
    excessive_saturation = Column(Boolean, nullable=False, default=False)
    figure8_ghost = Column(Boolean, nullable=False, default=False)
    filter_ghost = Column(Boolean, nullable=False, default=False)
    fringing = Column(Boolean, nullable=False, default=False)
    guidestar_failure = Column(Boolean, nullable=False, default=False)
    banding = Column(Boolean, nullable=False, default=False)
    persistence = Column(Boolean, nullable=False, default=False)
    prominent_blobs = Column(Boolean, nullable=False, default=False)
    trail = Column(Boolean, nullable=False, default=False)
    scattered_light = Column(Boolean, nullable=False, default=False)
    other = Column(Boolean, nullable=False, default=False)

    def __repr__(self):
        """Return the canonical string representation of the object"""
        # Get the columns that are True.  Go through the table columns
        # and ``getattr`` rather than ``self.__dict__`` so that values
        # are (re)loaded if the instance was expired, e.g. by a commit.
        a_list = [col.name for col in self.__table__.columns
                  if getattr(self, col.name) is True]

        txt = ('Anomaly {0.id}: {0.filename} flagged at '
               '{0.flag_date} for {1}').format(self, a_list)

        return txt

    @property
    def colnames(self):
        """A list of the names of the boolean (anomaly flag) columns
        in this table"""
        # Derive the names from the table definition so the result does
        # not depend on which attributes are loaded on this instance
        a_list = [col.name for col in self.__table__.columns
                  if isinstance(col.type, Boolean)]

        return a_list


if __name__ == '__main__':

    base.metadata.create_all(engine)
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "di.session.add(di.Anomaly(filename='foo1', bowtie=True))\n", + "di.session.add(di.Anomaly(filename='foo2', bowtie=True, crosstalk=True))\n", + "di.session.add(di.Anomaly(filename='foo3', snowball=True))\n", + "di.session.add(di.Anomaly(filename='foo4', bowtie=True, snowball=True))\n", + "di.session.add(di.Anomaly(filename='foo5', crosstalk=True))\n", + "di.session.add(di.Anomaly(filename='foo6', crosstalk=True, snowball=True))\n", + "di.session.commit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can query the database for all records with bowtie anomalies." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bowties = di.session.query(di.Anomaly).filter(di.Anomaly.bowtie == True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I added a method to return the result as a `pandas.DataFrame` for kicks." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | id | \n", + "filename | \n", + "flag_date | \n", + "bowtie | \n", + "snowball | \n", + "cosmic_ray_shower | \n", + "crosstalk | \n", + "cte_correction_error | \n", + "data_transfer_error | \n", + "detector_ghost | \n", + "... | \n", + "figure8_ghost | \n", + "filter_ghost | \n", + "fringing | \n", + "guidestar_failure | \n", + "banding | \n", + "persistence | \n", + "prominent_blobs | \n", + "trail | \n", + "scattered_light | \n", + "other | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1 | \n", + "foo1 | \n", + "2018-08-06 10:45:11.916114 | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
1 | \n", + "2 | \n", + "foo1 | \n", + "2018-08-06 10:45:11.916114 | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
2 | \n", + "3 | \n", + "foo2 | \n", + "2018-08-06 10:45:11.916114 | \n", + "True | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
3 | \n", + "5 | \n", + "foo4 | \n", + "2018-08-06 10:45:11.916114 | \n", + "True | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
4 rows × 25 columns
\n", + "