From b727907a120e20a1c298ae0a9135e04d8c2eb04a Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Wed, 15 May 2024 23:39:47 +0100 Subject: [PATCH] Add Exception information --- docs/api.rst | 23 +++++++++++- docs/demo_error.py | 9 +++++ docs/etl_functions/copy.rst | 17 +++++++-- docs/etl_functions/error_handling.rst | 51 +++++++++++++++++++++++++-- 4 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 docs/demo_error.py diff --git a/docs/api.rst b/docs/api.rst index 997daf2..62b7a9d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -9,7 +9,7 @@ etlhelper etlhelper.row_factories ------------------------ +^^^^^^^^^^^^^^^^^^^^^^^ .. automodule:: etlhelper.row_factories :members: @@ -22,4 +22,25 @@ DB Helpers ^^^^^^^^^^^ .. automodule:: etlhelper.db_helpers +.. autoclass:: DbHelper +.. autoclass:: SQLiteDbHelper +.. autoclass:: PostgresDbHelper +.. autoclass:: OracleDbHelper +.. autoclass:: MSSQLDbHelper + +.. _exceptions: + +Exceptions +^^^^^^^^^^^ + +.. automodule:: etlhelper.exceptions +.. autoclass:: etlhelper.exceptions.ETLHelperError +.. autoclass:: etlhelper.exceptions.ETLHelperConnectionError +.. autoclass:: etlhelper.exceptions.ETLHelperQueryError +.. autoclass:: etlhelper.exceptions.ETLHelperDbParamsError +.. autoclass:: etlhelper.exceptions.ETLHelperExtractError +.. autoclass:: etlhelper.exceptions.ETLHelperInsertError +.. autoclass:: etlhelper.exceptions.ETLHelperAbort +.. autoclass:: etlhelper.exceptions.ETLHelperHelperError +.. 
autoclass:: etlhelper.exceptions.ETLHelperBadIdentifierError diff --git a/docs/demo_error.py b/docs/demo_error.py new file mode 100644 index 0000000..ed3cb11 --- /dev/null +++ b/docs/demo_error.py @@ -0,0 +1,9 @@ +"""ETL Helper script to demonstrate an extract error.""" +import sqlite3 +import etlhelper as etl + +db_file = "igneous_rocks.db" +select_sql = "SELECT * FROM bad_table" + +with sqlite3.connect(db_file) as conn: + rows = etl.fetchall(select_sql, conn) diff --git a/docs/etl_functions/copy.rst b/docs/etl_functions/copy.rst index 1e548fd..b99d541 100644 --- a/docs/etl_functions/copy.rst +++ b/docs/etl_functions/copy.rst @@ -75,15 +75,26 @@ auto-generated values via the INSERT query. GROUP BY customer_id """ + # This insert query uses positional parameters, so a namedtuple_row_factory + # is used. insert_sql = """ - INSERT INTO dest (customer_id, total_amount, loaded_by, load_time) - VALUES (%s, %s, current_user, now()) + INSERT INTO dest ( + customer_id, + total_amount, + loaded_by, + load_time) + VALUES ( + %s, + %s, + current_user, + now() + ) """ with ORACLEDB.connect("ORA_PASSWORD") as src_conn: with POSTGRESDB.connect("PG_PASSWORD") as dest_conn: copy_rows(select_sql, src_conn, insert_sql, dest_conn, - row_factory=namedtuple_row_factory) # insert_sql used positional parameters + row_factory=namedtuple_row_factory) ``parameters`` can be passed to the SELECT query as before and the ``commit_chunks``, ``chunk_size`` and ``on_error`` options can be set. diff --git a/docs/etl_functions/error_handling.rst b/docs/etl_functions/error_handling.rst index 5d2db4e..b839409 100644 --- a/docs/etl_functions/error_handling.rst +++ b/docs/etl_functions/error_handling.rst @@ -1,9 +1,37 @@ Error Handling ^^^^^^^^^^^^^^ -This section describes exception classes and on_error functions. +This section describes Exception classes, ``on_error`` functions and error +handling via SQL. 
-logged errors + +ETLHelperError +-------------- + +ETL Helper has a :ref:`variety of Exception classes <exceptions>`, all of which are subclasses +of the :class:`ETLHelperError <etlhelper.exceptions.ETLHelperError>` base class. + +To aid debugging, +the :class:`ETLHelperQueryError <etlhelper.exceptions.ETLHelperQueryError>`, +:class:`ETLHelperExtractError <etlhelper.exceptions.ETLHelperExtractError>` and +:class:`ETLHelperInsertError <etlhelper.exceptions.ETLHelperInsertError>` +classes print the SQL query and the required paramstyle as well as the error +message returned by the database. + +.. literalinclude:: ../demo_error.py + :language: python + +The output is: + +.. code:: bash + + etlhelper.exceptions.ETLHelperExtractError: SQL query raised an error. + + SELECT * FROM bad_table + + Required paramstyle: qmark + + no such table: bad_table also handling errors in SQL e.g. ON CONFLICT @@ -61,4 +89,21 @@ The IDs of failed rows can be written to a file. ``executemany``, ``load``, ``copy_rows`` and ``copy_table_rows`` can all take an ``on_error`` parameter. They each return a tuple containing the -number of rows processed and the number of rows that failed. \ No newline at end of file +number of rows processed and the number of rows that failed. + + +Error handling via SQL +---------------------- + +The ``on_error`` functions allow individual failed rows to be processed, +however this flexibility can come at the expense of speed. +Each chunk of data that contains a bad row will be retried on a row-by-row +basis. + +Databases also have methods for handling errors e.g. duplicate primary keys +using SQL. +By customising an INSERT query (which can be programmatically generated with +:func:`generate_insert_query()`) the database +can be instructed how to process such rows. + +TODO: example script of ON CONFLICT ignore