diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 45ae678ab4c..dda640d6767 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -229,10 +229,10 @@ def version(self): print(migration.db_version()) @args('--max_rows', type=int, metavar='', dest='max_rows', - help='Maximum number of deleted rows to archive. Defaults to 1000. ' - 'Note that this number does not include the corresponding ' - 'rows, if any, that are removed from the API database for ' - 'deleted instances.') + help='Maximum number of deleted rows to archive per table. Defaults ' + 'to 1000. Note that this number is a soft limit and does not ' + 'include the corresponding rows, if any, that are removed ' + 'from the API database for deleted instances.') @args('--before', metavar='', help=('Archive rows that have been deleted before this date. ' 'Accepts date strings in the default format output by the ' @@ -404,7 +404,10 @@ def _do_archive( 'cell1.instances': 5} :param cctxt: Cell-targeted nova.context.RequestContext if archiving across all cells - :param max_rows: Maximum number of deleted rows to archive + :param max_rows: Maximum number of deleted rows to archive per table. + Note that this number is a soft limit and does not include the + corresponding rows, if any, that are removed from the API database + for deleted instances. :param until_complete: Whether to run continuously until all deleted rows are archived :param verbose: Whether to print how many rows were archived per table @@ -417,15 +420,26 @@ def _do_archive( """ ctxt = context.get_admin_context() while True: - run, deleted_instance_uuids, total_rows_archived = \ + # table_to_rows = {table_name: number_of_rows_archived} + # deleted_instance_uuids = ['uuid1', 'uuid2', ...] + table_to_rows, deleted_instance_uuids, total_rows_archived = \ db.archive_deleted_rows( cctxt, max_rows, before=before_date, task_log=task_log) - for table_name, rows_archived in run.items(): + + for table_name, rows_archived in table_to_rows.items(): if cell_name: table_name = cell_name + '.' + table_name table_to_rows_archived.setdefault(table_name, 0) table_to_rows_archived[table_name] += rows_archived - if deleted_instance_uuids: + + # deleted_instance_uuids does not necessarily mean that any + # instances rows were archived because it is obtained by a query + # separate from the archive queries. For example, if a + # DBReferenceError was raised while processing the instances table, + # we would have skipped the table and had 0 rows archived even + # though deleted instances rows were found. + instances_archived = table_to_rows.get('instances', 0) + if deleted_instance_uuids and instances_archived: table_to_rows_archived.setdefault( 'API_DB.instance_mappings', 0) table_to_rows_archived.setdefault( @@ -448,8 +462,9 @@ def _do_archive( # If we're not archiving until there is nothing more to archive, we # have reached max_rows in this cell DB or there was nothing to - # archive. - if not until_complete or not run: + # archive. We check the values() in case we get something like + # table_to_rows = {'instances': 0} back somehow. + if not until_complete or not any(table_to_rows.values()): break if verbose: sys.stdout.write('.') diff --git a/nova/db/main/api.py b/nova/db/main/api.py index 7d24f974f91..ae6533f49cd 100644 --- a/nova/db/main/api.py +++ b/nova/db/main/api.py @@ -4407,6 +4407,9 @@ def _archive_deleted_rows_for_table( select = select.order_by(column).limit(max_rows) with conn.begin(): rows = conn.execute(select).fetchall() + + # This is a list of IDs of rows that should be archived from this table, + # limited to a length of max_rows. records = [r[0] for r in rows] # We will archive deleted rows for this table and also generate insert and @@ -4419,51 +4422,103 @@ def _archive_deleted_rows_for_table( # Keep track of any extra tablenames to number of rows that we archive by # following FK relationships. - # {tablename: extra_rows_archived} + # + # extras = {tablename: number_of_extra_rows_archived} extras = collections.defaultdict(int) - if records: - insert = shadow_table.insert().from_select( - columns, sql.select(table).where(column.in_(records)) - ).inline() - delete = table.delete().where(column.in_(records)) + + if not records: + # Nothing to archive, so return. + return rows_archived, deleted_instance_uuids, extras + + # Keep track of how many rows we accumulate for the insert+delete database + # transaction and cap it as soon as it is >= max_rows. Because we will + # archive all child rows of a parent row along with the parent at the same + # time, we end up with extra rows to archive in addition to len(records). + num_rows_in_batch = 0 + # The sequence of query statements we will execute in a batch. These are + # ordered: [child1, child1, parent1, child2, child2, child2, parent2, ...] + # Parent + child "trees" are kept together to avoid FK constraint + # violations. + statements_in_batch = [] + # The list of records in the batch. This is used for collecting deleted + # instance UUIDs in the case of the 'instances' table. + records_in_batch = [] + + # (melwitt): We will gather rows related by foreign key relationship for + # each deleted row, one at a time. We do it this way to keep track of and + # limit the total number of rows that will be archived in a single database + # transaction. In a large scale database with potentially hundreds of + # thousands of deleted rows, if we don't limit the size of the transaction + # based on max_rows, we can get into a situation where we get stuck not + # able to make much progress. The value of max_rows has to be 1) small + # enough to not exceed the database's max packet size limit or timeout with + # a deadlock but 2) large enough to make progress in an environment with a + # constant high volume of create and delete traffic. By archiving each + # parent + child rows tree one at a time, we can ensure meaningful progress + # can be made while allowing the caller to predictably control the size of + # the database transaction with max_rows. + for record in records: # Walk FK relationships and add insert/delete statements for rows that # refer to this table via FK constraints. fk_inserts and fk_deletes # will be prepended to by _get_fk_stmts if referring rows are found by # FK constraints. fk_inserts, fk_deletes = _get_fk_stmts( - metadata, conn, table, column, records) - - # NOTE(tssurya): In order to facilitate the deletion of records from - # instance_mappings, request_specs and instance_group_member tables in - # the nova_api DB, the rows of deleted instances from the instances - # table are stored prior to their deletion. Basically the uuids of the - # archived instances are queried and returned. - if tablename == "instances": - query_select = sql.select(table.c.uuid).where( - table.c.id.in_(records) - ) - with conn.begin(): - rows = conn.execute(query_select).fetchall() - deleted_instance_uuids = [r[0] for r in rows] + metadata, conn, table, column, [record]) + statements_in_batch.extend(fk_inserts + fk_deletes) + # statement to add parent row to shadow table + insert = shadow_table.insert().from_select( + columns, sql.select(table).where(column.in_([record]))).inline() + statements_in_batch.append(insert) + # statement to remove parent row from main table + delete = table.delete().where(column.in_([record])) + statements_in_batch.append(delete) - try: - # Group the insert and delete in a transaction. - with conn.begin(): - for fk_insert in fk_inserts: - conn.execute(fk_insert) - for fk_delete in fk_deletes: - result_fk_delete = conn.execute(fk_delete) - extras[fk_delete.table.name] += result_fk_delete.rowcount - conn.execute(insert) - result_delete = conn.execute(delete) - rows_archived += result_delete.rowcount - except db_exc.DBReferenceError as ex: - # A foreign key constraint keeps us from deleting some of - # these rows until we clean up a dependent table. Just - # skip this table for now; we'll come back to it later. - LOG.warning("IntegrityError detected when archiving table " - "%(tablename)s: %(error)s", - {'tablename': tablename, 'error': str(ex)}) + records_in_batch.append(record) + + # Check whether were have a full batch >= max_rows. Rows are counted as + # the number of rows that will be moved in the database transaction. + # So each insert+delete pair represents one row that will be moved. + # 1 parent + its fks + num_rows_in_batch += 1 + len(fk_inserts) + + if max_rows is not None and num_rows_in_batch >= max_rows: + break + + # NOTE(tssurya): In order to facilitate the deletion of records from + # instance_mappings, request_specs and instance_group_member tables in the + # nova_api DB, the rows of deleted instances from the instances table are + # stored prior to their deletion. Basically the uuids of the archived + # instances are queried and returned. + if tablename == "instances": + query_select = sql.select(table.c.uuid).where( + table.c.id.in_(records_in_batch)) + with conn.begin(): + rows = conn.execute(query_select).fetchall() + # deleted_instance_uuids = ['uuid1', 'uuid2', ...] + deleted_instance_uuids = [r[0] for r in rows] + + try: + # Group the insert and delete in a transaction. + with conn.begin(): + for statement in statements_in_batch: + result = conn.execute(statement) + result_tablename = statement.table.name + # Add to archived row counts if not a shadow table. + if not result_tablename.startswith(_SHADOW_TABLE_PREFIX): + if result_tablename == tablename: + # Number of tablename (parent) rows archived. + rows_archived += result.rowcount + else: + # Number(s) of child rows archived. + extras[result_tablename] += result.rowcount + + except db_exc.DBReferenceError as ex: + # A foreign key constraint keeps us from deleting some of these rows + # until we clean up a dependent table. Just skip this table for now; + # we'll come back to it later. + LOG.warning("IntegrityError detected when archiving table " + "%(tablename)s: %(error)s", + {'tablename': tablename, 'error': str(ex)}) conn.close() diff --git a/nova/tests/functional/db/test_archive.py b/nova/tests/functional/db/test_archive.py index 4a813d10a7c..1bcce9f3ee0 100644 --- a/nova/tests/functional/db/test_archive.py +++ b/nova/tests/functional/db/test_archive.py @@ -174,6 +174,50 @@ def test_archive_deleted_rows_incomplete(self): break self.assertFalse(exceptions) + def test_archive_deleted_rows_parent_child_trees_one_at_time(self): + """Test that we are archiving parent + child rows "trees" one at a time + + Previously, we archived deleted rows in batches of max_rows parents + + their child rows in a single database transaction. Doing it that way + limited how high a value of max_rows could be specified by the caller + because of the size of the database transaction it could generate. + + For example, in a large scale deployment with hundreds of thousands of + deleted rows and constant server creation and deletion activity, a + value of max_rows=1000 might exceed the database's configured maximum + packet size or timeout due to a database deadlock, forcing the operator + to use a much lower max_rows value like 100 or 50. + + And when the operator has e.g. 500,000 deleted instances rows (and + millions of deleted rows total) they are trying to archive, being + forced to use a max_rows value serveral orders of magnitude lower than + the number of rows they need to archive was a poor user experience. + + This tests that we are archiving each parent plus their child rows one + at a time while limiting the total number of rows per table in a batch + as soon as the total number of archived rows is >= max_rows. + """ + # Boot two servers and delete them, then try to archive rows. + for i in range(2): + server = self._create_server() + self._delete_server(server) + + # Use max_rows=5 to limit the archive to one complete parent + + # child rows tree. + table_to_rows, _, _ = db.archive_deleted_rows(max_rows=5) + + # We should have archived only one instance because the parent + + # child tree will have >= max_rows of 5. + self.assertEqual(1, table_to_rows['instances']) + + # Archive once more to archive the other instance (and its child rows). + table_to_rows, _, _ = db.archive_deleted_rows(max_rows=5) + self.assertEqual(1, table_to_rows['instances']) + + # There should be nothing else to archive. + _, _, total_rows_archived = db.archive_deleted_rows(max_rows=100) + self.assertEqual(0, total_rows_archived) + def _get_table_counts(self): engine = db.get_engine() conn = engine.connect() diff --git a/nova/tests/unit/cmd/test_manage.py b/nova/tests/unit/cmd/test_manage.py index 10c1a77c948..0bd4db284e7 100644 --- a/nova/tests/unit/cmd/test_manage.py +++ b/nova/tests/unit/cmd/test_manage.py @@ -501,6 +501,26 @@ def test_archive_deleted_rows_verbose_no_results(self, mock_get_all, self.assertIn('Nothing was archived.', output) self.assertEqual(0, result) + @mock.patch.object(db, 'archive_deleted_rows') + @mock.patch.object(objects.CellMappingList, 'get_all') + def test_archive_deleted_rows_deleted_instances_no_rows(self, mock_get_all, + mock_db_archive): + # Simulate a scenario where we didn't archive any rows (example: + # DBReferenceError was raised while processing the instances table) but + # the separate query for deleted_instance_uuids found rows. + mock_db_archive.return_value = ( + {}, [uuidsentinel.instance1, uuidsentinel.instance2], 0) + + result = self.commands.archive_deleted_rows(20, verbose=True) + + mock_db_archive.assert_called_once_with( + test.MatchType(context.RequestContext), 20, before=None, + task_log=False) + output = self.output.getvalue() + # If nothing was archived, there should be no purge messages + self.assertIn('Nothing was archived.', output) + self.assertEqual(0, result) + @mock.patch.object(db, 'archive_deleted_rows') @mock.patch.object(objects.RequestSpec, 'destroy_bulk') @mock.patch.object(objects.InstanceGroup, 'destroy_members_bulk') diff --git a/releasenotes/notes/db-archive-performance-degradation-3fdabc43398149b1.yaml b/releasenotes/notes/db-archive-performance-degradation-3fdabc43398149b1.yaml new file mode 100644 index 00000000000..537d67b3838 --- /dev/null +++ b/releasenotes/notes/db-archive-performance-degradation-3fdabc43398149b1.yaml @@ -0,0 +1,21 @@ +--- +fixes: + - | + `Bug #2024258`_: Fixes an issue with performance degradation archiving + databases with large numbers of foreign key related records. + + Previously, deleted rows were archived in batches of max_rows parents + + their child rows in a single database transaction. It limited how high + a value of max_rows could be specified by the user because of the size of + the database transaction it could generate. Symptoms of the behavior were + exceeding the maximum configured packet size of the database or timing out + due to a deadlock. + + The behavior has been changed to archive batches of complete parent + + child rows trees while limiting each batch when it has reached >= max_rows + records. This allows the size of the database transaction to be controlled + by the user and enables more rows to be archived per invocation of + ``nova-manage db archive_deleted_rows`` when there are a large number of + foreign key related records. + + .. _Bug #2024258: https://bugs.launchpad.net/nova/+bug/2024258