From 369db2eb071ffc9c8f61bb29d5b8eb8282a43c09 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 5 Jul 2023 13:47:18 +0200 Subject: [PATCH 01/22] Base setup for postgres --- docker-compose.yml | 14 ++++++++ requirements/common.in | 1 + requirements/common.txt | 64 +++++++++++++++++++++++++++++++++++ treeherder/config/settings.py | 15 +++++--- 4 files changed, 89 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ddfc557e8bd..34bfd4ef049 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -83,6 +83,19 @@ services: - '3306:3306' command: --character-set-server=utf8 --collation-server=utf8_bin + postgres: + container_name: postgres + # https://hub.docker.com/r/library/postgres/ + image: postgres:15 + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=mozilla1234 + - POSTGRES_DB=treeherder + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - '5432:5432' + redis: container_name: redis # https://hub.docker.com/_/redis/ @@ -144,3 +157,4 @@ services: volumes: # TODO: Experiment with using tmpfs when testing, to speed up database-using Python tests. mysql_data: {} + postgres_data: {} diff --git a/requirements/common.in b/requirements/common.in index 5019f360dd2..f490477e831 100644 --- a/requirements/common.in +++ b/requirements/common.in @@ -9,6 +9,7 @@ newrelic==8.8.0 certifi==2023.5.7 mysqlclient==2.1.1 # Required by Django +psycopg2-binary==2.9.6 jsonschema==4.17.3 # import jsonschema djangorestframework==3.14.0 # Imported as rest_framework diff --git a/requirements/common.txt b/requirements/common.txt index 1bad225e357..802e5719c0d 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -831,6 +831,70 @@ prompt-toolkit==3.0.39 \ --hash=sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac \ --hash=sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88 # via click-repl +psycopg2-binary==2.9.6 \ + --hash=sha256:02c0f3757a4300cf379eb49f543fb7ac527fb00144d39246ee40e1df684ab514 \ + --hash=sha256:02c6e3cf3439e213e4ee930308dc122d6fb4d4bea9aef4a12535fbd605d1a2fe \ + --hash=sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be \ + --hash=sha256:0892ef645c2fabb0c75ec32d79f4252542d0caec1d5d949630e7d242ca4681a3 \ + --hash=sha256:0d236c2825fa656a2d98bbb0e52370a2e852e5a0ec45fc4f402977313329174d \ + --hash=sha256:0e0f754d27fddcfd74006455b6e04e6705d6c31a612ec69ddc040a5468e44b4e \ + --hash=sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081 \ + --hash=sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb \ + --hash=sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c \ + --hash=sha256:2ab652e729ff4ad76d400df2624d223d6e265ef81bb8aa17fbd63607878ecbee \ + --hash=sha256:30637a20623e2a2eacc420059be11527f4458ef54352d870b8181a4c3020ae6b \ + --hash=sha256:34b9ccdf210cbbb1303c7c4db2905fa0319391bd5904d32689e6dd5c963d2ea8 \ + --hash=sha256:38601cbbfe600362c43714482f43b7c110b20cb0f8172422c616b09b85a750c5 \ + --hash=sha256:441cc2f8869a4f0f4bb408475e5ae0ee1f3b55b33f350406150277f7f35384fc \ + --hash=sha256:498807b927ca2510baea1b05cc91d7da4718a0f53cb766c154c417a39f1820a0 \ + --hash=sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0 \ + --hash=sha256:4c727b597c6444a16e9119386b59388f8a424223302d0c06c676ec8b4bc1f963 \ + --hash=sha256:4d67fbdaf177da06374473ef6f7ed8cc0a9dc640b01abfe9e8a2ccb1b1402c1f \ + --hash=sha256:4dfb4be774c4436a4526d0c554af0cc2e02082c38303852a36f6456ece7b3503 \ + --hash=sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2 \ + --hash=sha256:51537e3d299be0db9137b321dfb6a5022caaab275775680e0c3d281feefaca6b \ + --hash=sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9 \ + --hash=sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f \ + --hash=sha256:65bee1e49fa6f9cf327ce0e01c4c10f39165ee76d35c846ade7cb0ec6683e303 \ + --hash=sha256:65c07febd1936d63bfde78948b76cd4c2a411572a44ac50719ead41947d0f26b \ + --hash=sha256:71f14375d6f73b62800530b581aed3ada394039877818b2d5f7fc77e3bb6894d \ + --hash=sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70 \ + --hash=sha256:7e13a5a2c01151f1208d5207e42f33ba86d561b7a89fca67c700b9486a06d0e2 \ + --hash=sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0 \ + --hash=sha256:8122cfc7cae0da9a3077216528b8bb3629c43b25053284cc868744bfe71eb141 \ + --hash=sha256:8338a271cb71d8da40b023a35d9c1e919eba6cbd8fa20a54b748a332c355d896 \ + --hash=sha256:84d2222e61f313c4848ff05353653bf5f5cf6ce34df540e4274516880d9c3763 \ + --hash=sha256:8a6979cf527e2603d349a91060f428bcb135aea2be3201dff794813256c274f1 \ + --hash=sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c \ + --hash=sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b \ + --hash=sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e \ + --hash=sha256:a8c28fd40a4226b4a84bdf2d2b5b37d2c7bd49486b5adcc200e8c7ec991dfa7e \ + --hash=sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f \ + --hash=sha256:af335bac6b666cc6aea16f11d486c3b794029d9df029967f9938a4bed59b6a19 \ + --hash=sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb \ + --hash=sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6 \ + --hash=sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b \ + --hash=sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8 \ + --hash=sha256:bfb13af3c5dd3a9588000910178de17010ebcccd37b4f9794b00595e3a8ddad3 \ + --hash=sha256:c3dba7dab16709a33a847e5cd756767271697041fbe3fe97c215b1fc1f5c9848 \ + --hash=sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365 \ + --hash=sha256:c7e62ab8b332147a7593a385d4f368874d5fe4ad4e341770d4983442d89603e3 \ + --hash=sha256:c83a74b68270028dc8ee74d38ecfaf9c90eed23c8959fca95bd703d25b82c88e \ + --hash=sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1 \ + --hash=sha256:cf4499e0a83b7b7edcb8dabecbd8501d0d3a5ef66457200f77bde3d210d5debb \ + --hash=sha256:cfec476887aa231b8548ece2e06d28edc87c1397ebd83922299af2e051cf2827 \ + --hash=sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7 \ + --hash=sha256:d4e6036decf4b72d6425d5b29bbd3e8f0ff1059cda7ac7b96d6ac5ed34ffbacd \ + --hash=sha256:d57c3fd55d9058645d26ae37d76e61156a27722097229d32a9e73ed54819982a \ + --hash=sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820 \ + --hash=sha256:e3ed340d2b858d6e6fb5083f87c09996506af483227735de6964a6100b4e6a54 \ + --hash=sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df \ + --hash=sha256:e9182eb20f41417ea1dd8e8f7888c4d7c6e805f8a7c98c1081778a3da2bee3e4 \ + --hash=sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1 \ + --hash=sha256:f6a88f384335bb27812293fdb11ac6aee2ca3f51d3c7820fe03de0a304ab6249 \ + --hash=sha256:f81e65376e52f03422e1fb475c9514185669943798ed019ac50410fb4c4df232 \ + --hash=sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7 + # via -r requirements/common.in pyasn1==0.5.0 \ --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde diff --git a/treeherder/config/settings.py b/treeherder/config/settings.py index cff9686cb35..de520b8fbd6 100644 --- a/treeherder/config/settings.py +++ b/treeherder/config/settings.py @@ -142,10 +142,15 @@ # We're intentionally not using django-environ's query string options feature, # since it hides configuration outside of the repository, plus could lead to # drift between environments. -for alias in DATABASES: +for alias, db in DATABASES.items(): # Persist database connections for 5 minutes, to avoid expensive reconnects. - DATABASES[alias]['CONN_MAX_AGE'] = 300 - DATABASES[alias]['OPTIONS'] = { + db['CONN_MAX_AGE'] = 300 + + # These options are only valid for mysql + if db['ENGINE'] != 'django.db.backends.mysql': + continue + + db['OPTIONS'] = { # Override Django's default connection charset of 'utf8', otherwise it's # still not possible to insert non-BMP unicode into utf8mb4 tables. 'charset': 'utf8mb4', @@ -157,8 +162,8 @@ } # For use of the stage replica, use the 'deployment/gcp/ca-cert.pem' path for use in your local env file # or pass the variable to docker-compose command; additional certs are in the deployment directory. - if connection_should_use_tls(DATABASES[alias]['HOST']): - DATABASES[alias]['OPTIONS']['ssl'] = { + if connection_should_use_tls(db['HOST']): + db['OPTIONS']['ssl'] = { 'ca': env("TLS_CERT_PATH", default=None), } From d19121de9a340436bcef6eff224ddaa231a9aca9 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 5 Jul 2023 16:35:42 +0200 Subject: [PATCH 02/22] Support Postgres in migrations --- ...hed_0022_modify_bugscache_and_bugjobmap.py | 98 ++++++++++++------- .../0036_cascade_perf_datum_deletion.py | 10 +- .../migrations/0044_perfdatum_bigint_fk.py | 10 +- ...estore_perf_multicommitdatum_and_schema.py | 5 + ...046_restore_cascade_perf_datum_deletion.py | 10 +- 5 files changed, 88 insertions(+), 45 deletions(-) diff --git a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py index b19862e4dfd..8f8a14b37ae 100644 --- a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py +++ b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py @@ -7,6 +7,65 @@ from django.db import migrations, models +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + EXTRA_MIGRATIONS = [ + # Manually created migrations. + # Since Django doesn't natively support creating FULLTEXT indices. + migrations.RunSQL( + [ + # Suppress the MySQL warning "InnoDB rebuilding table to add column FTS_DOC_ID": + # https://dev.mysql.com/doc/refman/5.7/en/innodb-fulltext-index.html#innodb-fulltext-index-docid + # The table is empty when the index is added, so we don't care about it being rebuilt, + # and there isn't a better way to add the index without Django FULLTEXT support. + 'SET @old_max_error_count=@@max_error_count, max_error_count=0;', + 'CREATE FULLTEXT INDEX idx_summary ON bugscache (summary);', + 'SET max_error_count=@old_max_error_count;', + ], + reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'], + ), + # Since Django doesn't natively support creating composite prefix indicies for Mysql + migrations.RunSQL( + [ + 'CREATE INDEX failure_line_test_idx ON failure_line (test(50), subtest(25), status, expected, created);', + 'CREATE INDEX failure_line_signature_test_idx ON failure_line (signature(25), test(50), created);', + ], + reverse_sql=[ + 'DROP INDEX failure_line_test_idx ON failure_line;', + 'DROP INDEX failure_line_signature_test_idx ON failure_line;', + ], + state_operations=[ + migrations.AlterIndexTogether( + name='failureline', + index_together=set( + [ + ('test', 'subtest', 'status', 'expected', 'created'), + ('job_guid', 'repository'), + ('signature', 'test', 'created'), + ] + ), + ), + ], + ), + ] +else: + # On postgres we can use standard migrations + EXTRA_MIGRATIONS = [ + migrations.AlterIndexTogether( + name='failureline', + index_together=set( + [ + ('test', 'subtest', 'status', 'expected', 'created'), + ('job_guid', 'repository'), + ('signature', 'test', 'created'), + ] + ), + ), + migrations.AddIndex( + model_name='bugscache', index=models.Index('summary', name='idx_summary') + ), + ] + + class Migration(migrations.Migration): initial = True @@ -965,41 +1024,4 @@ class Migration(migrations.Migration): name='bugjobmap', unique_together=set([('job', 'bug_id')]), ), - # Manually created migrations. - # Since Django doesn't natively support creating FULLTEXT indices. - migrations.RunSQL( - [ - # Suppress the MySQL warning "InnoDB rebuilding table to add column FTS_DOC_ID": - # https://dev.mysql.com/doc/refman/5.7/en/innodb-fulltext-index.html#innodb-fulltext-index-docid - # The table is empty when the index is added, so we don't care about it being rebuilt, - # and there isn't a better way to add the index without Django FULLTEXT support. - 'SET @old_max_error_count=@@max_error_count, max_error_count=0;', - 'CREATE FULLTEXT INDEX idx_summary ON bugscache (summary);', - 'SET max_error_count=@old_max_error_count;', - ], - reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'], - ), - # Since Django doesn't natively support creating composite prefix indicies. - migrations.RunSQL( - [ - 'CREATE INDEX failure_line_test_idx ON failure_line (test(50), subtest(25), status, expected, created);', - 'CREATE INDEX failure_line_signature_test_idx ON failure_line (signature(25), test(50), created);', - ], - reverse_sql=[ - 'DROP INDEX failure_line_test_idx ON failure_line;', - 'DROP INDEX failure_line_signature_test_idx ON failure_line;', - ], - state_operations=[ - migrations.AlterIndexTogether( - name='failureline', - index_together=set( - [ - ('test', 'subtest', 'status', 'expected', 'created'), - ('job_guid', 'repository'), - ('signature', 'test', 'created'), - ] - ), - ), - ], - ), - ] + ] + EXTRA_MIGRATIONS diff --git a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py index f904c178339..431a0fd271e 100644 --- a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py @@ -1,9 +1,15 @@ # Generated by Django 3.0.8 on 2020-12-11 14:42 from django.db import migrations +from django.conf import settings MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN_KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -15,7 +21,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -23,7 +29,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);', diff --git a/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py b/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py index c76fa2043d3..bd7d53537ec 100644 --- a/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py +++ b/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py @@ -3,6 +3,12 @@ to update the column and fake this migration. Migration perf.0045 will restore a valid django's schema. """ from django.db import migrations, connection +from django.conf import settings + +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + QUERY = "ALTER TABLE performance_datum MODIFY COLUMN id BIGINT(20) NOT NULL AUTO_INCREMENT" +else: + QUERY = "ALTER TABLE performance_datum ALTER COLUMN id TYPE BIGINT using id::bigint" def alter_perfdatum_pk(apps, schema_editor): @@ -18,9 +24,7 @@ def alter_perfdatum_pk(apps, schema_editor): if pursue.lower() not in ('', 'y', 'yes'): raise Exception("Aborting…") with connection.cursor() as cursor: - cursor.execute( - "ALTER TABLE performance_datum MODIFY COLUMN id BIGINT(20) NOT NULL AUTO_INCREMENT" - ) + cursor.execute(QUERY) return diff --git a/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py b/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py index e6e1289320c..72923dee797 100644 --- a/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py +++ b/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py @@ -1,4 +1,5 @@ from django.db import migrations, models, connection +from django.conf import settings import django.db.models.deletion from django.db.utils import DatabaseError @@ -6,6 +7,10 @@ def check_perfdatum_pk(apps, schema_editor): """Ensure performance_datum FK has been updated to bigint type""" + # Not needed on postgresql + if settings.DATABASES['default']['ENGINE'] != 'django.db.backends.mysql': + return + with connection.cursor() as cursor: cursor.execute( "SELECT COLUMN_TYPE from INFORMATION_SCHEMA.COLUMNS WHERE " diff --git a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py index 2dacc3b1533..1215f6d5dde 100644 --- a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py @@ -2,9 +2,15 @@ It restores the DB side CASCADE deletion behavior for perf_multicommitdatum table toward performance_datum """ from django.db import migrations +from django.conf import settings MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN_KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -16,7 +22,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -24,7 +30,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);', From e7ae669f3a46539a2a1025dd5ec5356b259ecc0b Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 5 Jul 2023 16:43:31 +0200 Subject: [PATCH 03/22] Run unit tests on CI with postgres --- .circleci/config.yml | 1 + tox.ini | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index fb19a9b0fc7..d558307576e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -67,6 +67,7 @@ jobs: pip install --upgrade pip pip install tox tox -e docker + tox -e docker-postgres - codecov/upload test-docker-build: diff --git a/tox.ini b/tox.ini index 35ef1568a20..e3a6fad6d05 100644 --- a/tox.ini +++ b/tox.ini @@ -49,5 +49,13 @@ commands_pre = commands = docker-compose run -e TREEHERDER_DEBUG=False backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" +[testenv:docker-postgres] +whitelist_externals= + docker-compose +commands_pre = + docker-compose build +commands = + docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@localhost/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" + [flake8] per-file-ignores = treeherder/model/models.py:E402 From ed9c63ea35fb81eff9da452f89df7aab046837ee Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Thu, 6 Jul 2023 11:54:14 +0200 Subject: [PATCH 04/22] Fix typo --- treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py | 2 +- .../perf/migrations/0046_restore_cascade_perf_datum_deletion.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py index 431a0fd271e..f020966bdc0 100644 --- a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py @@ -6,7 +6,7 @@ MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': - DROP_TYPE = 'FOREIGN_KEY' + DROP_TYPE = 'FOREIGN KEY' else: DROP_TYPE = 'CONSTRAINT' diff --git a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py index 1215f6d5dde..4fbdcb76967 100644 --- a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py @@ -7,7 +7,7 @@ MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': - DROP_TYPE = 'FOREIGN_KEY' + DROP_TYPE = 'FOREIGN KEY' else: DROP_TYPE = 'CONSTRAINT' From 9fc019d8bcd9fb3187a016cea6617d18a6cb4844 Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Thu, 6 Jul 2023 12:07:08 +0200 Subject: [PATCH 05/22] Try to fix docker-postgres tests --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index e3a6fad6d05..23da59738ad 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ whitelist_externals = sh docker-compose commands_pre = - docker-compose up --detach mysql redis rabbitmq + docker-compose up --detach mysql postgres redis rabbitmq pip install --no-deps -r {toxinidir}/requirements/dev.txt pip install --no-deps -r {toxinidir}/requirements/common.txt commands = @@ -55,7 +55,7 @@ whitelist_externals= commands_pre = docker-compose build commands = - docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@localhost/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" + docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" [flake8] per-file-ignores = treeherder/model/models.py:E402 From 6345cbcf3d0b771c5a64a947a55018d8d7884dcf Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Thu, 6 Jul 2023 13:58:32 +0200 Subject: [PATCH 06/22] Make backend depend on postgres in docker-compose --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 34bfd4ef049..33af2a41e82 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,6 +45,7 @@ services: depends_on: - mysql - redis + - postgres - rabbitmq stdin_open: true tty: true From 7b94ddbcf57c311d1d85bfae14667ecb19158312 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 12 Jul 2023 17:41:40 +0200 Subject: [PATCH 07/22] Ensure postgres is reacheable before starting the application --- docker/entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 1cf764607b8..9848e454df0 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -18,6 +18,7 @@ function check_service () { # Keep these in sync with DATABASE_URL. check_service "MySQL" "mysql" 3306 +check_service "PostgreSQL" "postgres" 5432 # Keep these in sync with CELERY_BROKER_URL. check_service "RabbitMQ" "rabbitmq" 5672 From 5ceaa0be6689547c663026c2e38d0f60d6347ddf Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 12 Jul 2023 17:43:04 +0200 Subject: [PATCH 08/22] Run tests with both DB backends in separate CI jobs --- .circleci/config.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d558307576e..7576e9e93a2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,7 +49,7 @@ jobs: command: tox -e linters name: Run linters - python-tests: + python-tests-mysql: machine: image: ubuntu-2004:202010-01 steps: @@ -67,6 +67,25 @@ jobs: pip install --upgrade pip pip install tox tox -e docker + - codecov/upload + + python-tests-postgres: + machine: + image: ubuntu-2004:202010-01 + steps: + - checkout + - docker/install-docker: + version: 19.03.13 + - docker/install-docker-compose: + version: 1.29.2 + - run: + command: sudo apt-get update && sudo apt-get install python3-venv -y + name: python for glean + - run: + name: Run tests and coverage within Docker container + command: | + pip install --upgrade pip + pip install tox tox -e docker-postgres - codecov/upload @@ -143,7 +162,8 @@ workflows: jobs: - javascript-tests - builds - - python-tests + - python-tests-mysql + - python-tests-postgres - test-docker-build - deploy: filters: From d7c4e195ae952e07da0d0cd050063b21f061ec0d Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 13 Jul 2023 10:52:09 +0200 Subject: [PATCH 09/22] Make celery/pulse task depend on postgres in docker-compose --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 33af2a41e82..2c01dc5985e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -135,6 +135,7 @@ services: - .:/app depends_on: - mysql + - postgres - rabbitmq platform: linux/amd64 @@ -152,6 +153,7 @@ services: - .:/app depends_on: - mysql + - postgres - redis - rabbitmq platform: linux/amd64 From 18361397fe495c593515e5a684e0b0f6983113df Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 13 Jul 2023 11:02:38 +0200 Subject: [PATCH 10/22] Separate services for postgres and mysql --- docker/entrypoint.sh | 9 +++++++-- tox.ini | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 9848e454df0..d40075579ed 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -17,8 +17,13 @@ function check_service () { } # Keep these in sync with DATABASE_URL. -check_service "MySQL" "mysql" 3306 -check_service "PostgreSQL" "postgres" 5432 +echo "Checking database status at $DATABASE_URL" +if [[ ${DATABASE_URL:0:8} == "mysql://" ]]; then + check_service "MySQL" "mysql" 3306; +fi +if [[ ${DATABASE_URL:0:7} == "psql://" ]]; then + check_service "PostgreSQL" "postgres" 5432; +fi # Keep these in sync with CELERY_BROKER_URL. check_service "RabbitMQ" "rabbitmq" 5672 diff --git a/tox.ini b/tox.ini index 23da59738ad..b5eb0744d6f 100644 --- a/tox.ini +++ b/tox.ini @@ -45,7 +45,7 @@ commands_post = whitelist_externals= docker-compose commands_pre = - docker-compose build + docker-compose up --build --detach mysql redis rabbitmq commands = docker-compose run -e TREEHERDER_DEBUG=False backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" @@ -53,7 +53,7 @@ commands = whitelist_externals= docker-compose commands_pre = - docker-compose build + docker-compose up --build --detach postgres redis rabbitmq commands = docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" From d573eec234a5664ba0ba096d9acf20b6ead71cba Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 13 Jul 2023 11:12:49 +0200 Subject: [PATCH 11/22] Use postgres:15-bullseye for Circle CI --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2c01dc5985e..2f68aa308f7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -87,7 +87,7 @@ services: postgres: container_name: postgres # https://hub.docker.com/r/library/postgres/ - image: postgres:15 + image: postgres:15-bullseye environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=mozilla1234 From dfd417529bf68766460aa713e8e6407ebd8e0f67 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 13 Jul 2023 11:47:21 +0200 Subject: [PATCH 12/22] Rely on depends_on for docker tox env --- tox.ini | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index b5eb0744d6f..cfa2b756f11 100644 --- a/tox.ini +++ b/tox.ini @@ -42,18 +42,16 @@ commands = commands_post = [testenv:docker] +commands_pre = whitelist_externals= docker-compose -commands_pre = - docker-compose up --build --detach mysql redis rabbitmq commands = docker-compose run -e TREEHERDER_DEBUG=False backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" [testenv:docker-postgres] +commands_pre = whitelist_externals= docker-compose -commands_pre = - docker-compose up --build --detach postgres redis rabbitmq commands = docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" From dbb6960b983f7289041c17aa06db3b3e8978afe6 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 12 Jul 2023 17:24:21 +0200 Subject: [PATCH 13/22] Sync migration schemas and add summary index to the django model --- .../0001_squashed_0022_modify_bugscache_and_bugjobmap.py | 9 ++++++++- treeherder/model/models.py | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py index 8f8a14b37ae..919060e0d8d 100644 --- a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py +++ b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py @@ -22,6 +22,12 @@ 'SET max_error_count=@old_max_error_count;', ], reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'], + state_operations=[ + migrations.AddIndex( + model_name='bugscache', + index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'), + ) + ], ), # Since Django doesn't natively support creating composite prefix indicies for Mysql migrations.RunSQL( @@ -61,7 +67,8 @@ ), ), migrations.AddIndex( - model_name='bugscache', index=models.Index('summary', name='idx_summary') + model_name='bugscache', + index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'), ), ] diff --git a/treeherder/model/models.py b/treeherder/model/models.py index 21e5503050b..6052fabd634 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -225,6 +225,9 @@ class Bugscache(models.Model): class Meta: db_table = 'bugscache' verbose_name_plural = 'bugscache' + indexes = [ + models.Index(fields=['summary']), + ] def __str__(self): return "{0}".format(self.id) From 73e98569cc3f4052178285ec48ddb42723169fa4 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 19 Jul 2023 12:01:37 +0200 Subject: [PATCH 14/22] Implement Bugscache search for postgres --- treeherder/model/models.py | 47 ++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/treeherder/model/models.py b/treeherder/model/models.py index 6052fabd634..6f7fd3eea1b 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -11,7 +11,9 @@ warnings.filterwarnings('ignore', category=DeprecationWarning, module='newrelic') import newrelic.agent +from django.conf import settings from django.contrib.auth.models import User +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector from django.core.cache import cache from django.core.exceptions import ObjectDoesNotExist from django.core.validators import MinLengthValidator @@ -251,24 +253,35 @@ def search(self, search_term): # see https://bugzilla.mozilla.org/show_bug.cgi?id=1704311 search_term_fulltext = self.sanitized_search_term(search_term) - # Substitute escape and wildcard characters, so the search term is used - # literally in the LIKE statement. - search_term_like = ( - search_term.replace('=', '==').replace('%', '=%').replace('_', '=_').replace('\\"', '') - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Substitute escape and wildcard characters, so the search term is used + # literally in the LIKE statement. + search_term_like = ( + search_term.replace('=', '==') + .replace('%', '=%') + .replace('_', '=_') + .replace('\\"', '') + ) - recent_qs = self.objects.raw( - """ - SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of, - MATCH (`summary`) AGAINST (%s IN BOOLEAN MODE) AS relevance - FROM bugscache - WHERE 1 - AND `summary` LIKE CONCAT ('%%%%', %s, '%%%%') ESCAPE '=' - ORDER BY relevance DESC - LIMIT 0,%s - """, - [search_term_fulltext, search_term_like, max_size], - ) + recent_qs = self.objects.raw( + """ + SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of, + MATCH (`summary`) AGAINST (%s IN BOOLEAN MODE) AS relevance + FROM bugscache + WHERE 1 + AND `summary` LIKE CONCAT ('%%%%', %s, '%%%%') ESCAPE '=' + ORDER BY relevance DESC + LIMIT 0,%s + """, + [search_term_fulltext, search_term_like, max_size], + ) + else: + # On PostgreSQL we can use the full text search features + vector = SearchVector("summary") + query = SearchQuery(search_term_fulltext) + recent_qs = Bugscache.objects.annotate(rank=SearchRank(vector, query)).order_by( + "-rank" + )[0:max_size] exclude_fields = ["modified", "processed_update"] try: From 60c2f61ffb2ec1b10e2542409c5a1460ab63f5a7 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 19 Jul 2023 12:01:59 +0200 Subject: [PATCH 15/22] Order in tests --- tests/etl/test_job_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/etl/test_job_loader.py b/tests/etl/test_job_loader.py index 8e183fb3b7c..44650c7bc1e 100644 --- a/tests/etl/test_job_loader.py +++ b/tests/etl/test_job_loader.py @@ -147,7 +147,8 @@ def test_ingest_pulse_jobs( }, ] assert [ - {"name": item.name, "url": item.url, "parse_status": item.status} for item in job_logs.all() + {"name": item.name, "url": item.url, "parse_status": item.status} + for item in job_logs.all().order_by("name") ] == logs_expected From 97bb92bf0a6b93e7a9e7c64a599b7a89823064f1 Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Thu, 20 Jul 2023 10:48:56 +0200 Subject: [PATCH 16/22] Fix Perfherder cycling tests --- .../cycle_data/test_perfherder_cycling.py | 18 +- .../model/data_cycling/removal_strategies.py | 174 +++++++++++------- 2 files changed, 115 insertions(+), 77 deletions(-) diff --git a/tests/model/cycle_data/test_perfherder_cycling.py b/tests/model/cycle_data/test_perfherder_cycling.py index 81b5fad6a71..8655ed84856 100644 --- a/tests/model/cycle_data/test_perfherder_cycling.py +++ b/tests/model/cycle_data/test_perfherder_cycling.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +from django.conf import settings from django.core.management import call_command from django.db import connection, IntegrityError @@ -809,13 +810,16 @@ def test_deleting_performance_data_cascades_to_perf_multicomit_data(test_perf_da try: cursor = connection.cursor() - cursor.execute( - ''' - DELETE FROM `performance_datum` - WHERE id = %s - ''', - [perf_datum.id], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + cursor.execute( + ''' + DELETE FROM `performance_datum` + WHERE id = %s + ''', + [perf_datum.id], + ) + else: + PerformanceDatum.objects.filter(id=perf_datum.id).delete() except IntegrityError: pytest.fail() finally: diff --git a/treeherder/model/data_cycling/removal_strategies.py b/treeherder/model/data_cycling/removal_strategies.py index 008987c023a..15f2e721e28 100644 --- a/treeherder/model/data_cycling/removal_strategies.py +++ b/treeherder/model/data_cycling/removal_strategies.py @@ -6,6 +6,7 @@ from itertools import cycle from typing import List +from django.conf import settings from django.db.backends.utils import CursorWrapper from treeherder.model.models import Repository @@ -81,20 +82,28 @@ def max_timestamp(self): def remove(self, using: CursorWrapper): chunk_size = self._find_ideal_chunk_size() - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE push_timestamp <= %s - LIMIT %s - ''', - [self._max_timestamp, chunk_size], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + using.execute( + ''' + DELETE FROM `performance_datum` + WHERE push_timestamp <= %s + LIMIT %s + ''', + [self._max_timestamp, chunk_size], + ) + else: + deleted, _ = PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + push_timestamp__lte=self._max_timestamp + ).values_list('id')[:chunk_size] + ).delete() + using.rowcount = deleted @property def name(self) -> str: @@ -184,25 +193,34 @@ def name(self) -> str: return 'try data removal strategy' def __attempt_remove(self, using): - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - total_signatures = len(self.target_signatures) - from_target_signatures = ' OR '.join(['signature_id = %s'] * total_signatures) - - delete_try_data = f''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND push_timestamp <= %s AND ({from_target_signatures}) - LIMIT %s - ''' - - using.execute( - delete_try_data, - [self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + total_signatures = len(self.target_signatures) + from_target_signatures = ' OR '.join(['signature_id = %s'] * total_signatures) + + delete_try_data = f''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND push_timestamp <= %s AND ({from_target_signatures}) + LIMIT %s + ''' + + using.execute( + delete_try_data, + [self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size], + ) + else: + PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.try_repo, + push_timestamp__lte=self._max_timestamp, + signature_id__in=self.target_signatures, + ).values_list('id')[: self._chunk_size] + ).delete() def __lookup_new_signature(self): self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE] @@ -267,24 +285,31 @@ def name(self) -> str: def remove(self, using: CursorWrapper): chunk_size = self._find_ideal_chunk_size() - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND push_timestamp <= %s - LIMIT %s - ''', - [ - self.irrelevant_repo, - self._max_timestamp, - chunk_size, - ], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + using.execute( + ''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND push_timestamp <= %s + LIMIT %s + ''', + [ + self.irrelevant_repo, + self._max_timestamp, + chunk_size, + ], + ) + else: + PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.irrelevant_repo, push_timestamp__lte=self._max_timestamp + ).values_list('id')[:chunk_size] + ).delete() def _find_ideal_chunk_size(self) -> int: max_id_of_non_expired_row = ( @@ -376,25 +401,34 @@ def name(self) -> str: return 'stalled data removal strategy' def __attempt_remove(self, using: CursorWrapper): - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND signature_id = %s AND push_timestamp <= %s - LIMIT %s - ''', - [ - self.target_signature.repository_id, - self.target_signature.id, - self._max_timestamp, - self._chunk_size, - ], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + using.execute( + ''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND signature_id = %s AND push_timestamp <= %s + LIMIT %s + ''', + [ + self.target_signature.repository_id, + self.target_signature.id, + self._max_timestamp, + self._chunk_size, + ], + ) + else: + PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.target_signature.repository_id, + signature_id=self.target_signature.id, + push_timestamp__lte=self._max_timestamp, + ).values_list('id')[: self._chunk_size] + ).delete() def __lookup_new_signature(self): try: From 877e89aa8c3ef215d755f4514c4edb8f2266c4f2 Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Thu, 20 Jul 2023 10:52:52 +0200 Subject: [PATCH 17/22] Update cursor.rowcount everywhere --- treeherder/model/data_cycling/removal_strategies.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/treeherder/model/data_cycling/removal_strategies.py b/treeherder/model/data_cycling/removal_strategies.py index 15f2e721e28..272dbec6d6d 100644 --- a/treeherder/model/data_cycling/removal_strategies.py +++ b/treeherder/model/data_cycling/removal_strategies.py @@ -214,13 +214,14 @@ def __attempt_remove(self, using): [self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size], ) else: - PerformanceDatum.objects.filter( + deleted, _ = PerformanceDatum.objects.filter( id__in=PerformanceDatum.objects.filter( repository_id=self.try_repo, push_timestamp__lte=self._max_timestamp, signature_id__in=self.target_signatures, ).values_list('id')[: self._chunk_size] ).delete() + using.rowcount = deleted def __lookup_new_signature(self): self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE] @@ -305,11 +306,12 @@ def remove(self, using: CursorWrapper): ], ) else: - PerformanceDatum.objects.filter( + deleted, _ = PerformanceDatum.objects.filter( id__in=PerformanceDatum.objects.filter( repository_id=self.irrelevant_repo, push_timestamp__lte=self._max_timestamp ).values_list('id')[:chunk_size] ).delete() + using.rowcount = deleted def _find_ideal_chunk_size(self) -> int: max_id_of_non_expired_row = ( @@ -422,13 +424,14 @@ def __attempt_remove(self, using: CursorWrapper): ], ) else: - PerformanceDatum.objects.filter( + deleted, _ = PerformanceDatum.objects.filter( id__in=PerformanceDatum.objects.filter( repository_id=self.target_signature.repository_id, signature_id=self.target_signature.id, push_timestamp__lte=self._max_timestamp, ).values_list('id')[: self._chunk_size] ).delete() + using.rowcount = deleted def __lookup_new_signature(self): try: From b606ac6fabd51112286ba191199d804eac370494 Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Fri, 21 Jul 2023 13:43:19 +0200 Subject: [PATCH 18/22] Enforce querysets order between mysql and psql --- tests/conftest.py | 4 +-- tests/model/test_files_bugzilla_map.py | 32 +++++++++++-------- .../test_backfill_reports/conftest.py | 2 +- treeherder/model/models.py | 2 +- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a58fc1ec95a..d371f30df8a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -764,7 +764,7 @@ def bugs(mock_bugzilla_api_request): process = BzApiBugProcess() process.run() - return th_models.Bugscache.objects.all() + return th_models.Bugscache.objects.all().order_by('id') @pytest.fixture @@ -1006,7 +1006,7 @@ class RefdataHolder: @pytest.fixture def bug_data(eleven_jobs_stored, test_repository, test_push, bugs): - jobs = th_models.Job.objects.all() + jobs = th_models.Job.objects.all().order_by('id') bug_id = bugs[0].id job_id = jobs[0].id th_models.BugJobMap.create(job_id=job_id, bug_id=bug_id) diff --git a/tests/model/test_files_bugzilla_map.py b/tests/model/test_files_bugzilla_map.py index 375ab738720..4d8973f51dd 100644 --- a/tests/model/test_files_bugzilla_map.py +++ b/tests/model/test_files_bugzilla_map.py @@ -91,10 +91,12 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): ('mozilla.org', 'Different path, same product, different component'), ('mozilla.org', 'Licensing'), ] - assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_1 == list( - BugzillaComponent.objects.all() - .values_list('product', 'component') - .order_by('product', 'component') + assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_1 == sorted( + list( + BugzillaComponent.objects.all() + .values_list('product', 'component') + .order_by('product', 'component') + ) ) import_process.run_id = "import_2" @@ -129,12 +131,14 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): 'File first seen on mozilla-beta', ), ] - assert EXPECTED_FILES_BUGZILLA_DATA_IMPORT_2 == list( - FilesBugzillaMap.objects.all() - .values_list( - 'path', 'file_name', 'bugzilla_component__product', 'bugzilla_component__component' + assert EXPECTED_FILES_BUGZILLA_DATA_IMPORT_2 == sorted( + list( + FilesBugzillaMap.objects.all() + .values_list( + 'path', 'file_name', 'bugzilla_component__product', 'bugzilla_component__component' + ) + .order_by('path') ) - .order_by('path') ) EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 = [ @@ -145,8 +149,10 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): ('Testing', 'web-platform-tests'), ('mozilla.org', 'Import 2: same product, different component'), ] - assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 == list( - BugzillaComponent.objects.all() - .values_list('product', 'component') - .order_by('product', 'component') + assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 == sorted( + list( + BugzillaComponent.objects.all() + .values_list('product', 'component') + .order_by('product', 'component') + ) ) diff --git a/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py b/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py index ac338767ccd..570195c74ef 100644 --- a/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py +++ b/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py @@ -204,7 +204,7 @@ def test_bad_platform_names(): def prepare_graph_data_scenario(push_ids_to_keep, highlighted_push_id, perf_alert, perf_signature): original_job_count = Job.objects.count() - selectable_jobs = Job.objects.filter(push_id__in=push_ids_to_keep).order_by('push_id') + selectable_jobs = Job.objects.filter(push_id__in=push_ids_to_keep).order_by('push_id', 'id') Job.objects.exclude(push_id__in=push_ids_to_keep).delete() assert Job.objects.count() < original_job_count diff --git a/treeherder/model/models.py b/treeherder/model/models.py index 6f7fd3eea1b..273995a30ff 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -280,7 +280,7 @@ def search(self, search_term): vector = SearchVector("summary") query = SearchQuery(search_term_fulltext) recent_qs = Bugscache.objects.annotate(rank=SearchRank(vector, query)).order_by( - "-rank" + "-rank", "id" )[0:max_size] exclude_fields = ["modified", "processed_update"] From cbdb70079924e9ff5572c18a9bbac39ac4fc98a6 Mon Sep 17 00:00:00 2001 From: EvaBardou Date: Fri, 21 Jul 2023 14:00:59 +0200 Subject: [PATCH 19/22] Fix two more tests --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index d371f30df8a..da842c7b89f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -705,7 +705,7 @@ def test_perf_data(test_perf_signature, eleven_jobs_stored): # for making things easier, ids for jobs # and push should be the same; # also, we only need a subset of jobs - perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('push__time').all() + perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('push__time', 'id').all() for index, job in enumerate(perf_jobs, start=1): job.push_id = index From 8fb4c210785bbef707d059675afe9838ea5a9e15 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 25 Jul 2023 10:37:29 +0200 Subject: [PATCH 20/22] Fix test for MySQL astral unicode replacement --- tests/log_parser/test_store_failure_lines.py | 24 ++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/log_parser/test_store_failure_lines.py b/tests/log_parser/test_store_failure_lines.py index 2ddfe98aa7c..3458b03870e 100644 --- a/tests/log_parser/test_store_failure_lines.py +++ b/tests/log_parser/test_store_failure_lines.py @@ -99,13 +99,23 @@ def test_store_error_summary_astral(activate_responses, test_repository, test_jo assert failure.repository == test_repository - assert ( - failure.test - == u"toolkit/content/tests/widgets/test_videocontrols_video_direction.html " - ) - assert failure.subtest == u"Test timed out. " - assert failure.message == u"" - assert failure.stack.endswith("") + # Specific unicode chars cannot be inserted as MySQL pseudo-UTF8 and are replaced by a plain text representation + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + assert ( + failure.test + == "toolkit/content/tests/widgets/test_videocontrols_video_direction.html " + ) + assert failure.subtest == "Test timed out. " + assert failure.message == "" + assert failure.stack.endswith("") + else: + assert ( + failure.test + == "toolkit/content/tests/widgets/test_videocontrols_video_direction.html 🍆" + ) + assert failure.subtest == "Test timed out. 𐂁" + assert failure.message == "󰅑" + assert failure.stack.endswith("󰅑") assert failure.stackwalk_stdout is None assert failure.stackwalk_stderr is None From 33d79f942ae1d79d6f2e262c2c6a8644c6822581 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 25 Jul 2023 11:01:11 +0200 Subject: [PATCH 21/22] Rebase --- .../0050_cascade_perf_datum_deletion_replicate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py b/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py index 8551a344898..b74299ca01c 100644 --- a/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py +++ b/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py @@ -1,11 +1,17 @@ # Generated by Django 3.0.8 on 2020-12-11 14:42 from django.db import migrations +from django.conf import settings DATUM_REPLICATE_CONSTRAINT_SYMBOL = ( 'performance_datum_re_performance_datum_id_fe2ed518_fk_performan' ) +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -17,7 +23,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE performance_datum_replicate ' - f'DROP FOREIGN KEY {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', f'ALTER TABLE performance_datum_replicate ' f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -25,7 +31,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE performance_datum_replicate ' - f'DROP FOREIGN KEY {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', f'ALTER TABLE performance_datum_replicate ' f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID);', From 6c93737a5b05eaa39b134c43acd78d1e2387947c Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 25 Jul 2023 12:07:23 +0200 Subject: [PATCH 22/22] Fix last failing tests with postgres --- tests/conftest.py | 2 +- tests/model/cycle_data/test_perfherder_cycling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index da842c7b89f..b13dc500a62 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -705,7 +705,7 @@ def test_perf_data(test_perf_signature, eleven_jobs_stored): # for making things easier, ids for jobs # and push should be the same; # also, we only need a subset of jobs - perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('push__time', 'id').all() + perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('id').all() for index, job in enumerate(perf_jobs, start=1): job.push_id = index diff --git a/tests/model/cycle_data/test_perfherder_cycling.py b/tests/model/cycle_data/test_perfherder_cycling.py index 8655ed84856..dfd35eb50bd 100644 --- a/tests/model/cycle_data/test_perfherder_cycling.py +++ b/tests/model/cycle_data/test_perfherder_cycling.py @@ -838,7 +838,7 @@ def test_deleting_performance_data_cascades_to_perf_datum_replicate(test_perf_da cursor = connection.cursor() cursor.execute( ''' - DELETE FROM `performance_datum` + DELETE FROM performance_datum WHERE id = %s ''', [perf_datum.id],