diff --git a/.circleci/config.yml b/.circleci/config.yml index fb19a9b0fc7..7576e9e93a2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,7 +49,7 @@ jobs: command: tox -e linters name: Run linters - python-tests: + python-tests-mysql: machine: image: ubuntu-2004:202010-01 steps: @@ -69,6 +69,26 @@ jobs: tox -e docker - codecov/upload + python-tests-postgres: + machine: + image: ubuntu-2004:202010-01 + steps: + - checkout + - docker/install-docker: + version: 19.03.13 + - docker/install-docker-compose: + version: 1.29.2 + - run: + command: sudo apt-get update && sudo apt-get install python3-venv -y + name: python for glean + - run: + name: Run tests and coverage within Docker container + command: | + pip install --upgrade pip + pip install tox + tox -e docker-postgres + - codecov/upload + test-docker-build: docker: - image: docker:19.03.15 @@ -142,7 +162,8 @@ workflows: jobs: - javascript-tests - builds - - python-tests + - python-tests-mysql + - python-tests-postgres - test-docker-build - deploy: filters: diff --git a/docker-compose.yml b/docker-compose.yml index ddfc557e8bd..2f68aa308f7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,6 +45,7 @@ services: depends_on: - mysql - redis + - postgres - rabbitmq stdin_open: true tty: true @@ -83,6 +84,19 @@ services: - '3306:3306' command: --character-set-server=utf8 --collation-server=utf8_bin + postgres: + container_name: postgres + # https://hub.docker.com/r/library/postgres/ + image: postgres:15-bullseye + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=mozilla1234 + - POSTGRES_DB=treeherder + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - '5432:5432' + redis: container_name: redis # https://hub.docker.com/_/redis/ @@ -121,6 +135,7 @@ services: - .:/app depends_on: - mysql + - postgres - rabbitmq platform: linux/amd64 @@ -138,9 +153,11 @@ services: - .:/app depends_on: - mysql + - postgres - redis - rabbitmq platform: linux/amd64 volumes: # TODO: Experiment with using tmpfs when testing, to speed up database-using Python tests. mysql_data: {} + postgres_data: {} diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 1cf764607b8..d40075579ed 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -17,7 +17,13 @@ function check_service () { } # Keep these in sync with DATABASE_URL. -check_service "MySQL" "mysql" 3306 +echo "Checking database status at $DATABASE_URL" +if [[ ${DATABASE_URL:0:8} == "mysql://" ]]; then + check_service "MySQL" "mysql" 3306; +fi +if [[ ${DATABASE_URL:0:7} == "psql://" ]]; then + check_service "PostgreSQL" "postgres" 5432; +fi # Keep these in sync with CELERY_BROKER_URL. 
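The entrypoint.sh hunk above now waits only for the database named by the `DATABASE_URL` scheme: `mysql://` keeps the existing MySQL check, while `psql://` waits for the new `postgres` container on port 5432. A minimal Python sketch of the same dispatch, for illustration only (the real logic stays in bash; the URL shown is the one used by the new tox environment):

```python
# Hedged sketch: map a DATABASE_URL scheme to the service/port the entrypoint
# should wait for. Only the two schemes used in this repository are handled.
from urllib.parse import urlparse

SERVICES = {
    "mysql": ("MySQL", "mysql", 3306),
    "psql": ("PostgreSQL", "postgres", 5432),
}

def service_to_check(database_url: str):
    return SERVICES.get(urlparse(database_url).scheme)

assert service_to_check("psql://postgres:mozilla1234@postgres:5432/treeherder") == (
    "PostgreSQL",
    "postgres",
    5432,
)
```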
check_service "RabbitMQ" "rabbitmq" 5672 diff --git a/requirements/common.in b/requirements/common.in index 5019f360dd2..f490477e831 100644 --- a/requirements/common.in +++ b/requirements/common.in @@ -9,6 +9,7 @@ newrelic==8.8.0 certifi==2023.5.7 mysqlclient==2.1.1 # Required by Django +psycopg2-binary==2.9.6 jsonschema==4.17.3 # import jsonschema djangorestframework==3.14.0 # Imported as rest_framework diff --git a/requirements/common.txt b/requirements/common.txt index 1bad225e357..802e5719c0d 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -831,6 +831,70 @@ prompt-toolkit==3.0.39 \ --hash=sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac \ --hash=sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88 # via click-repl +psycopg2-binary==2.9.6 \ + --hash=sha256:02c0f3757a4300cf379eb49f543fb7ac527fb00144d39246ee40e1df684ab514 \ + --hash=sha256:02c6e3cf3439e213e4ee930308dc122d6fb4d4bea9aef4a12535fbd605d1a2fe \ + --hash=sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be \ + --hash=sha256:0892ef645c2fabb0c75ec32d79f4252542d0caec1d5d949630e7d242ca4681a3 \ + --hash=sha256:0d236c2825fa656a2d98bbb0e52370a2e852e5a0ec45fc4f402977313329174d \ + --hash=sha256:0e0f754d27fddcfd74006455b6e04e6705d6c31a612ec69ddc040a5468e44b4e \ + --hash=sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081 \ + --hash=sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb \ + --hash=sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c \ + --hash=sha256:2ab652e729ff4ad76d400df2624d223d6e265ef81bb8aa17fbd63607878ecbee \ + --hash=sha256:30637a20623e2a2eacc420059be11527f4458ef54352d870b8181a4c3020ae6b \ + --hash=sha256:34b9ccdf210cbbb1303c7c4db2905fa0319391bd5904d32689e6dd5c963d2ea8 \ + --hash=sha256:38601cbbfe600362c43714482f43b7c110b20cb0f8172422c616b09b85a750c5 \ + --hash=sha256:441cc2f8869a4f0f4bb408475e5ae0ee1f3b55b33f350406150277f7f35384fc \ + --hash=sha256:498807b927ca2510baea1b05cc91d7da4718a0f53cb766c154c417a39f1820a0 \ + --hash=sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0 \ + --hash=sha256:4c727b597c6444a16e9119386b59388f8a424223302d0c06c676ec8b4bc1f963 \ + --hash=sha256:4d67fbdaf177da06374473ef6f7ed8cc0a9dc640b01abfe9e8a2ccb1b1402c1f \ + --hash=sha256:4dfb4be774c4436a4526d0c554af0cc2e02082c38303852a36f6456ece7b3503 \ + --hash=sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2 \ + --hash=sha256:51537e3d299be0db9137b321dfb6a5022caaab275775680e0c3d281feefaca6b \ + --hash=sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9 \ + --hash=sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f \ + --hash=sha256:65bee1e49fa6f9cf327ce0e01c4c10f39165ee76d35c846ade7cb0ec6683e303 \ + --hash=sha256:65c07febd1936d63bfde78948b76cd4c2a411572a44ac50719ead41947d0f26b \ + --hash=sha256:71f14375d6f73b62800530b581aed3ada394039877818b2d5f7fc77e3bb6894d \ + --hash=sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70 \ + --hash=sha256:7e13a5a2c01151f1208d5207e42f33ba86d561b7a89fca67c700b9486a06d0e2 \ + --hash=sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0 \ + --hash=sha256:8122cfc7cae0da9a3077216528b8bb3629c43b25053284cc868744bfe71eb141 \ + --hash=sha256:8338a271cb71d8da40b023a35d9c1e919eba6cbd8fa20a54b748a332c355d896 \ + --hash=sha256:84d2222e61f313c4848ff05353653bf5f5cf6ce34df540e4274516880d9c3763 \ + --hash=sha256:8a6979cf527e2603d349a91060f428bcb135aea2be3201dff794813256c274f1 
\ + --hash=sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c \ + --hash=sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b \ + --hash=sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e \ + --hash=sha256:a8c28fd40a4226b4a84bdf2d2b5b37d2c7bd49486b5adcc200e8c7ec991dfa7e \ + --hash=sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f \ + --hash=sha256:af335bac6b666cc6aea16f11d486c3b794029d9df029967f9938a4bed59b6a19 \ + --hash=sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb \ + --hash=sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6 \ + --hash=sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b \ + --hash=sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8 \ + --hash=sha256:bfb13af3c5dd3a9588000910178de17010ebcccd37b4f9794b00595e3a8ddad3 \ + --hash=sha256:c3dba7dab16709a33a847e5cd756767271697041fbe3fe97c215b1fc1f5c9848 \ + --hash=sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365 \ + --hash=sha256:c7e62ab8b332147a7593a385d4f368874d5fe4ad4e341770d4983442d89603e3 \ + --hash=sha256:c83a74b68270028dc8ee74d38ecfaf9c90eed23c8959fca95bd703d25b82c88e \ + --hash=sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1 \ + --hash=sha256:cf4499e0a83b7b7edcb8dabecbd8501d0d3a5ef66457200f77bde3d210d5debb \ + --hash=sha256:cfec476887aa231b8548ece2e06d28edc87c1397ebd83922299af2e051cf2827 \ + --hash=sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7 \ + --hash=sha256:d4e6036decf4b72d6425d5b29bbd3e8f0ff1059cda7ac7b96d6ac5ed34ffbacd \ + --hash=sha256:d57c3fd55d9058645d26ae37d76e61156a27722097229d32a9e73ed54819982a \ + --hash=sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820 \ + --hash=sha256:e3ed340d2b858d6e6fb5083f87c09996506af483227735de6964a6100b4e6a54 \ + --hash=sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df \ + --hash=sha256:e9182eb20f41417ea1dd8e8f7888c4d7c6e805f8a7c98c1081778a3da2bee3e4 \ + --hash=sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1 \ + --hash=sha256:f6a88f384335bb27812293fdb11ac6aee2ca3f51d3c7820fe03de0a304ab6249 \ + --hash=sha256:f81e65376e52f03422e1fb475c9514185669943798ed019ac50410fb4c4df232 \ + --hash=sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7 + # via -r requirements/common.in pyasn1==0.5.0 \ --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde diff --git a/tests/conftest.py b/tests/conftest.py index a58fc1ec95a..b13dc500a62 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -705,7 +705,7 @@ def test_perf_data(test_perf_signature, eleven_jobs_stored): # for making things easier, ids for jobs # and push should be the same; # also, we only need a subset of jobs - perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('push__time').all() + perf_jobs = th_models.Job.objects.filter(pk__in=range(7, 11)).order_by('id').all() for index, job in enumerate(perf_jobs, start=1): job.push_id = index @@ -764,7 +764,7 @@ def bugs(mock_bugzilla_api_request): process = BzApiBugProcess() process.run() - return th_models.Bugscache.objects.all() + return th_models.Bugscache.objects.all().order_by('id') @pytest.fixture @@ -1006,7 +1006,7 @@ class RefdataHolder: @pytest.fixture def bug_data(eleven_jobs_stored, test_repository, test_push, bugs): - jobs = 
th_models.Job.objects.all() + jobs = th_models.Job.objects.all().order_by('id') bug_id = bugs[0].id job_id = jobs[0].id th_models.BugJobMap.create(job_id=job_id, bug_id=bug_id) diff --git a/tests/etl/test_job_loader.py b/tests/etl/test_job_loader.py index 8e183fb3b7c..44650c7bc1e 100644 --- a/tests/etl/test_job_loader.py +++ b/tests/etl/test_job_loader.py @@ -147,7 +147,8 @@ def test_ingest_pulse_jobs( }, ] assert [ - {"name": item.name, "url": item.url, "parse_status": item.status} for item in job_logs.all() + {"name": item.name, "url": item.url, "parse_status": item.status} + for item in job_logs.all().order_by("name") ] == logs_expected diff --git a/tests/log_parser/test_store_failure_lines.py b/tests/log_parser/test_store_failure_lines.py index 2ddfe98aa7c..3458b03870e 100644 --- a/tests/log_parser/test_store_failure_lines.py +++ b/tests/log_parser/test_store_failure_lines.py @@ -99,13 +99,23 @@ def test_store_error_summary_astral(activate_responses, test_repository, test_jo assert failure.repository == test_repository - assert ( - failure.test - == u"toolkit/content/tests/widgets/test_videocontrols_video_direction.html " - ) - assert failure.subtest == u"Test timed out. " - assert failure.message == u"" - assert failure.stack.endswith("") + # Specific unicode chars cannot be inserted as MySQL pseudo-UTF8 and are replaced by a plain text representation + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + assert ( + failure.test + == "toolkit/content/tests/widgets/test_videocontrols_video_direction.html " + ) + assert failure.subtest == "Test timed out. " + assert failure.message == "" + assert failure.stack.endswith("") + else: + assert ( + failure.test + == "toolkit/content/tests/widgets/test_videocontrols_video_direction.html 🍆" + ) + assert failure.subtest == "Test timed out. 
𐂁" + assert failure.message == "󰅑" + assert failure.stack.endswith("󰅑") assert failure.stackwalk_stdout is None assert failure.stackwalk_stderr is None diff --git a/tests/model/cycle_data/test_perfherder_cycling.py b/tests/model/cycle_data/test_perfherder_cycling.py index 81b5fad6a71..dfd35eb50bd 100644 --- a/tests/model/cycle_data/test_perfherder_cycling.py +++ b/tests/model/cycle_data/test_perfherder_cycling.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +from django.conf import settings from django.core.management import call_command from django.db import connection, IntegrityError @@ -809,13 +810,16 @@ def test_deleting_performance_data_cascades_to_perf_multicomit_data(test_perf_da try: cursor = connection.cursor() - cursor.execute( - ''' - DELETE FROM `performance_datum` - WHERE id = %s - ''', - [perf_datum.id], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + cursor.execute( + ''' + DELETE FROM `performance_datum` + WHERE id = %s + ''', + [perf_datum.id], + ) + else: + PerformanceDatum.objects.filter(id=perf_datum.id).delete() except IntegrityError: pytest.fail() finally: @@ -834,7 +838,7 @@ def test_deleting_performance_data_cascades_to_perf_datum_replicate(test_perf_da cursor = connection.cursor() cursor.execute( ''' - DELETE FROM `performance_datum` + DELETE FROM performance_datum WHERE id = %s ''', [perf_datum.id], diff --git a/tests/model/test_files_bugzilla_map.py b/tests/model/test_files_bugzilla_map.py index 375ab738720..4d8973f51dd 100644 --- a/tests/model/test_files_bugzilla_map.py +++ b/tests/model/test_files_bugzilla_map.py @@ -91,10 +91,12 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): ('mozilla.org', 'Different path, same product, different component'), ('mozilla.org', 'Licensing'), ] - assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_1 == list( - BugzillaComponent.objects.all() - .values_list('product', 'component') - .order_by('product', 'component') + assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_1 == sorted( + list( + BugzillaComponent.objects.all() + .values_list('product', 'component') + .order_by('product', 'component') + ) ) import_process.run_id = "import_2" @@ -129,12 +131,14 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): 'File first seen on mozilla-beta', ), ] - assert EXPECTED_FILES_BUGZILLA_DATA_IMPORT_2 == list( - FilesBugzillaMap.objects.all() - .values_list( - 'path', 'file_name', 'bugzilla_component__product', 'bugzilla_component__component' + assert EXPECTED_FILES_BUGZILLA_DATA_IMPORT_2 == sorted( + list( + FilesBugzillaMap.objects.all() + .values_list( + 'path', 'file_name', 'bugzilla_component__product', 'bugzilla_component__component' + ) + .order_by('path') ) - .order_by('path') ) EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 = [ @@ -145,8 +149,10 @@ def test_data_ingestion(setup_repository_data, mock_file_bugzilla_map_request): ('Testing', 'web-platform-tests'), ('mozilla.org', 'Import 2: same product, different component'), ] - assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 == list( - BugzillaComponent.objects.all() - .values_list('product', 'component') - .order_by('product', 'component') + assert EXPECTED_BUGZILLA_COMPONENTS_IMPORT_2 == sorted( + list( + BugzillaComponent.objects.all() + .values_list('product', 'component') + .order_by('product', 'component') + ) ) diff --git a/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py b/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py index ac338767ccd..570195c74ef 
100644 --- a/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py +++ b/tests/perf/auto_perf_sheriffing/test_backfill_reports/conftest.py @@ -204,7 +204,7 @@ def test_bad_platform_names(): def prepare_graph_data_scenario(push_ids_to_keep, highlighted_push_id, perf_alert, perf_signature): original_job_count = Job.objects.count() - selectable_jobs = Job.objects.filter(push_id__in=push_ids_to_keep).order_by('push_id') + selectable_jobs = Job.objects.filter(push_id__in=push_ids_to_keep).order_by('push_id', 'id') Job.objects.exclude(push_id__in=push_ids_to_keep).delete() assert Job.objects.count() < original_job_count diff --git a/tox.ini b/tox.ini index 35ef1568a20..cfa2b756f11 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ whitelist_externals = sh docker-compose commands_pre = - docker-compose up --detach mysql redis rabbitmq + docker-compose up --detach mysql postgres redis rabbitmq pip install --no-deps -r {toxinidir}/requirements/dev.txt pip install --no-deps -r {toxinidir}/requirements/common.txt commands = @@ -42,12 +42,18 @@ commands = commands_post = [testenv:docker] +commands_pre = whitelist_externals= docker-compose -commands_pre = - docker-compose build commands = docker-compose run -e TREEHERDER_DEBUG=False backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" +[testenv:docker-postgres] +commands_pre = +whitelist_externals= + docker-compose +commands = + docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception" + [flake8] per-file-ignores = treeherder/model/models.py:E402 diff --git a/treeherder/config/settings.py b/treeherder/config/settings.py index cff9686cb35..de520b8fbd6 100644 --- a/treeherder/config/settings.py +++ b/treeherder/config/settings.py @@ -142,10 +142,15 @@ # We're intentionally not using django-environ's query string options feature, # since it hides configuration outside of the repository, plus could lead to # drift between environments. -for alias in DATABASES: +for alias, db in DATABASES.items(): # Persist database connections for 5 minutes, to avoid expensive reconnects. - DATABASES[alias]['CONN_MAX_AGE'] = 300 - DATABASES[alias]['OPTIONS'] = { + db['CONN_MAX_AGE'] = 300 + + # These options are only valid for mysql + if db['ENGINE'] != 'django.db.backends.mysql': + continue + + db['OPTIONS'] = { # Override Django's default connection charset of 'utf8', otherwise it's # still not possible to insert non-BMP unicode into utf8mb4 tables. 'charset': 'utf8mb4', @@ -157,8 +162,8 @@ } # For use of the stage replica, use the 'deployment/gcp/ca-cert.pem' path for use in your local env file # or pass the variable to docker-compose command; additional certs are in the deployment directory. 
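The settings loop above now attaches the MySQL-specific `OPTIONS` (the `utf8mb4` charset and, just below, the TLS settings) only when the configured engine is MySQL, so a `psql://` `DATABASE_URL` gets plain connection settings. A self-contained sketch of that behaviour, using a made-up two-alias `DATABASES` dict:

```python
# Hedged sketch: MySQL-only options such as 'charset' must not be passed to
# psycopg2, which does not recognise them as connection parameters.
DATABASES = {
    "default": {"ENGINE": "django.db.backends.postgresql", "HOST": "postgres"},
    "replica": {"ENGINE": "django.db.backends.mysql", "HOST": "mysql"},
}

for alias, db in DATABASES.items():
    db["CONN_MAX_AGE"] = 300  # keep connections open for 5 minutes
    if db["ENGINE"] != "django.db.backends.mysql":
        continue  # skip MySQL-specific OPTIONS for other engines
    db["OPTIONS"] = {"charset": "utf8mb4"}

assert "OPTIONS" not in DATABASES["default"]
assert DATABASES["replica"]["OPTIONS"] == {"charset": "utf8mb4"}
```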
- if connection_should_use_tls(DATABASES[alias]['HOST']): - DATABASES[alias]['OPTIONS']['ssl'] = { + if connection_should_use_tls(db['HOST']): + db['OPTIONS']['ssl'] = { 'ca': env("TLS_CERT_PATH", default=None), } diff --git a/treeherder/model/data_cycling/removal_strategies.py b/treeherder/model/data_cycling/removal_strategies.py index 008987c023a..272dbec6d6d 100644 --- a/treeherder/model/data_cycling/removal_strategies.py +++ b/treeherder/model/data_cycling/removal_strategies.py @@ -6,6 +6,7 @@ from itertools import cycle from typing import List +from django.conf import settings from django.db.backends.utils import CursorWrapper from treeherder.model.models import Repository @@ -81,20 +82,28 @@ def max_timestamp(self): def remove(self, using: CursorWrapper): chunk_size = self._find_ideal_chunk_size() - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE push_timestamp <= %s - LIMIT %s - ''', - [self._max_timestamp, chunk_size], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + using.execute( + ''' + DELETE FROM `performance_datum` + WHERE push_timestamp <= %s + LIMIT %s + ''', + [self._max_timestamp, chunk_size], + ) + else: + deleted, _ = PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + push_timestamp__lte=self._max_timestamp + ).values_list('id')[:chunk_size] + ).delete() + using.rowcount = deleted @property def name(self) -> str: @@ -184,25 +193,35 @@ def name(self) -> str: return 'try data removal strategy' def __attempt_remove(self, using): - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - total_signatures = len(self.target_signatures) - from_target_signatures = ' OR '.join(['signature_id = %s'] * total_signatures) - - delete_try_data = f''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND push_timestamp <= %s AND ({from_target_signatures}) - LIMIT %s - ''' - - using.execute( - delete_try_data, - [self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. 
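On PostgreSQL, the `else` branches added throughout this file (the `MainRemovalStrategy` one appears just above) replace MySQL's `DELETE ... LIMIT` with a queryset delete whose row set is capped by a sliced `id` subquery, since Django refuses to call `.delete()` on a sliced queryset directly. A minimal sketch of the pattern; `PerformanceDatum`, `self._max_timestamp`, and `chunk_size` are taken from the surrounding hunks, so this is illustrative rather than standalone:

```python
# Hedged sketch of the portable chunked delete used in the non-MySQL branches.
expired = PerformanceDatum.objects.filter(push_timestamp__lte=self._max_timestamp)

deleted, _ = PerformanceDatum.objects.filter(
    # cap the delete at chunk_size rows by slicing the id subquery
    id__in=expired.values_list('id')[:chunk_size]
).delete()  # QuerySet.delete() returns (total_rows_deleted, per_model_counts)
```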
+ total_signatures = len(self.target_signatures) + from_target_signatures = ' OR '.join(['signature_id = %s'] * total_signatures) + + delete_try_data = f''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND push_timestamp <= %s AND ({from_target_signatures}) + LIMIT %s + ''' + + using.execute( + delete_try_data, + [self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size], + ) + else: + deleted, _ = PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.try_repo, + push_timestamp__lte=self._max_timestamp, + signature_id__in=self.target_signatures, + ).values_list('id')[: self._chunk_size] + ).delete() + using.rowcount = deleted def __lookup_new_signature(self): self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE] @@ -267,24 +286,32 @@ def name(self) -> str: def remove(self, using: CursorWrapper): chunk_size = self._find_ideal_chunk_size() - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND push_timestamp <= %s - LIMIT %s - ''', - [ - self.irrelevant_repo, - self._max_timestamp, - chunk_size, - ], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. + using.execute( + ''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND push_timestamp <= %s + LIMIT %s + ''', + [ + self.irrelevant_repo, + self._max_timestamp, + chunk_size, + ], + ) + else: + deleted, _ = PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.irrelevant_repo, push_timestamp__lte=self._max_timestamp + ).values_list('id')[:chunk_size] + ).delete() + using.rowcount = deleted def _find_ideal_chunk_size(self) -> int: max_id_of_non_expired_row = ( @@ -376,25 +403,35 @@ def name(self) -> str: return 'stalled data removal strategy' def __attempt_remove(self, using: CursorWrapper): - # Django's queryset API doesn't support MySQL's - # DELETE statements with LIMIT constructs, - # even though this database is capable of doing that. - # - # If ever this support is added in Django, replace - # raw SQL bellow with equivalent queryset commands. - using.execute( - ''' - DELETE FROM `performance_datum` - WHERE repository_id = %s AND signature_id = %s AND push_timestamp <= %s - LIMIT %s - ''', - [ - self.target_signature.repository_id, - self.target_signature.id, - self._max_timestamp, - self._chunk_size, - ], - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Django's queryset API doesn't support MySQL's + # DELETE statements with LIMIT constructs, + # even though this database is capable of doing that. + # + # If ever this support is added in Django, replace + # raw SQL bellow with equivalent queryset commands. 
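Each of the queryset branches above ends with `using.rowcount = deleted`. `QuerySet.delete()` returns a `(total_deleted, per_model_counts)` tuple rather than updating any cursor, so the count is copied onto the cursor wrapper by hand, presumably to keep callers that read `using.rowcount` after `remove()` behaving the same on both backends. A trivial sketch of that contract (the cursor stand-in is hypothetical):

```python
# Hedged sketch: mirror the cursor's rowcount contract after an ORM delete.
class FakeCursor:          # stand-in for the CursorWrapper passed in as `using`
    rowcount = -1          # DB-API default before any statement has run

using = FakeCursor()
deleted, per_model = 1500, {'perf.PerformanceDatum': 1500}  # example delete() result
using.rowcount = deleted
assert using.rowcount == 1500
```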
+ using.execute( + ''' + DELETE FROM `performance_datum` + WHERE repository_id = %s AND signature_id = %s AND push_timestamp <= %s + LIMIT %s + ''', + [ + self.target_signature.repository_id, + self.target_signature.id, + self._max_timestamp, + self._chunk_size, + ], + ) + else: + deleted, _ = PerformanceDatum.objects.filter( + id__in=PerformanceDatum.objects.filter( + repository_id=self.target_signature.repository_id, + signature_id=self.target_signature.id, + push_timestamp__lte=self._max_timestamp, + ).values_list('id')[: self._chunk_size] + ).delete() + using.rowcount = deleted def __lookup_new_signature(self): try: diff --git a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py index b19862e4dfd..919060e0d8d 100644 --- a/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py +++ b/treeherder/model/migrations/0001_squashed_0022_modify_bugscache_and_bugjobmap.py @@ -7,6 +7,72 @@ from django.db import migrations, models +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + EXTRA_MIGRATIONS = [ + # Manually created migrations. + # Since Django doesn't natively support creating FULLTEXT indices. + migrations.RunSQL( + [ + # Suppress the MySQL warning "InnoDB rebuilding table to add column FTS_DOC_ID": + # https://dev.mysql.com/doc/refman/5.7/en/innodb-fulltext-index.html#innodb-fulltext-index-docid + # The table is empty when the index is added, so we don't care about it being rebuilt, + # and there isn't a better way to add the index without Django FULLTEXT support. + 'SET @old_max_error_count=@@max_error_count, max_error_count=0;', + 'CREATE FULLTEXT INDEX idx_summary ON bugscache (summary);', + 'SET max_error_count=@old_max_error_count;', + ], + reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'], + state_operations=[ + migrations.AddIndex( + model_name='bugscache', + index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'), + ) + ], + ), + # Since Django doesn't natively support creating composite prefix indicies for Mysql + migrations.RunSQL( + [ + 'CREATE INDEX failure_line_test_idx ON failure_line (test(50), subtest(25), status, expected, created);', + 'CREATE INDEX failure_line_signature_test_idx ON failure_line (signature(25), test(50), created);', + ], + reverse_sql=[ + 'DROP INDEX failure_line_test_idx ON failure_line;', + 'DROP INDEX failure_line_signature_test_idx ON failure_line;', + ], + state_operations=[ + migrations.AlterIndexTogether( + name='failureline', + index_together=set( + [ + ('test', 'subtest', 'status', 'expected', 'created'), + ('job_guid', 'repository'), + ('signature', 'test', 'created'), + ] + ), + ), + ], + ), + ] +else: + # On postgres we can use standard migrations + EXTRA_MIGRATIONS = [ + migrations.AlterIndexTogether( + name='failureline', + index_together=set( + [ + ('test', 'subtest', 'status', 'expected', 'created'), + ('job_guid', 'repository'), + ('signature', 'test', 'created'), + ] + ), + ), + migrations.AddIndex( + model_name='bugscache', + index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'), + ), + ] + + class Migration(migrations.Migration): initial = True @@ -965,41 +1031,4 @@ class Migration(migrations.Migration): name='bugjobmap', unique_together=set([('job', 'bug_id')]), ), - # Manually created migrations. - # Since Django doesn't natively support creating FULLTEXT indices. 
- migrations.RunSQL( - [ - # Suppress the MySQL warning "InnoDB rebuilding table to add column FTS_DOC_ID": - # https://dev.mysql.com/doc/refman/5.7/en/innodb-fulltext-index.html#innodb-fulltext-index-docid - # The table is empty when the index is added, so we don't care about it being rebuilt, - # and there isn't a better way to add the index without Django FULLTEXT support. - 'SET @old_max_error_count=@@max_error_count, max_error_count=0;', - 'CREATE FULLTEXT INDEX idx_summary ON bugscache (summary);', - 'SET max_error_count=@old_max_error_count;', - ], - reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'], - ), - # Since Django doesn't natively support creating composite prefix indicies. - migrations.RunSQL( - [ - 'CREATE INDEX failure_line_test_idx ON failure_line (test(50), subtest(25), status, expected, created);', - 'CREATE INDEX failure_line_signature_test_idx ON failure_line (signature(25), test(50), created);', - ], - reverse_sql=[ - 'DROP INDEX failure_line_test_idx ON failure_line;', - 'DROP INDEX failure_line_signature_test_idx ON failure_line;', - ], - state_operations=[ - migrations.AlterIndexTogether( - name='failureline', - index_together=set( - [ - ('test', 'subtest', 'status', 'expected', 'created'), - ('job_guid', 'repository'), - ('signature', 'test', 'created'), - ] - ), - ), - ], - ), - ] + ] + EXTRA_MIGRATIONS diff --git a/treeherder/model/models.py b/treeherder/model/models.py index 21e5503050b..273995a30ff 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -11,7 +11,9 @@ warnings.filterwarnings('ignore', category=DeprecationWarning, module='newrelic') import newrelic.agent +from django.conf import settings from django.contrib.auth.models import User +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector from django.core.cache import cache from django.core.exceptions import ObjectDoesNotExist from django.core.validators import MinLengthValidator @@ -225,6 +227,9 @@ class Bugscache(models.Model): class Meta: db_table = 'bugscache' verbose_name_plural = 'bugscache' + indexes = [ + models.Index(fields=['summary']), + ] def __str__(self): return "{0}".format(self.id) @@ -248,24 +253,35 @@ def search(self, search_term): # see https://bugzilla.mozilla.org/show_bug.cgi?id=1704311 search_term_fulltext = self.sanitized_search_term(search_term) - # Substitute escape and wildcard characters, so the search term is used - # literally in the LIKE statement. - search_term_like = ( - search_term.replace('=', '==').replace('%', '=%').replace('_', '=_').replace('\\"', '') - ) + if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + # Substitute escape and wildcard characters, so the search term is used + # literally in the LIKE statement. 
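`Bugscache.search` now splits per engine: the MySQL branch below keeps the raw `MATCH ... AGAINST`/`LIKE` query, while the PostgreSQL branch switches to Django's `django.contrib.postgres.search` helpers. Those helpers compile down to PostgreSQL's built-in full-text functions; roughly, the annotation added further down corresponds to SQL along these lines (a simplified sketch, since the exact SQL Django emits differs in details such as `COALESCE` wrapping and the text-search configuration):

```python
# Hedged sketch: approximate SQL behind SearchVector/SearchQuery/SearchRank.
approximate_postgres_sql = """
    SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of,
           ts_rank(to_tsvector(summary), plainto_tsquery(%s)) AS rank
    FROM bugscache
    ORDER BY rank DESC, id
    LIMIT %s
"""
params = [search_term_fulltext, max_size]  # names as used in this method
```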
+ search_term_like = ( + search_term.replace('=', '==') + .replace('%', '=%') + .replace('_', '=_') + .replace('\\"', '') + ) - recent_qs = self.objects.raw( - """ - SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of, - MATCH (`summary`) AGAINST (%s IN BOOLEAN MODE) AS relevance - FROM bugscache - WHERE 1 - AND `summary` LIKE CONCAT ('%%%%', %s, '%%%%') ESCAPE '=' - ORDER BY relevance DESC - LIMIT 0,%s - """, - [search_term_fulltext, search_term_like, max_size], - ) + recent_qs = self.objects.raw( + """ + SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of, + MATCH (`summary`) AGAINST (%s IN BOOLEAN MODE) AS relevance + FROM bugscache + WHERE 1 + AND `summary` LIKE CONCAT ('%%%%', %s, '%%%%') ESCAPE '=' + ORDER BY relevance DESC + LIMIT 0,%s + """, + [search_term_fulltext, search_term_like, max_size], + ) + else: + # On PostgreSQL we can use the full text search features + vector = SearchVector("summary") + query = SearchQuery(search_term_fulltext) + recent_qs = Bugscache.objects.annotate(rank=SearchRank(vector, query)).order_by( + "-rank", "id" + )[0:max_size] exclude_fields = ["modified", "processed_update"] try: diff --git a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py index f904c178339..f020966bdc0 100644 --- a/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0036_cascade_perf_datum_deletion.py @@ -1,9 +1,15 @@ # Generated by Django 3.0.8 on 2020-12-11 14:42 from django.db import migrations +from django.conf import settings MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -15,7 +21,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -23,7 +29,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);', diff --git a/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py b/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py index c76fa2043d3..bd7d53537ec 100644 --- a/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py +++ b/treeherder/perf/migrations/0044_perfdatum_bigint_fk.py @@ -3,6 +3,12 @@ to update the column and fake this migration. Migration perf.0045 will restore a valid django's schema. 
""" from django.db import migrations, connection +from django.conf import settings + +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + QUERY = "ALTER TABLE performance_datum MODIFY COLUMN id BIGINT(20) NOT NULL AUTO_INCREMENT" +else: + QUERY = "ALTER TABLE performance_datum ALTER COLUMN id TYPE BIGINT using id::bigint" def alter_perfdatum_pk(apps, schema_editor): @@ -18,9 +24,7 @@ def alter_perfdatum_pk(apps, schema_editor): if pursue.lower() not in ('', 'y', 'yes'): raise Exception("Aborting…") with connection.cursor() as cursor: - cursor.execute( - "ALTER TABLE performance_datum MODIFY COLUMN id BIGINT(20) NOT NULL AUTO_INCREMENT" - ) + cursor.execute(QUERY) return diff --git a/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py b/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py index e6e1289320c..72923dee797 100644 --- a/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py +++ b/treeherder/perf/migrations/0045_restore_perf_multicommitdatum_and_schema.py @@ -1,4 +1,5 @@ from django.db import migrations, models, connection +from django.conf import settings import django.db.models.deletion from django.db.utils import DatabaseError @@ -6,6 +7,10 @@ def check_perfdatum_pk(apps, schema_editor): """Ensure performance_datum FK has been updated to bigint type""" + # Not needed on postgresql + if settings.DATABASES['default']['ENGINE'] != 'django.db.backends.mysql': + return + with connection.cursor() as cursor: cursor.execute( "SELECT COLUMN_TYPE from INFORMATION_SCHEMA.COLUMNS WHERE " diff --git a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py index 2dacc3b1533..4fbdcb76967 100644 --- a/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py +++ b/treeherder/perf/migrations/0046_restore_cascade_perf_datum_deletion.py @@ -2,9 +2,15 @@ It restores the DB side CASCADE deletion behavior for perf_multicommitdatum table toward performance_datum """ from django.db import migrations +from django.conf import settings MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan' +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -16,7 +22,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -24,7 +30,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE perf_multicommitdatum ' - f'DROP FOREIGN KEY {MULTICOMMIT_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};', f'ALTER TABLE perf_multicommitdatum ' f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);', diff --git a/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py b/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py index 8551a344898..b74299ca01c 100644 --- 
a/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py +++ b/treeherder/perf/migrations/0050_cascade_perf_datum_deletion_replicate.py @@ -1,11 +1,17 @@ # Generated by Django 3.0.8 on 2020-12-11 14:42 from django.db import migrations +from django.conf import settings DATUM_REPLICATE_CONSTRAINT_SYMBOL = ( 'performance_datum_re_performance_datum_id_fe2ed518_fk_performan' ) +if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql': + DROP_TYPE = 'FOREIGN KEY' +else: + DROP_TYPE = 'CONSTRAINT' + class Migration(migrations.Migration): dependencies = [ @@ -17,7 +23,7 @@ class Migration(migrations.Migration): # add ON DELETE CASCADE at database level [ f'ALTER TABLE performance_datum_replicate ' - f'DROP FOREIGN KEY {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', f'ALTER TABLE performance_datum_replicate ' f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;', @@ -25,7 +31,7 @@ class Migration(migrations.Migration): # put back the non-CASCADE foreign key constraint reverse_sql=[ f'ALTER TABLE performance_datum_replicate ' - f'DROP FOREIGN KEY {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', + f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};', f'ALTER TABLE performance_datum_replicate ' f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} ' f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID);',
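The three constraint-rebuilding migrations (0036, 0046, 0050) all pick the `ALTER TABLE` clause the same way: MySQL drops a foreign key with its own `DROP FOREIGN KEY` syntax, while PostgreSQL uses the standard `DROP CONSTRAINT`. A small self-contained sketch of that selection:

```python
# Hedged sketch of the DROP_TYPE selection repeated in the perf migrations.
def drop_clause(engine: str) -> str:
    if engine == 'django.db.backends.mysql':
        return 'FOREIGN KEY'   # MySQL-specific syntax for dropping an FK
    return 'CONSTRAINT'        # standard SQL, used for PostgreSQL

assert drop_clause('django.db.backends.mysql') == 'FOREIGN KEY'
assert drop_clause('django.db.backends.postgresql') == 'CONSTRAINT'
```

With these changes in place, the PostgreSQL suite can be exercised locally with `tox -e docker-postgres`, which runs the backend container with `DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder`, mirroring the new `python-tests-postgres` CircleCI job.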