Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenSearch DSL #7068

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ jobs:
env:
TEST_RUNNER: core.testutils.runners.StdoutCapturingTestRunner
ES_PORT: ${{ job.services.elasticsearch.ports[9200] }}
ES_SCHEMA: http

- name: Prepare test coverage
if: steps.filter.outputs.backend == 'true' && matrix.toxenv == 'unittest'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/functional-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
uses: cypress-io/github-action@v2
env:
MAPBOX_ACCESS_TOKEN: ${{ secrets.MAPBOX_ACCESS_TOKEN }}
ES_SCHEMA: http
with:
spec: |
test/cypress/integration/admin/admin.js
Expand Down
9 changes: 5 additions & 4 deletions cfgov/ask_cfpb/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from ask_cfpb.models.answer_page import AnswerPage
from search.elasticsearch_helpers import (
Expand All @@ -19,8 +19,8 @@ class AnswerPageDocument(Document):
url = fields.TextField()
preview = fields.TextField(attr="answer_content_preview")

def get_queryset(self):
query_set = super().get_queryset()
def get_queryset(self, *args, **kwargs):
query_set = super().get_queryset(*args, **kwargs)
return query_set.filter(live=True, redirect_to_page=None)

def prepare_autocomplete(self, instance):
Expand All @@ -41,6 +41,7 @@ def prepare_url(self, instance):
class Index:
name = environment_specific_index("ask-cfpb")
settings = {"number_of_shards": 1, "number_of_replicas": 0}
auto_refresh = False

class Django:
model = AnswerPage
Expand Down
2 changes: 1 addition & 1 deletion cfgov/ask_cfpb/models/search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from elasticsearch.exceptions import RequestError
from opensearchpy.exceptions import RequestError

from ask_cfpb.documents import AnswerPageDocument
from search.models import AUTOCOMPLETE_MAX_CHARS
Expand Down
23 changes: 12 additions & 11 deletions cfgov/ask_cfpb/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

from django.apps import apps
from django.db import models
from django.test import TestCase
from django.test import TestCase, override_settings

from wagtail.core.models import Site

from django_elasticsearch_dsl import fields
from django_elasticsearch_dsl.documents import DocType
from django_elasticsearch_dsl.exceptions import ModelFieldNotMappedError
from django_opensearch_dsl import fields
from django_opensearch_dsl.documents import Document
from django_opensearch_dsl.exceptions import ModelFieldNotMappedError
from model_bakery import baker

from ask_cfpb.documents import AnswerPageDocument
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_ignore_signal_default(self):
self.assertFalse(AnswerPageDocument.django.ignore_signals)

def test_auto_refresh_default(self):
self.assertFalse(AnswerPageDocument.django.auto_refresh)
self.assertFalse(AnswerPageDocument.Index.auto_refresh)

def test_fields_populated(self):
mapping = AnswerPageDocument._doc_type.mapping
Expand All @@ -121,7 +121,7 @@ def test_fields_populated(self):
)

def test_to_field(self):
doc = DocType()
doc = Document()
for f in ["question", "statement"]:
nameField = doc.to_field(f, AnswerPage._meta.get_field(f))
self.assertIsInstance(nameField, fields.TextField)
Expand All @@ -142,7 +142,7 @@ def test_to_field(self):
self.assertEqual(intField._path, ["featured_rank"])

def test_to_field_with_unknown_field(self):
doc = DocType()
doc = Document()
with self.assertRaises(ModelFieldNotMappedError):
doc.to_field(
"answer_base", AnswerPage._meta.get_field("answer_base")
Expand Down Expand Up @@ -213,10 +213,11 @@ def test_prepare_es(self):
},
)

@override_settings(OPENSEARCH_DSL_AUTO_REFRESH=True)
def test_model_instance_update_no_refresh(self):
self.es_parent_page.add_child(instance=self.es_page)
self.es_page.save_revision().publish()
self.doc.django.auto_refresh = False
with patch("django_elasticsearch_dsl.documents.bulk") as mock:
self.doc.update(self.es_page)
self.assertNotIn("refresh", mock.call_args_list[0][1])
self.doc.Index.auto_refresh = False
with patch("django_opensearch_dsl.documents.bulk") as mock:
self.doc.update(self.es_page, "update")
self.assertFalse(mock.call_args_list[0][1]["refresh"])
2 changes: 1 addition & 1 deletion cfgov/ask_cfpb/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from django.test import TestCase, override_settings
from django.urls import reverse

from elasticsearch.exceptions import RequestError
from opensearchpy.exceptions import RequestError

from ask_cfpb.documents import AnswerPageDocument
from ask_cfpb.models import ENGLISH_PARENT_SLUG, SPANISH_PARENT_SLUG
Expand Down
26 changes: 18 additions & 8 deletions cfgov/cfgov/settings/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import json
import os
import secrets
from pathlib import Path

from django.conf import global_settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.translation import gettext_lazy as _

import dj_database_url
from elasticsearch import RequestsHttpConnection
from opensearchpy import RequestsHttpConnection
from requests_aws4auth import AWS4Auth

from cfgov.util import admin_emails
Expand Down Expand Up @@ -99,7 +98,7 @@
"form_explainer.apps.FormExplainerConfig",
"teachers_digital_platform",
"wagtailmedia",
"django_elasticsearch_dsl",
"django_opensearch_dsl",
"corsheaders",
"login",
"filing_instruction_guide",
Expand Down Expand Up @@ -323,10 +322,12 @@
)

# ElasticSearch 7 Configuration
TESTING = False
ES_SCHEMA = os.getenv("ES_SCHEMA", "http")
ES_HOST = os.getenv("ES_HOST", "localhost")
ES_PORT = os.getenv("ES_PORT", "9200")
ELASTICSEARCH_BIGINT = 50000
ELASTICSEARCH_DEFAULT_ANALYZER = "snowball"
OPENSEARCH_BIGINT = 50000
OPENSEARCH_DEFAULT_ANALYZER = "snowball"

if os.environ.get("USE_AWS_ES", False):
awsauth = AWS4Auth(
Expand All @@ -335,7 +336,7 @@
"us-east-1",
"es",
)
ELASTICSEARCH_DSL = {
OPENSEARCH_DSL = {
"default": {
"hosts": [{"host": ES_HOST, "port": 443}],
"http_auth": awsauth,
Expand All @@ -345,9 +346,18 @@
},
}
else:
ELASTICSEARCH_DSL = {"default": {"hosts": f"http://{ES_HOST}:{ES_PORT}"}}
OPENSEARCH_DSL = {
"default": {
"hosts": f"{ES_SCHEMA}://{ES_HOST}:{ES_PORT}",
"http_auth": (
os.getenv("ES_USER", "admin"),
os.getenv("ES_PASS", "admin"),
),
"verify_certs": False,
}
}

ELASTICSEARCH_DSL_SIGNAL_PROCESSOR = (
OPENSEARCH_DSL_SIGNAL_PROCESSOR = (
"search.elasticsearch_helpers.WagtailSignalProcessor"
)

Expand Down
4 changes: 2 additions & 2 deletions cfgov/cfgov/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
# test runner cleans up this directory after the tests run.
MEDIA_ROOT = os.path.join(PROJECT_ROOT, "cfgov", "tests", "test-media")

ELASTICSEARCH_DSL_AUTO_REFRESH = False
ELASTICSEARCH_DSL_AUTOSYNC = False
OPENSEARCH_DSL_AUTO_REFRESH = False
OPENSEARCH_DSL_AUTOSYNC = False

if os.getenv("SKIP_DJANGO_MIGRATIONS"):

Expand Down
8 changes: 4 additions & 4 deletions cfgov/paying_for_college/documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.urls import reverse

from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from paying_for_college.models import School
from search.elasticsearch_helpers import (
Expand All @@ -18,9 +18,9 @@ class SchoolDocument(Document):
url = fields.TextField()
nicknames = fields.TextField()

def get_queryset(self):
def get_queryset(self, *args, **kwargs):
"""Prevent schools that have closed from being indexed."""
query_set = super().get_queryset()
query_set = super().get_queryset(*args, **kwargs)
return query_set.filter(operating=True)

def prepare_autocomplete(self, instance):
Expand Down
4 changes: 2 additions & 2 deletions cfgov/regulations3k/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from regulations3k.models import SectionParagraph
from search.elasticsearch_helpers import environment_specific_index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.core.management import call_command
from django.core.management.base import BaseCommand

from regulations3k.documents import SectionParagraphDocument
from regulations3k.models import Part, Section


Expand All @@ -12,7 +13,19 @@
def _run_elasticsearch_rebuild():
"""Rebuild the Elasticsearch index after prepping section paragraphs."""
call_command(
"search_index", "--rebuild", "-f", "--models", "regulations3k"
"opensearch",
"index",
"--force",
"rebuild",
SectionParagraphDocument.Index.name,
)
call_command(
"opensearch",
"document",
"--force",
"--refresh",
"-i",
SectionParagraphDocument.Index.name,
)


Expand Down
30 changes: 21 additions & 9 deletions cfgov/search/elasticsearch_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
pre_page_move,
)

from django_elasticsearch_dsl.signals import BaseSignalProcessor
from elasticsearch_dsl import analyzer, token_filter, tokenizer
from django_opensearch_dsl.signals import BaseSignalProcessor
from opensearch_dsl import analyzer, token_filter, tokenizer

from search.models import Synonym

Expand Down Expand Up @@ -130,14 +130,14 @@ def setUpClass(cls):
# See https://github.com/django-es/django-elasticsearch-dsl/pull/323
# for a proposed pull request to django-elasticsearch-dsl to support
# this blocking behavior.
from elasticsearch.helpers import bulk as original_bulk
from opensearchpy.helpers import bulk as original_bulk

def bulk_with_refresh(*args, **kwargs):
kwargs.setdefault("refresh", True)
return original_bulk(*args, **kwargs)

cls.patched_es_bulk = patch(
"django_elasticsearch_dsl.documents.bulk", new=bulk_with_refresh
"django_opensearch_dsl.documents.bulk", new=bulk_with_refresh
)
cls.patched_es_bulk.start()

Expand All @@ -150,18 +150,30 @@ def tearDownClass(cls):
cls.patched_es_bulk.stop()

@staticmethod
def rebuild_elasticsearch_index(*models, stdout=sys.stdout):
def rebuild_elasticsearch_index(*indices, stdout=sys.stdout):
"""Rebuild an Elasticsearch index, waiting for its completion.

This method is an alias for the built-in search_index Django management
command provided by django-elasticsearch-dsl.
command provided by django-opensearch-dsl.

"""
call_command(
"search_index",
action="rebuild",
"opensearch",
"index",
"rebuild",
*indices,
force=True,
stdout=stdout,
)
if indices:
indices = ("-i", *indices)
call_command(
"opensearch",
"document",
"index",
"--refresh",
*indices,
force=True,
models=models,
stdout=stdout,
)

Expand Down
2 changes: 1 addition & 1 deletion cfgov/search/management/commands/es_health.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from django.core.management.base import BaseCommand, CommandError

from elasticsearch_dsl import connections
from opensearch_dsl import connections


class Command(BaseCommand):
Expand Down
2 changes: 1 addition & 1 deletion cfgov/search/tests/management/commands/test_es_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_bad_connection_name(self):
with self.assertRaises(CommandError):
call_command("es_health", "notarealconnection", stdout=StringIO())

@mock.patch("elasticsearch_dsl.connections.get_connection")
@mock.patch("opensearch_dsl.connections.get_connection")
def test_foo(self, mock_es_get_connection):
mock_elasticsearch = mock.MagicMock()
mock_elasticsearch.cat.health.return_value = "health table"
Expand Down
10 changes: 6 additions & 4 deletions cfgov/teachers_digital_platform/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from search.elasticsearch_helpers import environment_specific_index, strip_html
from teachers_digital_platform.models.pages import ActivityPage
Expand Down Expand Up @@ -55,10 +55,12 @@ class Django:
model = ActivityPage
fields = ["id"]

def get_queryset(self):
def get_queryset(self, *args, **kwargs):
"""Prevent non-live pages from being indexed."""
return (
super(ActivityPageDocument, self).get_queryset().filter(live=True)
super(ActivityPageDocument, self)
.get_queryset(*args, **kwargs)
.filter(live=True)
)

def prepare_activity_duration(self, instance):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)
from wagtail.core.fields import StreamField

from elasticsearch_dsl import Q
from opensearch_dsl import Q

from teachers_digital_platform.documents import ActivityPageDocument
from teachers_digital_platform.models.django import (
Expand Down
Loading