Skip to content

Commit

Permalink
Initial OpenSearch swapout
Browse files Browse the repository at this point in the history
  • Loading branch information
jslay-excella committed May 17, 2022
2 parents 899e6fa + e6e5890 commit 4e4c5db
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 7 deletions.
2 changes: 1 addition & 1 deletion cfgov/ask_cfpb/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_ask_search_autocomplete_honors_max_chars(self, mock_search):
self.client.get(
reverse("ask-autocomplete-en"), {"term": too_long_term}
)
self.assertTrue(mock_search.called_with(valid_term))
self.assertTrue(mock_search.called_with(too_long_term))

@mock.patch.object(AnswerPageDocument, "search")
def test_ask_search_autocomplete(self, mock_search):
Expand Down
32 changes: 29 additions & 3 deletions cfgov/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_add_link_markup_anchor(self):
)

def check_external_link(
self, url, expected_href=None, expected_pretty_href=None
self, url, expected_href=None, expected_pretty_href=None, is_gov=False
):
tag = f'<a href="{url}">foo</a>'
path = "/about-us/blog/"
Expand All @@ -144,7 +144,7 @@ def check_external_link(
expected_pretty_href = expected_pretty_href or url

# .gov URLs don't get a data-pretty-href attribute
if ".gov" not in url:
if not is_gov:
data_pretty_href = f'data-pretty-href="{expected_pretty_href}" '

expected_html = (
Expand All @@ -159,9 +159,35 @@ def check_external_link(

self.assertEqual(add_link_markup(tag, path), str(expected_tag))

def test_govdelivery_url1(self):
    """public.govdelivery.com links are expected to use a signed redirect."""
    govdelivery_url = "https://public.govdelivery.com"
    redirect_href = signed_redirect(govdelivery_url)
    self.check_external_link(govdelivery_url, expected_href=redirect_href)

def test_govdelivery_url2(self):
    """www.govdelivery.com links are expected to use a signed redirect."""
    govdelivery_url = "https://www.govdelivery.com"
    redirect_href = signed_redirect(govdelivery_url)
    self.check_external_link(govdelivery_url, expected_href=redirect_href)

def test_govdelivery_url3(self):
    """govdelivery.com links with a path are expected to use a signed redirect."""
    govdelivery_url = "https://www.govdelivery.com/something"
    redirect_href = signed_redirect(govdelivery_url)
    self.check_external_link(govdelivery_url, expected_href=redirect_href)

def test_dot_gov_urls(self):
    """A non-CFPB .gov link keeps its original href and gets no pretty-href."""
    url = "https://www.federalreserve.gov"
    # is_gov=True is required here: without it the helper builds an expected
    # tag that carries data-pretty-href, which .gov links are not given.
    self.check_external_link(
        url, expected_href=url, expected_pretty_href=None, is_gov=True
    )

def test_dot_gov_urls2(self):
    """A non-CFPB .gov link with a path keeps its href and gets no pretty-href."""
    gov_url = "https://www.federalreserve.gov/something"
    expectations = {
        "expected_href": gov_url,
        "expected_pretty_href": None,
        "is_gov": True,
    }
    self.check_external_link(gov_url, **expectations)

def test_content_cfgov(self):
    """add_link_markup returns None for a content.cfpb.gov link."""
    markup = "<a href='{}'>foo</a>".format("http://content.cfpb.gov")
    result = add_link_markup(markup, "/")
    self.assertIsNone(result)

def test_urls_with_gov_in_them(self):
url = "https://www.realgovsite.lol"
Expand Down
17 changes: 15 additions & 2 deletions cfgov/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@
# Matches http(s) URLs that do not contain ".gov" before the query string
# and whose host is not (content.)localhost.
NON_GOV_LINKS = re.compile(
    r"https?:\/\/(?:www\.)?(?![^\?]+\.gov)(?!(content\.)?localhost).*"
)

# Matches http(s) URLs that do not contain "cfpb.gov" or
# "consumerfinance.gov" before the query string and whose host is not
# (content.)localhost. The dot before "gov" is escaped so that only a
# literal "." qualifies (previously any character was accepted there).
NON_CFPB_LINKS = re.compile(
    r"(https?:\/\/(?:www\.)?(?![^\?]*(cfpb|consumerfinance)\.gov)"
    r"(?!(content\.)?localhost).*)"
)

# Splits an http(s) URL into named groups: schema, domain, port (optional)
# and path (optional).
LINK_PATTERN = re.compile(
    r"^(?P<schema>https?)://(?P<domain>[^/:]+):?(?P<port>\d+)?(?P<path>/?.*)?$"
)

# Case-insensitive match for strings ending in a document/archive extension.
DOWNLOAD_LINKS = re.compile(
    r"(?i)(\.pdf|\.doc|\.docx|\.xls|\.xlsx|\.csv|\.zip)$"
)
Expand Down Expand Up @@ -62,6 +68,13 @@
]


def should_interstitial(url: str) -> bool:
    """Return whether a link to ``url`` should get the external-site notice.

    Returns False only when the URL's domain ends in ".gov" and the URL
    matches NON_CFPB_LINKS; every other URL returns True.

    A URL that LINK_PATTERN cannot parse (for example one with an empty
    host) also returns True instead of raising AttributeError.
    """
    match = LINK_PATTERN.match(url)
    if match is None:
        # No domain could be extracted, so the URL cannot be shown to be a
        # .gov site; fall back to the notice.
        return True
    is_gov_domain = match.group("domain").endswith(".gov")
    return not (is_gov_domain and NON_CFPB_LINKS.match(url))


def sign_url(url):
signer = Signer(sep="||")
url, signature = signer.sign(url).split("||")
Expand Down Expand Up @@ -162,7 +175,7 @@ def add_link_markup(tag, request_path):
elif NON_CFPB_LINKS.match(href):
# Sets the icon to indicate you're leaving consumerfinance.gov
icon = "external-link"
if NON_GOV_LINKS.match(href):
if should_interstitial(href):
# Add pretty URL for print styles
tag["data-pretty-href"] = href
# Add the redirect notice as well
Expand Down Expand Up @@ -190,7 +203,7 @@ def add_link_markup(tag, request_path):
return str(tag)

if not icon:
return None
return

icon_classes = {"class": LINK_ICON_TEXT_CLASSES}
spans = tag.findAll("span", icon_classes)
Expand Down
2 changes: 1 addition & 1 deletion refresh-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ refresh_data() {

# Force-rebuild the search indexes through the opensearch management command.
update_index() {
  echo 'Updating search indexes'
  ./cfgov/manage.py opensearch index --force rebuild
}

get_data() {
Expand Down

0 comments on commit 4e4c5db

Please sign in to comment.