Skip to content

Commit

Permalink
[SD-4728] Attempt to match the case of the original text in the Replace American Words macro
Browse files Browse the repository at this point in the history
  • Loading branch information
marwoodandrew committed Jun 20, 2016
1 parent 904df81 commit 64c440c
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 7 deletions.
45 changes: 42 additions & 3 deletions server/aap/macros/replace_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,52 @@ def find_and_replace(item, **kwargs):
"""
diff = {}

def repl(new, old):
    """
    Returns a version of the "new" string that matches the case of the "old" string.

    The rules are applied in this order:

    * "old" is empty: "new" is returned unchanged (there is no case to copy).
    * "old" is all lower case: "new" is lower-cased.
    * "old" is all upper case: "new" is upper-cased.
    * "old" starts with an upper case character: "new" is title-cased.
    * otherwise the case is copied character by character for the positions
      the two strings share, and whatever is left of "new" is lower-cased.

    :param new: the replacement text whose case is to be adjusted
    :param old: the text whose case is to be imitated
    :return: a string which is a version of "new" that matches the case of old.
    """
    if not old:
        # Previously an empty "old" fell through to the character-by-character
        # branch, where the loop never ran and the loop index was unbound.
        return new

    if old.islower():
        return new.lower()
    if old.isupper():
        return new.upper()

    # the old string starts with upper case so we use the title function
    if old[:1].isupper():
        return new.title()

    # it is more complex so try to match it position by position; zip stops at
    # the shorter of the two strings
    matched = ''.join(
        new_char.upper() if old_char.isupper() else new_char.lower()
        for old_char, new_char in zip(old, new)
    )

    # Append any remaining characters from new. This branch is only reached
    # when the first character of "old" is not upper case, so the remainder is
    # always lower-cased.
    return matched + new[len(old):].lower()

def do_find_replace(input_string, words_list):
    """
    Replaces every case-insensitive occurrence of each word's "existing" text in
    the input with its "replacement" text, adjusted by ``repl`` to match the case
    of the text that was actually found.

    Each distinct piece of text that was changed is recorded in the enclosing
    ``diff`` dictionary, keyed by the text found and mapped to what replaced it.

    :param input_string: the text to search
    :param words_list: iterable of dicts read via the "existing" and "replacement" keys
    :return: the input text with the replacements applied
    """
    for word in words_list:
        existing = word.get('existing', '')
        if not existing:
            # An empty pattern matches at every position; nothing to replace.
            continue

        target = word.get('replacement', '')

        def swap(match, target=target):
            # get the original string from the input
            original = match.group(0)
            replacement = repl(target, original)
            if original != replacement:
                diff[original] = replacement
            return replacement

        # A single left-to-right pass: text that has been substituted is never
        # rescanned, so a replacement that itself contains the search text
        # cannot cause endless re-matching, and an occurrence that already
        # equals its replacement does not stop later occurrences from being
        # processed.
        input_string = re.sub(re.escape(existing), swap, input_string, flags=re.IGNORECASE)

    return input_string

Expand Down
18 changes: 14 additions & 4 deletions server/aap/macros/replace_words_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,18 @@ def test_find_replace_words_multiple_words(self):

result, diff = self.under_test_find_and_replace(item)
self.assertEqual(result['slugline'], 'this is colour')
self.assertEqual(result['headline'], 'colour is bad.')
self.assertEqual(result['headline'], 'Colour is bad.')
self.assertEqual(result['body_html'], 'this is colour. tyre is great.')
self.assertDictEqual(diff, {'color': 'colour', 'tire': 'tyre'})
self.assertDictEqual(diff, {'ColOr': 'Colour', 'color': 'colour', 'tire': 'tyre'})

def test_find_replace_words_same_words(self):
    """Repeated capitalised occurrences are all replaced and keep their capital."""
    item = {
        'body_html': 'Center is great. Center is far'
    }

    result, diff = self.under_test_find_and_replace(item)
    # Only the case-preserving expectations are asserted; the lower-cased
    # expectations contradict them and the two sets can never both hold.
    self.assertEqual(result['body_html'], 'Centre is great. Centre is far')
    self.assertDictEqual(diff, {'Center': 'Centre'})

def test_find_replace_words(self):
item = {
Expand All @@ -78,3 +78,13 @@ def test_find_replace_words_ending_with_dots(self):
self.assertEqual(result['body_html'], 'George W Bush centre opens at 5 am')
self.assertEqual(result['slugline'], 'George W Bush')
self.assertDictEqual(diff, {'a.m.': 'am', 'George W. Bush': 'George W Bush', 'center': 'centre'})

def test_repetition_of_words_with_different_case(self):
    """The same word in four different casings keeps each casing after replacement."""
    source = {'body_html': 'Color color COLOR cOlOr'}
    expected_body = 'Colour colour COLOUR cOlOur'

    # The second element of the returned pair is not inspected here.
    result, _ = self.under_test_find_and_replace(source)

    self.assertEqual(result['body_html'], expected_body)

def test_partial_word(self):
    """A match embedded in a longer word is replaced in place."""
    source = {'body_html': 'Something coloring it red'}
    expected_body = 'Something colouring it red'

    # The second element of the returned pair is not inspected here.
    result, _ = self.under_test_find_and_replace(source)

    self.assertEqual(result['body_html'], expected_body)

0 comments on commit 64c440c

Please sign in to comment.