# Reconstructed from git patch 64c440c198b4fa08c0909557c17b7003348f659d
# (marwoodandrew, Mon, 20 Jun 2016 17:15:00 +1000):
#   [SD-4728] Attempt to match the case of the original text in the
#   Replace American Words macro
# Files touched by the patch:
#   server/aap/macros/replace_words.py
#   server/aap/macros/replace_words_test.py
#
# In replace_words.py the two helpers below are nested inside
# find_and_replace(item, **kwargs) and share its local ``diff`` dict. They are
# shown here dedented by one level so the excerpt can be read and exercised on
# its own.
#
# Behaviour pinned by the tests that accompany the patch:
#   'Color color COLOR cOlOr'   -> 'Colour colour COLOUR cOlOur'
#   'Something coloring it red' -> 'Something colouring it red'
#   diff records every distinct matched spelling, e.g. 'ColOr' -> 'Colour'.
import re

# Maps each matched spelling found in the input to the text that replaced it.
diff = {}


def repl(new, old):
    """
    Returns a version of the "new" string that matches the case of the "old" string

    :param new: the replacement text as configured
    :param old: the text actually found in the input
    :return: a string which is a version of "new" that matches the case of old.
    """
    # An empty match carries no case information.
    if not old:
        return new
    if old.islower():
        return new.lower()
    if old.isupper():
        return new.upper()
    # No cased characters at all (digits, punctuation): nothing to imitate,
    # so keep the replacement exactly as configured.
    if old.lower() == old.upper():
        return new
    # Mixed case that starts with upper case is treated as a title.
    if old[0].isupper():
        return new.title()

    # Mixed case starting in lower case: copy the case position by position
    # over the part both strings share.
    overlap = min(len(old), len(new))
    head = ''.join(
        new_char.upper() if old_char.isupper() else new_char.lower()
        for old_char, new_char in zip(old, new)
    )
    # This branch is only reached when the first character of "old" is not
    # upper case, so any characters "new" has beyond "old" are lower-cased.
    return head + new[overlap:].lower()


def do_find_replace(input_string, words_list):
    """
    Replaces every case-insensitive occurrence of each word's 'existing' text
    with its 'replacement', re-cased to match the text that was found.

    Each spelling that actually changed is recorded in ``diff``.

    :param input_string: the text to search
    :param words_list: iterable of dicts with 'existing' and 'replacement' keys
    :return: the text with all replacements applied
    """
    for word in words_list:
        existing = word.get('existing') or ''
        # An empty pattern matches at every position; there is nothing
        # meaningful to replace, so skip the entry.
        if not existing:
            continue
        replacement = word.get('replacement') or ''

        def substitute(match, replacement=replacement):
            original = match.group(0)
            adjusted = repl(replacement, original)
            if adjusted != original:
                diff[original] = adjusted
            return adjusted

        # A single re.sub pass handles each match exactly once, so it also
        # terminates when the replacement itself contains the search term
        # (e.g. 'tire' -> 'tires').
        input_string = re.sub(re.escape(existing), substitute, input_string, flags=re.IGNORECASE)

    return input_string