Skip to content

Commit

Permalink
Merge pull request #97 from robinwhittleton/nbsp-support
Browse files Browse the repository at this point in the history
Add an option to preserve the input space characters
  • Loading branch information
ppannuto authored Apr 6, 2024
2 parents 418c57c + 027dd21 commit 42f43e2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
18 changes: 15 additions & 3 deletions titlecase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,18 @@ def set_small_word_list(small=SMALL):
SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small)


def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False):
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False):
"""
:param text: Titlecases input text
:param callback: Callback function that returns the titlecase version of a specific word
:param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing
:param preserve_blank_lines: Preserve blank lines in the output
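:param normalise_space_characters: Replace every whitespace separator between words (tab, non-breaking space, etc.) with a plain space; when False (the default) the original separator characters are preserved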
:type text: str
:type callback: function
:type small_first_last: bool
:type preserve_blank_lines: bool
:type normalise_space_characters: bool
This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
Expand All @@ -100,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
processed = []
for line in lines:
all_caps = line.upper() == line
words = regex.split('[\t ]', line)
split_line = regex.split(r'(\s)', line)
words = split_line[::2]
spaces = split_line[1::2]
tc_line = []
for word in words:
if callback:
Expand Down Expand Up @@ -188,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
lambda m: m.group(0).capitalize(), tc_line[-1]
)

result = " ".join(tc_line)
if normalise_space_characters:
result = " ".join(tc_line)
else:
line_to_be_joined = tc_line + spaces
line_to_be_joined[::2] = tc_line
line_to_be_joined[1::2] = spaces
result = "".join(line_to_be_joined)

result = SUBPHRASE.sub(lambda m: '%s%s' % (
m.group(1),
Expand Down
16 changes: 15 additions & 1 deletion titlecase/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from titlecase import titlecase, set_small_word_list, create_wordlist_filter_from_file


# (executed by `test_input_output` below)
# (executed by `test_specific_string` below)
TEST_DATA = (
(
"",
Expand Down Expand Up @@ -307,6 +307,10 @@
"Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person",
"Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person",
),
(
"a mix of\tdifferent\u200aspace\u2006characters",
"A Mix of\tDifferent\u200aSpace\u2006Characters",
),
)


Expand Down Expand Up @@ -429,6 +433,16 @@ def test_complex_blanks(self):
self.assertEqual(titlecase(s, preserve_blank_lines=True),
'\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n')

class TestNormaliseSpaceCharacters(unittest.TestCase):
    """Check how ``titlecase`` treats the whitespace separators between words.

    By default the original separator characters are expected to be kept;
    with ``normalise_space_characters=True`` each separator is expected to
    become a plain space.
    """

    def test_tabs(self):
        """Tabs are kept by default and become spaces when normalising."""
        s = 'text\twith\ttabs'
        self.assertEqual(titlecase(s), 'Text\tWith\tTabs')
        self.assertEqual(
            titlecase(s, normalise_space_characters=True),
            'Text With Tabs',
        )

    def test_nbsps(self):
        """Non-breaking spaces are kept by default and normalised on request."""
        # U+00A0 is spelled as an escape so the invisible character cannot be
        # silently swapped for a plain space by editors, diffs or copy/paste,
        # which would make the first assertion pass without testing anything.
        s = 'text\u00a0with\u00a0nonbreaking\u00a0spaces'
        self.assertEqual(
            titlecase(s),
            'Text\u00a0With\u00a0Nonbreaking\u00a0Spaces',
        )
        self.assertEqual(
            titlecase(s, normalise_space_characters=True),
            'Text With Nonbreaking Spaces',
        )

if __name__ == '__main__':
unittest.main()

0 comments on commit 42f43e2

Please sign in to comment.