Skip to content

Commit

Permalink
Added logic to split nested parentheses and keep track of them better…
Browse files Browse the repository at this point in the history
…, with tests
  • Loading branch information
beveradb committed Dec 4, 2024
1 parent e05874e commit 2d8bc4d
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 15 deletions.
67 changes: 53 additions & 14 deletions karaoke_lyrics_processor/karaoke_lyrics_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,45 +196,84 @@ def process_line(self, line):
Process a single line to ensure it's within the maximum length,
handle parentheses, and replace non-printable spaces.
"""
# Replace non-printable spaces at the beginning
line = self.replace_non_printable_spaces(line)
# Clean up punctuation spacing
line = self.clean_punctuation_spacing(line)
# Fix commas inside quotes
line = self.fix_commas_inside_quotes(line)

processed_lines = []
iteration_count = 0
max_iterations = 100 # Failsafe limit

while len(line) > self.max_line_length:
if iteration_count > max_iterations:
self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
break

while len(line) > self.max_line_length and iteration_count < max_iterations:
# Check if the line contains parentheses
if "(" in line and ")" in line:
start_paren = line.find("(")
end_paren = line.find(")") + 1
end_paren = self.find_matching_paren(line, start_paren)
if end_paren < len(line) and line[end_paren] == ",":
end_paren += 1

# Process text before parentheses if it exists
if start_paren > 0:
processed_lines.append(line[:start_paren].strip())
processed_lines.append(line[start_paren:end_paren].strip())
line = line[end_paren:].strip()
before_paren = line[:start_paren].strip()
processed_lines.extend(self.split_line(before_paren))

# Process text within parentheses
paren_content = line[start_paren : end_paren + 1].strip()
if len(paren_content) > self.max_line_length:
# Split the content within parentheses if it's too long
split_paren_content = self.split_line(paren_content)
processed_lines.extend(split_paren_content)
else:
processed_lines.append(paren_content)

line = line[end_paren + 1 :].strip()
else:
split_point = self.find_best_split_point(line)
processed_lines.append(line[:split_point].strip())
line = line[split_point:].strip()

iteration_count += 1

if line: # Add the remaining part if not empty
processed_lines.append(line)
if line: # Add any remaining part
processed_lines.extend(self.split_line(line))

if iteration_count >= max_iterations:
self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")

return processed_lines

def find_matching_paren(self, line, start_index):
    """
    Find the index of the closing parenthesis that matches the opening
    parenthesis at start_index.

    Nested pairs between the two are skipped by tracking the nesting depth.

    Args:
        line: Text to scan.
        start_index: Index of the opening "(" whose partner is wanted.

    Returns:
        The index of the matching ")", or -1 when start_index is out of
        range, does not point at "(", or the parenthesis is never closed.
    """
    # A negative index would silently wrap around to the end of the string,
    # and a non-paren start would report the partner of some *later*
    # parenthesis, so both are treated as "no match".
    if not 0 <= start_index < len(line) or line[start_index] != "(":
        return -1

    depth = 0
    for index, char in enumerate(line[start_index:], start_index):
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                return index
    return -1  # No matching parenthesis found

def split_line(self, line):
    """
    Split a line into multiple lines if it exceeds the maximum length.

    Args:
        line: Text to split.

    Returns:
        A list of pieces. A line that already fits within
        self.max_line_length is returned unchanged as a one-element list.
        Otherwise the line is cut repeatedly at the index chosen by
        self.find_best_split_point, each piece is stripped, and any
        non-empty remainder becomes the last element.
    """
    if len(line) <= self.max_line_length:
        return [line]

    split_lines = []
    while len(line) > self.max_line_length:
        split_point = self.find_best_split_point(line)
        if split_point <= 0:
            # A non-positive split point may leave the line unchanged, which
            # would make this loop spin forever. Fall back to a hard cut at
            # the maximum length (at least one character) so every pass
            # shortens the line.
            split_point = max(1, self.max_line_length)
        split_lines.append(line[:split_point].strip())
        line = line[split_point:].strip()

    if line:
        split_lines.append(line)

    return split_lines

def process(self):
self.logger.info(f"Processing input lyrics from {self.input_filename}")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "karaoke-lyrics-processor"
version = "0.3.2"
version = "0.4.0"
description = "Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down
92 changes: 92 additions & 0 deletions tests/test_karaoke_lyrics_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,98 @@ def test_commas_inside_quotes_with_no_commas(self):

self.assertEqual(result.strip(), expected_output.strip())

def test_long_content_within_parentheses(self):
    # An over-long parenthetical in the middle of a line: the text before it,
    # the parenthetical itself and the text after it are each expected on
    # their own output lines.
    lyric = "This line has a very long (content inside parentheses that exceeds the maximum line length) and should be split correctly."
    wanted = "\n".join(
        [
            "This line has a very long",
            "(content inside parentheses that",
            "exceeds the maximum line length)",
            "and should be split correctly.",
        ]
    )

    self.processor.input_lyrics_lines = [lyric]
    processed = self.processor.process()

    self.assertEqual(processed, wanted)

def test_long_content_within_parentheses_at_start(self):
    # Same scenario, but the over-long parenthetical opens the line.
    lyric = "(This is a very long content inside parentheses that exceeds the maximum line length) and should be split correctly."
    wanted = "\n".join(
        [
            "(This is a very long content inside",
            "parentheses that exceeds",
            "the maximum line length)",
            "and should be split correctly.",
        ]
    )

    self.processor.input_lyrics_lines = [lyric]
    processed = self.processor.process()

    self.assertEqual(processed, wanted)

def test_long_content_within_parentheses_at_end(self):
    # Same scenario, but the over-long parenthetical closes the line; the
    # trailing full stop is expected to stay attached to the last piece.
    lyric = "This line should be split correctly with (a very long content inside parentheses that exceeds the maximum line length)."
    wanted = "\n".join(
        [
            "This line should",
            "be split correctly with",
            "(a very long content inside",
            "parentheses that exceeds",
            "the maximum line length).",
        ]
    )

    self.processor.input_lyrics_lines = [lyric]
    processed = self.processor.process()

    self.assertEqual(processed, wanted)

def test_long_content_within_nested_parentheses(self):
    # Nested parentheses: the outer pair is expected to be kept together as
    # one unit and then wrapped, rather than being cut at the first ")".
    lyric = "This line has (nested (parentheses with very long content that exceeds the maximum line length)) and should be split correctly."
    wanted = "\n".join(
        [
            "This line has",
            "(nested (parentheses with very long",
            "content that exceeds",
            "the maximum line length))",
            "and should be split correctly.",
        ]
    )

    self.processor.input_lyrics_lines = [lyric]
    processed = self.processor.process()

    self.assertEqual(processed, wanted)

def test_split_line_function(self):
    # Exercise split_line directly, bypassing process().
    overlong = "This is a very long line that should be split into multiple lines because it exceeds the maximum line length."
    wanted_pieces = [
        "This is a very long line that",
        "should be split into multiple lines",
        "because it exceeds",
        "the maximum line length.",
    ]

    self.assertEqual(self.processor.split_line(overlong), wanted_pieces)

def test_find_matching_paren(self):
    # Each case is (text, index to start from, expected index of the
    # matching ")" or -1 when there is none).
    cases = (
        ("(a (b) c)", 0, 8),  # Simple nested
        ("(a (b (c) d) e)", 0, 14),  # More complex nesting
        ("(a (b (c) d) e)", 3, 11),  # Start from inner parenthesis
        ("No parentheses", 0, -1),  # No parentheses
        ("(a (b (c) d) e", 0, -1),  # Unmatched parenthesis
    )

    for text, start, wanted in cases:
        # subTest keeps going after a failure and reports which case failed.
        with self.subTest(line=text, start_index=start):
            found = self.processor.find_matching_paren(text, start)
            self.assertEqual(found, wanted)


if __name__ == "__main__":
unittest.main()

0 comments on commit 2d8bc4d

Please sign in to comment.