From 7411d5edbf126db8d4257a4184595a1869501bef Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Wed, 7 Aug 2024 22:26:38 +0200
Subject: [PATCH] modify transcript function

---
 app/main.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/app/main.py b/app/main.py
index 0c132d0..1b5e596 100755
--- a/app/main.py
+++ b/app/main.py
@@ -1924,15 +1924,28 @@ def remove_text_before_operator(text):
 
 
 def extract_names_and_descriptions(text):
-    # Define a regular expression pattern to match names and descriptions
     pattern = r'([A-Z][a-zA-Z\s]+):\s+(.*?)(?=\n[A-Z][a-zA-Z\s]+:|$)'
     matches = re.findall(pattern, text, re.DOTALL)
     extracted_data = []
+
     for match in matches:
         name = match[0].strip()
         description = match[1].strip()
-        # Append the current name and description to the list
-        extracted_data.append({'name': name, 'description': description})
+
+        # Split the description into sentences
+        sentences = re.split(r'(?<=[.!?])\s+', description)
+
+        # Add line breaks every 3 sentences
+        formatted_description = ""
+        for i, sentence in enumerate(sentences, 1):
+            formatted_description += sentence + " "
+            if i % 3 == 0:
+                formatted_description += "\n\n"
+
+        formatted_description = formatted_description.strip()
+
+        extracted_data.append({'name': name, 'description': formatted_description})
+
     return extracted_data
 
 