Skip to content

Commit

Permalink
solving pipeline errors
Browse files Browse the repository at this point in the history
  • Loading branch information
AthulyaMS committed Oct 13, 2023
1 parent 520e649 commit 3e213ca
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 62 deletions.
42 changes: 17 additions & 25 deletions app/crud/files_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,20 @@

from dependencies import log

def extract_usj_chapter(converted_content: dict, chapter: int) -> dict:
def extract_dict_chapter(converted_content:dict, chapter:int) -> dict:
'''Extracts just one chapter from the dict or JSON of usfm grammar'''
output_content = {"book": {}}

if 'book' in converted_content:
for item in converted_content['book']:
if item != 'chapters':
output_content['book'][item] = converted_content['book'][item]
else:
output_content['book']['chapters'] = []
for chapter_dict in converted_content['book']['chapters']:
if int(chapter_dict.get('chapterNumber', 0)) == chapter:
output_content['book']['chapters'].append(chapter_dict)
break

output_content = {"book":{}}
for item in converted_content['book']:
if item != 'chapters':
output_content['book'][item] = converted_content['book'][item]
else:
output_content['book']['chapters'] = []
for chapter_dict in converted_content['book']['chapters']:
if int(chapter_dict['chapterNumber']) == chapter:
output_content['book']['chapters'].append(chapter_dict)
break
return output_content


def extract_list_chapter(converted_content: list, chapter:int) -> list:
'''Extract the rows of specified chapter from usfm-grammar's list output'''
output_content = [converted_content[0]]
Expand Down Expand Up @@ -60,20 +56,15 @@ def extract_usx_chapter(converted_content, chapter:int):
return output_content

def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON,
content_filter=usfm_grammar.Filter.PARAGRAPHS, # Updated filter name
content_filter=usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS,
chapter=None):
'''Tries to parse the input usfm and provide the output as per the filter and format'''
usfm_parser = usfm_grammar.USFMParser(input_usfm)
match output_format:
case usfm_grammar.Format.JSON:
if content_filter == usfm_grammar.Filter.PARAGRAPHS:
output_content = usfm_parser.to_usj()
if chapter is not None:
output_content = extract_usj_chapter(output_content, chapter)
elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS:
output_content = usfm_parser.to_usj()
if chapter is not None:
output_content = extract_usj_chapter(output_content, chapter)
output_content = usfm_parser.to_dict(content_filter)
if chapter is not None:
output_content = extract_dict_chapter(output_content, chapter)
case usfm_grammar.Format.CSV:
output_content = usfm_parser.to_list(content_filter)
if chapter is not None:
Expand All @@ -82,10 +73,11 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON,
case usfm_grammar.Format.ST:
output_content = usfm_parser.to_syntax_tree()
if chapter is not None:
log.warning("Not implemented chapter extractor for syntax_tree")
log.warning("Not implemented chapter extracter for syntax_tree")
case usfm_grammar.Format.USX:
output_content = usfm_parser.to_usx(content_filter)
if chapter is not None:
output_content = extract_usx_chapter(output_content, chapter)
output_content = etree.tostring(output_content, encoding='unicode', pretty_print=True) #pylint: disable=I1101
return output_content

85 changes: 49 additions & 36 deletions app/test/test_file_ops.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
''' tests for file manipulation APIs'''

from . import client
from . import assert_input_validation_error, assert_not_available_content
Expand All @@ -20,60 +21,72 @@ def test_usfm_to_json():
'''positive test to convert usfm to dict format'''
for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/json", json=usfm_input)

assert "type" in resp.json()
assert "version" in resp.json()
assert "content" in resp.json()
content = resp.json()["content"]
assert isinstance(content, list)
assert len(content) > 0
assert "book" in resp.json()
assert "chapters" in resp.json()['book']

for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=paragraph",
resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-bcv",
json=usfm_input)
output = resp.json()
assert "type" in output
assert "content" in output

# Iterate through the content to find chapters and verses
found_chapter = False
assert "book" in output
assert "chapters" in output['book']
assert output['book']['chapters'][0]['chapterNumber']
found_verse = False
for content_item in output["content"]:
if content_item.get("type") == "chapter:c":
found_chapter = True
assert "number" in content_item
elif content_item.get("type") == "verse:v":
for content in output['book']['chapters'][0]['contents']:
if "verseNumber" in content and "verseText" in content:
found_verse = True
assert "number" in content_item
assert "sid" in content_item

break
assert found_verse

for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-paragraph",
json=usfm_input)
output = resp.json()
assert "book" in output
assert "chapters" in output['book']
assert output['book']['chapters'][0]['chapterNumber']
found_para = False
found_verse = False
for content in output['book']['chapters'][0]['contents']:
if "paragraph" in content:
found_para = True
for item in content['paragraph']:
if "verseNumber" in item:
found_verse = True
break
assert found_para
assert found_verse

# chapter filter
resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", json=gospel_books_data[0])
resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10",
json=gospel_books_data[0])
output = resp.json()
assert "book" in output
assert "chapters" in output['book']
assert len(output['book']['chapters']) == 0

# Check if 'book' has 'chapters' key and 'chapters' is an empty list for this particular chapter filter
if 'chapters' in output['book']:
assert len(output['book']['chapters']) == 0
else:

assert "book" in output
assert "chapters" not in output['book']

resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=2",
json=gospel_books_data[0])
output = resp.json()
assert "book" in output
assert "chapters" in output['book']
assert len(output['book']['chapters']) == 1
assert int(output['book']['chapters'][0]['chapterNumber']) == 2


def test_usfm_to_table():
'''positive test to convert usfm to table format'''
for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/table", json=usfm_input)

assert "Book\tChapter" in resp.json()
for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=paragraph",
resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-paragraph",
json=usfm_input)
print("RESP.JSON",resp.json())
assert "Book\tChapter\tVerse\tText\tType" in resp.json()

assert "Book\tChapter\tType\tContent" in resp.json()
for usfm_input in gospel_books_data:
resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-bcv",
json=usfm_input)
assert "Book\tChapter\tVerse\tText" in resp.json()

# chapter filter
resp = client.put(f"{UNIT_URL}usfm/to/table?chapter=2",
Expand All @@ -89,4 +102,4 @@ def test_usfm_to_usx():
print(resp.json())
assert resp.json().startswith("<usx")
assert resp.json().strip().endswith("</usx>")

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ beautifulsoup4==4.11.1
starlette==0.27.0
pylint==2.16.1
jsonpickle==2.2.0
usfm-grammar==3.0.0b2
usfm-grammar==3.0.0a4
pytz==2023.3

0 comments on commit 3e213ca

Please sign in to comment.