From 3e213ca0089d7467908f74b992a7ae1f09c6de4f Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 13:57:01 +0530 Subject: [PATCH] solving pipeline errors --- app/crud/files_crud.py | 42 ++++++++----------- app/test/test_file_ops.py | 85 ++++++++++++++++++++++----------------- requirements.txt | 2 +- 3 files changed, 67 insertions(+), 62 deletions(-) diff --git a/app/crud/files_crud.py b/app/crud/files_crud.py index 4d623cbc..3106a4b4 100644 --- a/app/crud/files_crud.py +++ b/app/crud/files_crud.py @@ -5,24 +5,20 @@ from dependencies import log -def extract_usj_chapter(converted_content: dict, chapter: int) -> dict: +def extract_dict_chapter(converted_content:dict, chapter:int) -> dict: '''Extracts just one chapter from the dict or JSON of usfm grammar''' - output_content = {"book": {}} - - if 'book' in converted_content: - for item in converted_content['book']: - if item != 'chapters': - output_content['book'][item] = converted_content['book'][item] - else: - output_content['book']['chapters'] = [] - for chapter_dict in converted_content['book']['chapters']: - if int(chapter_dict.get('chapterNumber', 0)) == chapter: - output_content['book']['chapters'].append(chapter_dict) - break - + output_content = {"book":{}} + for item in converted_content['book']: + if item != 'chapters': + output_content['book'][item] = converted_content['book'][item] + else: + output_content['book']['chapters'] = [] + for chapter_dict in converted_content['book']['chapters']: + if int(chapter_dict['chapterNumber']) == chapter: + output_content['book']['chapters'].append(chapter_dict) + break return output_content - def extract_list_chapter(converted_content: list, chapter:int) -> list: '''Extract the rows of specified chapter from usfm-grammar's list output''' output_content = [converted_content[0]] @@ -60,20 +56,15 @@ def extract_usx_chapter(converted_content, chapter:int): return output_content def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, - content_filter=usfm_grammar.Filter.PARAGRAPHS, # Updated filter name + content_filter=usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS, chapter=None): '''Tries to parse the input usfm and provide the output as per the filter and format''' usfm_parser = usfm_grammar.USFMParser(input_usfm) match output_format: case usfm_grammar.Format.JSON: - if content_filter == usfm_grammar.Filter.PARAGRAPHS: - output_content = usfm_parser.to_usj() - if chapter is not None: - output_content = extract_usj_chapter(output_content, chapter) - elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS: - output_content = usfm_parser.to_usj() - if chapter is not None: - output_content = extract_usj_chapter(output_content, chapter) + output_content = usfm_parser.to_dict(content_filter) + if chapter is not None: + output_content = extract_dict_chapter(output_content, chapter) case usfm_grammar.Format.CSV: output_content = usfm_parser.to_list(content_filter) if chapter is not None: @@ -82,10 +73,11 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, case usfm_grammar.Format.ST: output_content = usfm_parser.to_syntax_tree() if chapter is not None: - log.warning("Not implemented chapter extractor for syntax_tree") + log.warning("Not implemented chapter extracter for syntax_tree") case usfm_grammar.Format.USX: output_content = usfm_parser.to_usx(content_filter) if chapter is not None: output_content = extract_usx_chapter(output_content, chapter) output_content = etree.tostring(output_content, encoding='unicode', pretty_print=True) #pylint: disable=I1101 return output_content + \ No newline at end of file diff --git a/app/test/test_file_ops.py b/app/test/test_file_ops.py index 3f918ee4..6c8e1ab6 100644 --- a/app/test/test_file_ops.py +++ b/app/test/test_file_ops.py @@ -1,3 +1,4 @@ +''' tests for file manipulation APIs''' from . import client from . import assert_input_validation_error, assert_not_available_content @@ -20,60 +21,72 @@ def test_usfm_to_json(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/json", json=usfm_input) - - assert "type" in resp.json() - assert "version" in resp.json() - assert "content" in resp.json() - content = resp.json()["content"] - assert isinstance(content, list) - assert len(content) > 0 + assert "book" in resp.json() + assert "chapters" in resp.json()['book'] for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=paragraph", + resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-bcv", json=usfm_input) output = resp.json() - assert "type" in output - assert "content" in output - - # Iterate through the content to find chapters and verses - found_chapter = False + assert "book" in output + assert "chapters" in output['book'] + assert output['book']['chapters'][0]['chapterNumber'] found_verse = False - for content_item in output["content"]: - if content_item.get("type") == "chapter:c": - found_chapter = True - assert "number" in content_item - elif content_item.get("type") == "verse:v": + for content in output['book']['chapters'][0]['contents']: + if "verseNumber" in content and "verseText" in content: found_verse = True - assert "number" in content_item - assert "sid" in content_item - + break + assert found_verse + + for usfm_input in gospel_books_data: + resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-paragraph", + json=usfm_input) + output = resp.json() + assert "book" in output + assert "chapters" in output['book'] + assert output['book']['chapters'][0]['chapterNumber'] + found_para = False + found_verse = False + for content in output['book']['chapters'][0]['contents']: + if "paragraph" in content: + found_para = True + for item in content['paragraph']: + if "verseNumber" in item: + found_verse = True + break + assert found_para + assert found_verse + # chapter filter - resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", json=gospel_books_data[0]) + resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", + json=gospel_books_data[0]) output = resp.json() assert "book" in output + assert "chapters" in output['book'] + assert len(output['book']['chapters']) == 0 - # Check if 'book' has 'chapters' key and 'chapters' is an empty list for this particular chapter filter - if 'chapters' in output['book']: - assert len(output['book']['chapters']) == 0 - else: - - assert "book" in output - assert "chapters" not in output['book'] - + resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=2", + json=gospel_books_data[0]) + output = resp.json() + assert "book" in output + assert "chapters" in output['book'] + assert len(output['book']['chapters']) == 1 + assert int(output['book']['chapters'][0]['chapterNumber']) == 2 def test_usfm_to_table(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/table", json=usfm_input) - assert "Book\tChapter" in resp.json() for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=paragraph", + resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-paragraph", json=usfm_input) - print("RESP.JSON",resp.json()) - assert "Book\tChapter\tVerse\tText\tType" in resp.json() - + assert "Book\tChapter\tType\tContent" in resp.json() + for usfm_input in gospel_books_data: + resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-bcv", + json=usfm_input) + assert "Book\tChapter\tVerse\tText" in resp.json() # chapter filter resp = client.put(f"{UNIT_URL}usfm/to/table?chapter=2", @@ -89,4 +102,4 @@ def test_usfm_to_usx(): print(resp.json()) assert resp.json().startswith("") - + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5268fa60..3829a51c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,5 @@ beautifulsoup4==4.11.1 starlette==0.27.0 pylint==2.16.1 jsonpickle==2.2.0 -usfm-grammar==3.0.0b2 +usfm-grammar==3.0.0a4 pytz==2023.3 \ No newline at end of file