solving pipeline errors

Bridgeconn · Oct 13, 2023 · 3e213ca
1 parent 520e649
commit 3e213ca
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 62 deletions.
diff --git a/app/crud/files_crud.py b/app/crud/files_crud.py
@@ -5,24 +5,20 @@
 
 from dependencies import log
 
-def extract_usj_chapter(converted_content: dict, chapter: int) -> dict:
+def extract_dict_chapter(converted_content:dict, chapter:int) -> dict:
     '''Extracts just one chapter from the dict or JSON of usfm grammar'''
-    output_content = {"book": {}}
-
-    if 'book' in converted_content:
-        for item in converted_content['book']:
-            if item != 'chapters':
-                output_content['book'][item] = converted_content['book'][item]
-            else:
-                output_content['book']['chapters'] = []
-                for chapter_dict in converted_content['book']['chapters']:
-                    if int(chapter_dict.get('chapterNumber', 0)) == chapter:
-                        output_content['book']['chapters'].append(chapter_dict)
-                        break
-
+    output_content = {"book":{}}
+    for item in converted_content['book']:
+        if item != 'chapters':
+            output_content['book'][item] = converted_content['book'][item]
+        else:
+            output_content['book']['chapters'] = []
+            for chapter_dict in converted_content['book']['chapters']:
+                if int(chapter_dict['chapterNumber']) == chapter:
+                    output_content['book']['chapters'].append(chapter_dict)
+                    break
     return output_content
 
-
 def extract_list_chapter(converted_content: list, chapter:int) -> list:
     '''Extract the rows of specified chapter from usfm-grammar's list output'''
     output_content = [converted_content[0]]
@@ -60,20 +56,15 @@ def extract_usx_chapter(converted_content, chapter:int):
     return output_content
 
 def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON,
-    content_filter=usfm_grammar.Filter.PARAGRAPHS,  # Updated filter name
+    content_filter=usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS,
     chapter=None):
     '''Tries to parse the input usfm and provide the output as per the filter and format'''
     usfm_parser = usfm_grammar.USFMParser(input_usfm)
     match output_format:
         case usfm_grammar.Format.JSON:
-            if content_filter == usfm_grammar.Filter.PARAGRAPHS:
-                output_content = usfm_parser.to_usj()
-                if chapter is not None:
-                    output_content = extract_usj_chapter(output_content, chapter)
-            elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS:
-                output_content = usfm_parser.to_usj()
-                if chapter is not None:
-                    output_content = extract_usj_chapter(output_content, chapter)
+            output_content = usfm_parser.to_dict(content_filter)
+            if chapter is not None:
+                output_content = extract_dict_chapter(output_content, chapter)
         case usfm_grammar.Format.CSV:
             output_content = usfm_parser.to_list(content_filter)
             if chapter is not None:
@@ -82,10 +73,11 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON,
         case usfm_grammar.Format.ST:
             output_content = usfm_parser.to_syntax_tree()
             if chapter is not None:
-                log.warning("Not implemented chapter extractor for syntax_tree")
+                log.warning("Not implemented chapter extracter for syntax_tree")
         case usfm_grammar.Format.USX:
             output_content = usfm_parser.to_usx(content_filter)
             if chapter is not None:
                 output_content = extract_usx_chapter(output_content, chapter)
             output_content = etree.tostring(output_content, encoding='unicode', pretty_print=True) #pylint: disable=I1101
     return output_content
+
diff --git a/app/test/test_file_ops.py b/app/test/test_file_ops.py
@@ -1,3 +1,4 @@
+''' tests for file manipulation APIs'''
 
 from . import client
 from . import assert_input_validation_error, assert_not_available_content
@@ -20,60 +21,72 @@ def test_usfm_to_json():
     '''positive test to convert usfm to dict format'''
     for usfm_input in gospel_books_data:
         resp = client.put(f"{UNIT_URL}usfm/to/json", json=usfm_input)
-
-        assert "type" in resp.json()
-        assert "version" in resp.json()
-        assert "content" in resp.json()
-        content = resp.json()["content"]
-        assert isinstance(content, list)
-        assert len(content) > 0
+        assert "book" in resp.json()
+        assert "chapters" in resp.json()['book']
 
     for usfm_input in gospel_books_data:
-        resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=paragraph",
+        resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-bcv",
             json=usfm_input)
         output = resp.json()
-        assert "type" in output
-        assert "content" in output
-
-        # Iterate through the content to find chapters and verses
-        found_chapter = False
+        assert "book" in output
+        assert "chapters" in output['book']
+        assert output['book']['chapters'][0]['chapterNumber']
         found_verse = False
-        for content_item in output["content"]:
-            if content_item.get("type") == "chapter:c":
-                found_chapter = True
-                assert "number" in content_item 
-            elif content_item.get("type") == "verse:v":
+        for content in output['book']['chapters'][0]['contents']:
+            if "verseNumber" in content and "verseText" in content:
                 found_verse = True
-                assert "number" in content_item 
-                assert "sid" in content_item  
-
+                break
+        assert found_verse
+
+    for usfm_input in gospel_books_data:
+        resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-paragraph",
+            json=usfm_input)
+        output = resp.json()
+        assert "book" in output
+        assert "chapters" in output['book']
+        assert output['book']['chapters'][0]['chapterNumber']
+        found_para = False
+        found_verse = False
+        for content in output['book']['chapters'][0]['contents']:
+            if "paragraph" in content:
+                found_para = True
+                for item in content['paragraph']:
+                    if "verseNumber" in item:
+                        found_verse = True
+                        break
+        assert found_para
+        assert found_verse
+
     # chapter filter
-    resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", json=gospel_books_data[0])
+    resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10",
+        json=gospel_books_data[0])
     output = resp.json()
     assert "book" in output
+    assert "chapters" in output['book']
+    assert len(output['book']['chapters']) == 0
 
-    # Check if 'book' has 'chapters' key and 'chapters' is an empty list for this particular chapter filter
-    if 'chapters' in output['book']:
-        assert len(output['book']['chapters']) == 0
-    else:
-
-        assert "book" in output
-        assert "chapters" not in output['book']
-
+    resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=2",
+        json=gospel_books_data[0])
+    output = resp.json()
+    assert "book" in output
+    assert "chapters" in output['book']
+    assert len(output['book']['chapters']) == 1
+    assert int(output['book']['chapters'][0]['chapterNumber']) == 2
 
 
 def test_usfm_to_table():
     '''positive test to convert usfm to dict format'''
     for usfm_input in gospel_books_data:
         resp = client.put(f"{UNIT_URL}usfm/to/table", json=usfm_input)
-
         assert "Book\tChapter" in resp.json()
     for usfm_input in gospel_books_data:
-        resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=paragraph",
+        resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-paragraph",
             json=usfm_input)
-        print("RESP.JSON",resp.json())
-        assert "Book\tChapter\tVerse\tText\tType" in resp.json()
-
+        assert "Book\tChapter\tType\tContent" in resp.json()
+    for usfm_input in gospel_books_data:
+        resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-bcv",
+            json=usfm_input)
+        assert "Book\tChapter\tVerse\tText" in resp.json()
 
     # chapter filter
     resp = client.put(f"{UNIT_URL}usfm/to/table?chapter=2",
@@ -89,4 +102,4 @@ def test_usfm_to_usx():
         print(resp.json())
         assert resp.json().startswith("<usx")
         assert resp.json().strip().endswith("</usx>")
-
+        
diff --git a/requirements.txt b/requirements.txt
@@ -18,5 +18,5 @@ beautifulsoup4==4.11.1
 starlette==0.27.0
 pylint==2.16.1
 jsonpickle==2.2.0
-usfm-grammar==3.0.0b2
+usfm-grammar==3.0.0a4
 pytz==2023.3