From 323a2c4b0a724e533732cb3c904e17ab51344e5d Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Thu, 5 Oct 2023 09:01:38 +0530 Subject: [PATCH 1/8] Upgrading USFM-GRAMMAR --- app/crud/files_crud.py | 41 ++++++++++------ app/routers/filehandling_apis.py | 41 +++++++++++----- app/test/test_file_ops.py | 84 +++++++++++++------------------- 3 files changed, 90 insertions(+), 76 deletions(-) diff --git a/app/crud/files_crud.py b/app/crud/files_crud.py index e466d2446..365893d5f 100644 --- a/app/crud/files_crud.py +++ b/app/crud/files_crud.py @@ -5,20 +5,24 @@ from dependencies import log -def extract_dict_chapter(converted_content:dict, chapter:int) -> dict: +def extract_usj_chapter(converted_content: dict, chapter: int) -> dict: '''Extracts just one chapter from the dict or JSON of usfm grammar''' - output_content = {"book":{}} - for item in converted_content['book']: - if item != 'chapters': - output_content['book'][item] = converted_content['book'][item] - else: - output_content['book']['chapters'] = [] - for chapter_dict in converted_content['book']['chapters']: - if int(chapter_dict['chapterNumber']) == chapter: - output_content['book']['chapters'].append(chapter_dict) - break + output_content = {"book": {}} + + if 'book' in converted_content: + for item in converted_content['book']: + if item != 'chapters': + output_content['book'][item] = converted_content['book'][item] + else: + output_content['book']['chapters'] = [] + for chapter_dict in converted_content['book']['chapters']: + if int(chapter_dict.get('chapterNumber', 0)) == chapter: + output_content['book']['chapters'].append(chapter_dict) + break + return output_content + def extract_list_chapter(converted_content: list, chapter:int) -> list: '''Extract the rows of specified chapter from usfm-grammar's list output''' output_content = [converted_content[0]] @@ -56,15 +60,20 @@ def extract_usx_chapter(converted_content, chapter:int): return output_content def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, - content_filter=usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS, + content_filter=usfm_grammar.Filter.PARAGRAPHS, # Updated filter name chapter=None): '''Tries to parse the input usfm and provide the output as per the filter and format''' usfm_parser = usfm_grammar.USFMParser(input_usfm) match output_format: case usfm_grammar.Format.JSON: - output_content = usfm_parser.to_dict(content_filter) - if chapter is not None: - output_content = extract_dict_chapter(output_content, chapter) + if content_filter == usfm_grammar.Filter.PARAGRAPHS: + output_content = usfm_parser.to_usj() + if chapter is not None: + output_content = extract_usj_chapter(output_content, chapter) + elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS: + output_content = usfm_parser.to_usj(scripture=True) + if chapter is not None: + output_content = extract_usj_chapter(output_content, chapter) case usfm_grammar.Format.CSV: output_content = usfm_parser.to_list(content_filter) if chapter is not None: @@ -73,7 +82,7 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, case usfm_grammar.Format.ST: output_content = usfm_parser.to_syntax_tree() if chapter is not None: - log.warning("Not implemented chapter extracter for syntax_tree") + log.warning("Not implemented chapter extractor for syntax_tree") case usfm_grammar.Format.USX: output_content = usfm_parser.to_usx(content_filter) if chapter is not None: diff --git a/app/routers/filehandling_apis.py b/app/routers/filehandling_apis.py index cf1bd1c24..4fb057439 100644 --- a/app/routers/filehandling_apis.py +++ b/app/routers/filehandling_apis.py @@ -24,34 +24,46 @@ async def usfm_parse_resource_bible(request: Request, resource_name: schemas.TableNamePattern = Path(..., examples="hi_IRV_1_bible"), book_code: schemas.BookCodePattern=Path(..., examples="mat"), output_format: usfm_grammar.Format = Path(..., examples="usx"), - content_filter: usfm_grammar.Filter = Query(usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS), + content_filter_str: str = Query("PARAGRAPHS"), # Use a string parameter chapter: int=Query(None, examples=1), - # verse: int=Query(None, examples=1), last_verse: int=Query(None, examples=15), active: bool=True, - # skip: int=Query(0, ge=0), limit: int=Query(100, ge=0), user_details = Depends(get_user_or_none), db_: Session = Depends(get_db)): '''Selects a bible from servers and converts it to required format using usfm-grammar''' log.info("In usfm_parse_resource_bible router function") log.debug('resource_name: %s, format: %s, filter: %s', resource_name, - output_format,content_filter) + output_format, content_filter_str) + + # Set a default value for content_filter + content_filter = None + + if content_filter_str: + # Convert the string to the enum within the function + content_filter = usfm_grammar.Filter[content_filter_str] + src_response = await content_apis.get_available_bible_book( request=request, resource_name=resource_name, book_code=book_code, content_type=schema_content.BookContentType.USFM, active=active, - skip=0,limit=100, + skip=0, limit=100, user_details=user_details, db_=db_ - ) + ) if "error" in src_response: raise GenericException(src_response['error']) if len(src_response) == 0: raise NotAvailableException(f"Book, {book_code}, is not available in {resource_name}") input_usfm = src_response[0]['USFM'] log.debug("Obtained usfm from resource bible, %s", input_usfm[:50]) - return files_crud.parse_with_usfm_grammar(input_usfm, output_format, content_filter, chapter) + + # Parse the USFM content and convert it to JSON + usfm_parser = usfm_grammar.USFMParser(input_usfm) + output_content = usfm_parser.to_usj(content_filter) + + return output_content + @router.put('/v2/files/usfm/to/{output_format}', responses={422: {"model": schemas.ErrorResponse}, 500: {"model": schemas.ErrorResponse}}, @@ -59,13 +71,20 @@ async def usfm_parse_resource_bible(request: Request, @get_auth_access_check_decorator async def parse_uploaded_usfm(request:Request, output_format: usfm_grammar.Format = Path(..., examples="usx"), - content_filter: usfm_grammar.Filter = Query(usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS), + content_filter_str: str = Query("PARAGRAPHS"), input_usfm: schema_content.UploadedUsfm = Body(...), chapter: int=Query(None, examples=1), - # verse: int=Query(None, examples=1), last_verse: int=Query(None, examples=15), user_details=Depends(get_user_or_none)): '''Allows to upload a USFM file to be converted to another format. uses usfm-grammar''' log.info("In parse_uploaded_usfm router function") - log.debug("output_format: %s, content_filter: %s", output_format, content_filter) + log.debug("output_format: %s, content_filter: %s", output_format, content_filter_str) + + # Set a default value for content_filter + content_filter = None + + if content_filter_str: + # Convert the string to the enum within the function + content_filter = usfm_grammar.Filter[content_filter_str] + return files_crud.parse_with_usfm_grammar( - input_usfm.USFM, output_format, content_filter, chapter) + input_usfm.USFM, output_format, content_filter, chapter) \ No newline at end of file diff --git a/app/test/test_file_ops.py b/app/test/test_file_ops.py index 483cd0448..3f918ee4d 100644 --- a/app/test/test_file_ops.py +++ b/app/test/test_file_ops.py @@ -1,4 +1,3 @@ -''' tests for file manipulation APIs''' from . import client from . import assert_input_validation_error, assert_not_available_content @@ -21,72 +20,60 @@ def test_usfm_to_json(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/json", json=usfm_input) - assert "book" in resp.json() - assert "chapters" in resp.json()['book'] + + assert "type" in resp.json() + assert "version" in resp.json() + assert "content" in resp.json() + content = resp.json()["content"] + assert isinstance(content, list) + assert len(content) > 0 for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-bcv", + resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=paragraph", json=usfm_input) output = resp.json() - assert "book" in output - assert "chapters" in output['book'] - assert output['book']['chapters'][0]['chapterNumber'] - found_verse = False - for content in output['book']['chapters'][0]['contents']: - if "verseNumber" in content and "verseText" in content: - found_verse = True - break - assert found_verse + assert "type" in output + assert "content" in output - for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-paragraph", - json=usfm_input) - output = resp.json() - assert "book" in output - assert "chapters" in output['book'] - assert output['book']['chapters'][0]['chapterNumber'] - found_para = False + # Iterate through the content to find chapters and verses + found_chapter = False found_verse = False - for content in output['book']['chapters'][0]['contents']: - if "paragraph" in content: - found_para = True - for item in content['paragraph']: - if "verseNumber" in item: - found_verse = True - break - assert found_para - assert found_verse - + for content_item in output["content"]: + if content_item.get("type") == "chapter:c": + found_chapter = True + assert "number" in content_item + elif content_item.get("type") == "verse:v": + found_verse = True + assert "number" in content_item + assert "sid" in content_item + # chapter filter - resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", - json=gospel_books_data[0]) + resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", json=gospel_books_data[0]) output = resp.json() assert "book" in output - assert "chapters" in output['book'] - assert len(output['book']['chapters']) == 0 - resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=2", - json=gospel_books_data[0]) - output = resp.json() - assert "book" in output - assert "chapters" in output['book'] - assert len(output['book']['chapters']) == 1 - assert int(output['book']['chapters'][0]['chapterNumber']) == 2 + # Check if 'book' has 'chapters' key and 'chapters' is an empty list for this particular chapter filter + if 'chapters' in output['book']: + assert len(output['book']['chapters']) == 0 + else: + + assert "book" in output + assert "chapters" not in output['book'] + def test_usfm_to_table(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/table", json=usfm_input) + assert "Book\tChapter" in resp.json() for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-paragraph", + resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=paragraph", json=usfm_input) - assert "Book\tChapter\tType\tContent" in resp.json() - for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-bcv", - json=usfm_input) - assert "Book\tChapter\tVerse\tText" in resp.json() + print("RESP.JSON",resp.json()) + assert "Book\tChapter\tVerse\tText\tType" in resp.json() + # chapter filter resp = client.put(f"{UNIT_URL}usfm/to/table?chapter=2", @@ -103,4 +90,3 @@ def test_usfm_to_usx(): assert resp.json().startswith("") - From 2167cdfb60902f5980061a90884aa01a2bbff6dd Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Thu, 5 Oct 2023 09:08:05 +0530 Subject: [PATCH 2/8] Upgrading USFM-GRAMMAR --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3829a51c2..5268fa608 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,5 @@ beautifulsoup4==4.11.1 starlette==0.27.0 pylint==2.16.1 jsonpickle==2.2.0 -usfm-grammar==3.0.0a4 +usfm-grammar==3.0.0b2 pytz==2023.3 \ No newline at end of file From ff83280302f231b29f4e6f56746eb266371ca247 Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Mon, 9 Oct 2023 22:33:15 +0530 Subject: [PATCH 3/8] checking runners --- app/crud/files_crud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/crud/files_crud.py b/app/crud/files_crud.py index 365893d5f..4d623cbc5 100644 --- a/app/crud/files_crud.py +++ b/app/crud/files_crud.py @@ -71,7 +71,7 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, if chapter is not None: output_content = extract_usj_chapter(output_content, chapter) elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS: - output_content = usfm_parser.to_usj(scripture=True) + output_content = usfm_parser.to_usj() if chapter is not None: output_content = extract_usj_chapter(output_content, chapter) case usfm_grammar.Format.CSV: From a62f5646a955ddade0bf3d0bc3e1d3c521a78ac5 Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 12:46:17 +0530 Subject: [PATCH 4/8] Integrating graphql to vachan-engine --- docker/docker-compose.yml | 27 +++++++++++++++++++++++++++ docker/nginx/default.conf | 10 ++++++++++ docker/nginx/prod/app.conf.template | 4 ++++ 3 files changed, 41 insertions(+) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 8ef411362..899ee8073 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -208,6 +208,33 @@ services: networks: - VE-network + vachan-cms-graphql: + image: athulyams/graphql-12:v2.0.0-alpha.1 + ports: + - "8004:8004" + expose: + - 8004 + command: uvicorn main:app --host 0.0.0.0 --port 8004 + restart: always + environment: + - VACHAN_POSTGRES_HOST=vachan-db + - VACHAN_POSTGRES_USER=${VACHAN_POSTGRES_USER:-postgres} + - VACHAN_POSTGRES_PASSWORD=${VACHAN_POSTGRES_PASSWORD:-password} + - VACHAN_POSTGRES_DATABASE=${VACHAN_POSTGRES_DATABASE:-vachan_dev} + - VACHAN_POSTGRES_PORT=5432 + - VACHAN_DOMAIN=${VACHAN_DOMAIN:-api.vachanengine.org} + - VACHAN_LOGGING_LEVEL=INFO + # volumes: + # - logs-vol:/app/logs + depends_on: + - vachan-api + - vachan-db + profiles: + - local-run + - deployment + networks: + - VE-network + # Web Server web-server-local: image: nginx:latest diff --git a/docker/nginx/default.conf b/docker/nginx/default.conf index b65f9e675..834dd6cf9 100644 --- a/docker/nginx/default.conf +++ b/docker/nginx/default.conf @@ -10,6 +10,16 @@ server { try_files $uri $uri/ =404; } + + location /graphql/ { + # Important, make sure you always remove the trailing slash + proxy_pass http://vachan-cms-graphql:8004; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-Proto $scheme; + } + location /v2/demos/ { # Important, make sure you always remove the trailing slash proxy_pass http://vachan-demos:8002; diff --git a/docker/nginx/prod/app.conf.template b/docker/nginx/prod/app.conf.template index 4a6010c67..657db4e13 100644 --- a/docker/nginx/prod/app.conf.template +++ b/docker/nginx/prod/app.conf.template @@ -36,6 +36,10 @@ server { ssl_certificate /etc/nginx/ssl/live/${VACHAN_DOMAIN}/fullchain.pem; ssl_certificate_key /etc/nginx/ssl/live/${VACHAN_DOMAIN}/privkey.pem; + + location /graphql/ { + proxy_pass http://vachan-cms-graphql:8004; + } location /v2/demos/ { proxy_pass http://vachan-demos:8002; } From 393bffe49ca9d830c230bf54a0fa0789f45ab5f2 Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 13:20:21 +0530 Subject: [PATCH 5/8] Integrating Graphql to vachan-engine --- docker/docker-compose.yml | 2 +- docker/nginx/default.conf | 3 +-- docker/nginx/prod/app.conf.template | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 83c3fcffc..02b74e45d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -47,7 +47,7 @@ services: image: oryd/kratos:v1.0.0 ports: - '4433:4433' # public - # - '4434:4434' # admin + # -'4434:4434' # admin restart: unless-stopped environment: - DSN=${VACHAN_AUTH_DATABASE:-postgres://kratos:secret@kratos-postgresd:5432/kratos?sslmode=disable&max_conns=20&max_idle_conns=4} diff --git a/docker/nginx/default.conf b/docker/nginx/default.conf index 834dd6cf9..462ba7d2c 100644 --- a/docker/nginx/default.conf +++ b/docker/nginx/default.conf @@ -10,14 +10,13 @@ server { try_files $uri $uri/ =404; } - location /graphql/ { # Important, make sure you always remove the trailing slash proxy_pass http://vachan-cms-graphql:8004; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header Host $http_host; - proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Proto $scheme; } location /v2/demos/ { diff --git a/docker/nginx/prod/app.conf.template b/docker/nginx/prod/app.conf.template index 657db4e13..d4f47161f 100644 --- a/docker/nginx/prod/app.conf.template +++ b/docker/nginx/prod/app.conf.template @@ -40,6 +40,7 @@ server { location /graphql/ { proxy_pass http://vachan-cms-graphql:8004; } + location /v2/demos/ { proxy_pass http://vachan-demos:8002; } From 520e6490ff6b6c101d1f3ead1c62fcb8688e31fb Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 13:43:07 +0530 Subject: [PATCH 6/8] solving linting errors --- app/routers/filehandling_apis.py | 39 ++++++++------------------------ 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/app/routers/filehandling_apis.py b/app/routers/filehandling_apis.py index 4fb057439..c78469938 100644 --- a/app/routers/filehandling_apis.py +++ b/app/routers/filehandling_apis.py @@ -24,46 +24,34 @@ async def usfm_parse_resource_bible(request: Request, resource_name: schemas.TableNamePattern = Path(..., examples="hi_IRV_1_bible"), book_code: schemas.BookCodePattern=Path(..., examples="mat"), output_format: usfm_grammar.Format = Path(..., examples="usx"), - content_filter_str: str = Query("PARAGRAPHS"), # Use a string parameter + content_filter: usfm_grammar.Filter = Query(usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS), chapter: int=Query(None, examples=1), + # verse: int=Query(None, examples=1), last_verse: int=Query(None, examples=15), active: bool=True, + # skip: int=Query(0, ge=0), limit: int=Query(100, ge=0), user_details = Depends(get_user_or_none), db_: Session = Depends(get_db)): '''Selects a bible from servers and converts it to required format using usfm-grammar''' log.info("In usfm_parse_resource_bible router function") log.debug('resource_name: %s, format: %s, filter: %s', resource_name, - output_format, content_filter_str) - - # Set a default value for content_filter - content_filter = None - - if content_filter_str: - # Convert the string to the enum within the function - content_filter = usfm_grammar.Filter[content_filter_str] - + output_format,content_filter) src_response = await content_apis.get_available_bible_book( request=request, resource_name=resource_name, book_code=book_code, content_type=schema_content.BookContentType.USFM, active=active, - skip=0, limit=100, + skip=0,limit=100, user_details=user_details, db_=db_ - ) + ) if "error" in src_response: raise GenericException(src_response['error']) if len(src_response) == 0: raise NotAvailableException(f"Book, {book_code}, is not available in {resource_name}") input_usfm = src_response[0]['USFM'] log.debug("Obtained usfm from resource bible, %s", input_usfm[:50]) - - # Parse the USFM content and convert it to JSON - usfm_parser = usfm_grammar.USFMParser(input_usfm) - output_content = usfm_parser.to_usj(content_filter) - - return output_content - + return files_crud.parse_with_usfm_grammar(input_usfm, output_format, content_filter, chapter) @router.put('/v2/files/usfm/to/{output_format}', responses={422: {"model": schemas.ErrorResponse}, 500: {"model": schemas.ErrorResponse}}, @@ -71,20 +59,13 @@ async def usfm_parse_resource_bible(request: Request, @get_auth_access_check_decorator async def parse_uploaded_usfm(request:Request, output_format: usfm_grammar.Format = Path(..., examples="usx"), - content_filter_str: str = Query("PARAGRAPHS"), + content_filter: usfm_grammar.Filter = Query(usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS), input_usfm: schema_content.UploadedUsfm = Body(...), chapter: int=Query(None, examples=1), + # verse: int=Query(None, examples=1), last_verse: int=Query(None, examples=15), user_details=Depends(get_user_or_none)): '''Allows to upload a USFM file to be converted to another format. uses usfm-grammar''' log.info("In parse_uploaded_usfm router function") - log.debug("output_format: %s, content_filter: %s", output_format, content_filter_str) - - # Set a default value for content_filter - content_filter = None - - if content_filter_str: - # Convert the string to the enum within the function - content_filter = usfm_grammar.Filter[content_filter_str] - + log.debug("output_format: %s, content_filter: %s", output_format, content_filter) return files_crud.parse_with_usfm_grammar( input_usfm.USFM, output_format, content_filter, chapter) \ No newline at end of file From 3e213ca0089d7467908f74b992a7ae1f09c6de4f Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 13:57:01 +0530 Subject: [PATCH 7/8] solving pipeline errors --- app/crud/files_crud.py | 42 ++++++++----------- app/test/test_file_ops.py | 85 ++++++++++++++++++++++----------------- requirements.txt | 2 +- 3 files changed, 67 insertions(+), 62 deletions(-) diff --git a/app/crud/files_crud.py b/app/crud/files_crud.py index 4d623cbc5..3106a4b47 100644 --- a/app/crud/files_crud.py +++ b/app/crud/files_crud.py @@ -5,24 +5,20 @@ from dependencies import log -def extract_usj_chapter(converted_content: dict, chapter: int) -> dict: +def extract_dict_chapter(converted_content:dict, chapter:int) -> dict: '''Extracts just one chapter from the dict or JSON of usfm grammar''' - output_content = {"book": {}} - - if 'book' in converted_content: - for item in converted_content['book']: - if item != 'chapters': - output_content['book'][item] = converted_content['book'][item] - else: - output_content['book']['chapters'] = [] - for chapter_dict in converted_content['book']['chapters']: - if int(chapter_dict.get('chapterNumber', 0)) == chapter: - output_content['book']['chapters'].append(chapter_dict) - break - + output_content = {"book":{}} + for item in converted_content['book']: + if item != 'chapters': + output_content['book'][item] = converted_content['book'][item] + else: + output_content['book']['chapters'] = [] + for chapter_dict in converted_content['book']['chapters']: + if int(chapter_dict['chapterNumber']) == chapter: + output_content['book']['chapters'].append(chapter_dict) + break return output_content - def extract_list_chapter(converted_content: list, chapter:int) -> list: '''Extract the rows of specified chapter from usfm-grammar's list output''' output_content = [converted_content[0]] @@ -60,20 +56,15 @@ def extract_usx_chapter(converted_content, chapter:int): return output_content def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, - content_filter=usfm_grammar.Filter.PARAGRAPHS, # Updated filter name + content_filter=usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS, chapter=None): '''Tries to parse the input usfm and provide the output as per the filter and format''' usfm_parser = usfm_grammar.USFMParser(input_usfm) match output_format: case usfm_grammar.Format.JSON: - if content_filter == usfm_grammar.Filter.PARAGRAPHS: - output_content = usfm_parser.to_usj() - if chapter is not None: - output_content = extract_usj_chapter(output_content, chapter) - elif content_filter == usfm_grammar.Filter.SCRIPTURE_PARAGRAPHS: - output_content = usfm_parser.to_usj() - if chapter is not None: - output_content = extract_usj_chapter(output_content, chapter) + output_content = usfm_parser.to_dict(content_filter) + if chapter is not None: + output_content = extract_dict_chapter(output_content, chapter) case usfm_grammar.Format.CSV: output_content = usfm_parser.to_list(content_filter) if chapter is not None: @@ -82,10 +73,11 @@ def parse_with_usfm_grammar(input_usfm, output_format=usfm_grammar.Format.JSON, case usfm_grammar.Format.ST: output_content = usfm_parser.to_syntax_tree() if chapter is not None: - log.warning("Not implemented chapter extractor for syntax_tree") + log.warning("Not implemented chapter extracter for syntax_tree") case usfm_grammar.Format.USX: output_content = usfm_parser.to_usx(content_filter) if chapter is not None: output_content = extract_usx_chapter(output_content, chapter) output_content = etree.tostring(output_content, encoding='unicode', pretty_print=True) #pylint: disable=I1101 return output_content + \ No newline at end of file diff --git a/app/test/test_file_ops.py b/app/test/test_file_ops.py index 3f918ee4d..6c8e1ab66 100644 --- a/app/test/test_file_ops.py +++ b/app/test/test_file_ops.py @@ -1,3 +1,4 @@ +''' tests for file manipulation APIs''' from . import client from . import assert_input_validation_error, assert_not_available_content @@ -20,60 +21,72 @@ def test_usfm_to_json(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/json", json=usfm_input) - - assert "type" in resp.json() - assert "version" in resp.json() - assert "content" in resp.json() - content = resp.json()["content"] - assert isinstance(content, list) - assert len(content) > 0 + assert "book" in resp.json() + assert "chapters" in resp.json()['book'] for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=paragraph", + resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-bcv", json=usfm_input) output = resp.json() - assert "type" in output - assert "content" in output - - # Iterate through the content to find chapters and verses - found_chapter = False + assert "book" in output + assert "chapters" in output['book'] + assert output['book']['chapters'][0]['chapterNumber'] found_verse = False - for content_item in output["content"]: - if content_item.get("type") == "chapter:c": - found_chapter = True - assert "number" in content_item - elif content_item.get("type") == "verse:v": + for content in output['book']['chapters'][0]['contents']: + if "verseNumber" in content and "verseText" in content: found_verse = True - assert "number" in content_item - assert "sid" in content_item - + break + assert found_verse + + for usfm_input in gospel_books_data: + resp = client.put(f"{UNIT_URL}usfm/to/json?content_filter=scripture-paragraph", + json=usfm_input) + output = resp.json() + assert "book" in output + assert "chapters" in output['book'] + assert output['book']['chapters'][0]['chapterNumber'] + found_para = False + found_verse = False + for content in output['book']['chapters'][0]['contents']: + if "paragraph" in content: + found_para = True + for item in content['paragraph']: + if "verseNumber" in item: + found_verse = True + break + assert found_para + assert found_verse + # chapter filter - resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", json=gospel_books_data[0]) + resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=10", + json=gospel_books_data[0]) output = resp.json() assert "book" in output + assert "chapters" in output['book'] + assert len(output['book']['chapters']) == 0 - # Check if 'book' has 'chapters' key and 'chapters' is an empty list for this particular chapter filter - if 'chapters' in output['book']: - assert len(output['book']['chapters']) == 0 - else: - - assert "book" in output - assert "chapters" not in output['book'] - + resp = client.put(f"{UNIT_URL}usfm/to/json?chapter=2", + json=gospel_books_data[0]) + output = resp.json() + assert "book" in output + assert "chapters" in output['book'] + assert len(output['book']['chapters']) == 1 + assert int(output['book']['chapters'][0]['chapterNumber']) == 2 def test_usfm_to_table(): '''positive test to convert usfm to dict format''' for usfm_input in gospel_books_data: resp = client.put(f"{UNIT_URL}usfm/to/table", json=usfm_input) - assert "Book\tChapter" in resp.json() for usfm_input in gospel_books_data: - resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=paragraph", + resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-paragraph", json=usfm_input) - print("RESP.JSON",resp.json()) - assert "Book\tChapter\tVerse\tText\tType" in resp.json() - + assert "Book\tChapter\tType\tContent" in resp.json() + for usfm_input in gospel_books_data: + resp = client.put(f"{UNIT_URL}usfm/to/table?content_filter=scripture-bcv", + json=usfm_input) + assert "Book\tChapter\tVerse\tText" in resp.json() # chapter filter resp = client.put(f"{UNIT_URL}usfm/to/table?chapter=2", @@ -89,4 +102,4 @@ def test_usfm_to_usx(): print(resp.json()) assert resp.json().startswith("") - + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5268fa608..3829a51c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,5 @@ beautifulsoup4==4.11.1 starlette==0.27.0 pylint==2.16.1 jsonpickle==2.2.0 -usfm-grammar==3.0.0b2 +usfm-grammar==3.0.0a4 pytz==2023.3 \ No newline at end of file From 54e548feb4deb7ae0d3d49620a140d57fa18f308 Mon Sep 17 00:00:00 2001 From: AthulyaMS Date: Fri, 13 Oct 2023 14:02:50 +0530 Subject: [PATCH 8/8] solving errors --- app/routers/filehandling_apis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/routers/filehandling_apis.py b/app/routers/filehandling_apis.py index c78469938..deed4d303 100644 --- a/app/routers/filehandling_apis.py +++ b/app/routers/filehandling_apis.py @@ -68,4 +68,5 @@ async def parse_uploaded_usfm(request:Request, log.info("In parse_uploaded_usfm router function") log.debug("output_format: %s, content_filter: %s", output_format, content_filter) return files_crud.parse_with_usfm_grammar( - input_usfm.USFM, output_format, content_filter, chapter) \ No newline at end of file + input_usfm.USFM, output_format, content_filter, chapter) + \ No newline at end of file