diff --git a/.github/workflows/deploy_push.yml b/.github/workflows/deploy_push.yml index 45db91a3..b1f99ad9 100644 --- a/.github/workflows/deploy_push.yml +++ b/.github/workflows/deploy_push.yml @@ -139,7 +139,7 @@ jobs: if: github.ref_name != 'master' run: | cd infra - aws s3 sync ../html s3://mtchoun-mouh-$GITHUB_REF_NAME-mtchoun-mouh.mongulu.cm --exclude "index.html" --delete + aws s3 sync ../html s3://mtchoun-mouh-$GITHUB_REF_NAME-mtchoun-mouh.mongulu.cm --exclude "index.html" --exclude "demo.html" --delete - name: "website: check readiness" uses: APina03/wait_for_response@master with: diff --git a/README.md b/README.md index 28989035..e4c8dcfb 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ If you want to see what it looks like, go to: https://mtchoun-mouh.mongulu.cm/ ### Current behavior -When a user goes to the Cameroonian Consulate in Marseille to apply for a passport, he is reminded that in order to know if his passport has been issued, he must regularly look at a page on the [consulate] website (https://www.consulacam-marseille.fr/index.php?p=consulat-cameroun-passeports). This page contains all the passport arrival notices in chronological order, beginning with the mention **Passport arrival notice of the dd month year** . +When a user goes to the Cameroonian Consulate in Marseille to apply for a passport, he is reminded that in order to know if his passport has been issued, he must regularly look at a page on the [consulate website](https://www.consulacam-marseille.fr/index.php?p=consulat-cameroun-passeports). This page contains all the passport arrival notices in chronological order, beginning with the mention **Passport arrival notice of the dd month year** . You have to scroll through this page from the beginning to a release date later than the filing date. The reason is that these are images and not published text; it is therefore impossible to use the browser's search function. 
### Problem diff --git a/coverage.svg b/coverage.svg index b4a82e60..f9eb6b47 100644 --- a/coverage.svg +++ b/coverage.svg @@ -15,7 +15,7 @@ coverage coverage - 56% - 56% + 55% + 55% diff --git a/html/error.html b/html/error.html index 6d39041c..35e5bf72 100644 --- a/html/error.html +++ b/html/error.html @@ -5,6 +5,5 @@ Error - diff --git a/infra/api/extract.py b/infra/api/extract.py index 57f9a2cc..03c95f01 100644 --- a/infra/api/extract.py +++ b/infra/api/extract.py @@ -4,6 +4,7 @@ import os import zulip from textractor import Textractor +from PIL import UnidentifiedImageError def Images_in_Bucket(Bucket_Name): """Gets a list of all image names in an S3 bucket. @@ -98,23 +99,30 @@ def Extract_Users(s3BucketName, ImageName): # sourcery no-metrics list: A list of extracted user information dicts. """ region = os.environ["REGION"] - extractor = Textractor(region_name=os.environ["REGION"]) - document = extractor.detect_document_text(file_source=f"s3://{s3BucketName}/{ImageName}") - filtered_lines = [] + extractor = Textractor(region_name=region) errors_tab = [] + filtered_lines = [] + + try: + # Try to detect document text, catch UnidentifiedImageError if occurs + document = extractor.detect_document_text(file_source=f"s3://{s3BucketName}/{ImageName}") + except UnidentifiedImageError as e: + print(f"UnidentifiedImageError: {str(e)} - Image: {ImageName}") + errors_tab.append({"error": str(e), "image": ImageName}) + return errors_tab + for line in document.lines: # Vérifie si aucun mot de stop_words n'est présent dans la ligne if not any(stop_word in str(line) for stop_word in stopWords): filtered_lines.append(str(line)) + for line in filtered_lines: - try: UserName = line.split(". ")[1:] if "." in line else line if isinstance(UserName, list): UserName = ". 
".join(UserName) if UserName != "": - # We choosed to save all the names in lower former instead of upper because of the DU stopWord - # Indeed if upper names , all persons DU like DURAND in their names will not be detected. + # We choose to save all names in lower case to avoid issues with stop words like 'DU' print(f"Username={UserName.lower()}") insert_dynamodb(UserName.lower(), ImageName) except Exception as e: @@ -140,7 +148,6 @@ def extract_names_from_images(): bucket_name = os.environ["BUCKET_NAME"] Image_List = Images_in_Bucket(bucket_name) - # Image_List = ["communique-071218-A.jpg"] for image in Image_List: print(f"-------> Image name: {image}") # TODO : recuperer le tableau d'erreurs et envoyer le mail @@ -157,9 +164,7 @@ def extract_names_from_images(): result = zulip_client.send_message(request) print(errors_tab) - Delete_Image( - bucket_name, image - ) # so that if it executed 2 times extracted images will not be there + Delete_Image(bucket_name, image) Empty_Bucket(bucket_name) diff --git a/infra/api/lambda.py b/infra/api/lambda.py index 52f94d78..59117afb 100644 --- a/infra/api/lambda.py +++ b/infra/api/lambda.py @@ -52,6 +52,26 @@ def register_handler(event, context): } ), } + else: + + os.environ['LINKS_TABLE'] = "Links" + Scan_reponse = Scan_Users(name.lower(), "Users") + image_url = Scan_reponse[0]["URLImage"] + print(image_url) + + return { + "statusCode": 200, + "headers": { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token", + "Access-Control-Allow-Methods": "GET,OPTIONS,POST,PUT", + }, + "body": json.dumps( + { + "message": f" {image_url}", + } + ), + } except Exception as e: capture_exception(e) raise diff --git a/infra/main.tf b/infra/main.tf index 2e309515..d9499144 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -1,5 +1,22 @@ locals { requirements_path = "api/requirements.txt" + url = join("/", [aws_api_gateway_deployment.test.invoke_url, 
aws_api_gateway_resource.resource.path_part]) + + demo_page = templatefile("templates/demo.tmpl", { + url = local.url + contact = var.MAINTAINER_MAIL + }) + + index_page = templatefile("templates/index.tmpl", { + url = local.url + contact = var.MAINTAINER_MAIL + }) + + terratag_added_main = { "environment" = "mtchoun-mouh-master", "project" = "mtchoun-mouh" } + + # If your backend is not Terraform Cloud, the value is ${terraform.workspace} + # otherwise the value retrieved is that of the TFC_WORKSPACE_NAME with trimprefix + workspace = var.TFC_WORKSPACE_NAME != "" ? trimprefix("${var.TFC_WORKSPACE_NAME}", "mtchoun-mouh-") : "${terraform.workspace}" } resource "aws_s3_bucket" "images" { @@ -42,10 +59,8 @@ resource "aws_s3_bucket_public_access_block" "website" { depends_on = [aws_s3_bucket.website] } - resource "aws_s3_bucket_policy" "website" { bucket = aws_s3_bucket.website.id - policy = jsonencode({ Version = "2012-10-17" Statement = [ @@ -83,8 +98,8 @@ resource "aws_dynamodb_table" "Users" { resource "aws_dynamodb_table" "Link_table" { name = (terraform.workspace == "mtchoun-mouh-master") ? 
var.table_links : "${terraform.workspace}-${var.table_links}" - billing_mode = "PROVISIONED" - read_capacity = 1 + billing_mode = "PAY_PER_REQUEST" + read_capacity = 3 write_capacity = 1 hash_key = "link" @@ -153,7 +168,7 @@ resource "aws_lambda_permission" "apigw_lambda" { resource "aws_lambda_layer_version" "test_lambda_layer" { filename = "make_lamda_layer/python.zip" layer_name = "test_lambda_layer" - compatible_runtimes = ["python3.8", "python3.7"] + compatible_runtimes = ["python3.8"] } resource "aws_lambda_function" "lambda" { @@ -193,6 +208,8 @@ resource "aws_lambda_function" "scan" { source_code_hash = data.archive_file.lambda_zip.output_base64sha256 runtime = "python3.8" timeout = 900 + layers = [aws_lambda_layer_version.test_lambda_layer.arn] //lambda_layer here is the name + depends_on = [aws_lambda_layer_version.test_lambda_layer] environment { variables = { @@ -208,7 +225,6 @@ resource "aws_lambda_function" "scan" { } } - tags = local.terratag_added_main } @@ -287,7 +303,7 @@ resource "local_file" "index_page" { } // Terraform cloud have the file but the CI no so we upload it from terraform cloud -resource "aws_s3_bucket_object" "example_file" { +resource "aws_s3_bucket_object" "index_page" { bucket = aws_s3_bucket.website.id key = "index.html" source = "../html/index.html" @@ -296,11 +312,19 @@ resource "aws_s3_bucket_object" "example_file" { depends_on = [local_file.index_page] } +resource "aws_s3_bucket_object" "demo_page" { + bucket = aws_s3_bucket.website.id + key = "demo.html" + source = "../html/demo.html" + content_type = "text/html" + + depends_on = [local_file.demo_page] +} + # Inspired from https://frama.link/GFCHrjEL module "cors" { source = "squidfunk/api-gateway-enable-cors/aws" version = "0.3.3" - api_id = aws_api_gateway_rest_api.api.id api_resource_id = aws_api_gateway_resource.resource.id } @@ -325,7 +349,3 @@ resource "aws_lambda_permission" "allow_cloudwatch_to_call_check_foo" { principal = "events.amazonaws.com" source_arn = 
aws_cloudwatch_event_rule.scheduler.arn } - -locals { - terratag_added_main = { "environment" = "mtchoun-mouh-master", "project" = "mtchoun-mouh" } -} diff --git a/infra/templates/demo.tmpl b/infra/templates/demo.tmpl new file mode 100644 index 00000000..ff49913a --- /dev/null +++ b/infra/templates/demo.tmpl @@ -0,0 +1,175 @@ + + + + + + + + + + + + Mtchoun' mouh + + + + + + + +
+
+ Logo cameroun +
+

Si vous êtes là c'est que vous voulez une démonstration. Pas de soucis 🙂

+

Nous vous proposons donc de choisir un nom au hasard dans les communiqués de 2024. + Pour cela, rien de plus simple : rendez-vous simplement sur la page du consulat + et entrez ci-dessous le nom choisi. +

+ NOTEZ BIEN + + + +
+ + + +

Retournez en page d'accueil

+ +
+ Logo cameroun +
+

© 2020 - Contact

+
+ + + + + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2b651d98..70072775 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,4 @@ selenium-wire==4.2.0 certifi>=2023.7.22 # not directly required, pinned by Snyk to avoid a vulnerability requests>=2.32.2 # not directly required, pinned by Snyk to avoid a vulnerability urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability -Werkzeug==3.0.3 -jinja2>=2.10,<3.0 # version added manualy to fix dependency-conflicts with diagrams +Werkzeug==2.1.2