From 3b0c22e4151dc0e6e704cebb901be6f994279ff9 Mon Sep 17 00:00:00 2001 From: Prasanjeet-Microsoft Date: Mon, 14 Oct 2024 12:52:49 +0530 Subject: [PATCH] feat: Implemented functionality to enable multilingual filename uploads in the Admin App. (#1404) --- code/backend/pages/01_Ingest_Data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/code/backend/pages/01_Ingest_Data.py b/code/backend/pages/01_Ingest_Data.py index 7e7f76960..8f572a719 100644 --- a/code/backend/pages/01_Ingest_Data.py +++ b/code/backend/pages/01_Ingest_Data.py @@ -1,4 +1,5 @@ from os import path +import re import streamlit as st import traceback import requests @@ -56,6 +57,11 @@ def add_urls(): add_url_embeddings(urls) +def sanitize_metadata_value(value): + # Remove invalid characters + return re.sub(r"[^a-zA-Z0-9-_ .]", "?", value) + + def add_url_embeddings(urls: list[str]): params = {} if env_helper.FUNCTION_KEY is not None: @@ -89,7 +95,7 @@ def add_url_embeddings(urls: list[str]): for up in uploaded_files: # To read file as bytes: bytes_data = up.getvalue() - title = up.name.encode("latin-1", "replace").decode("latin-1") + title = sanitize_metadata_value(up.name) if st.session_state.get("filename", "") != up.name: # Upload a new file st.session_state["filename"] = up.name