diff --git a/marker_app.py b/marker_app.py index a302cf52..5f9624cf 100644 --- a/marker_app.py +++ b/marker_app.py @@ -2,7 +2,6 @@ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" os.environ["IN_STREAMLIT"] = "true" -os.environ["PDFTEXT_CPU_WORKERS"] = "1" import base64 import io @@ -25,8 +24,10 @@ def load_models(): def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict): config_parser = ConfigParser(kwargs) + config_dict = config_parser.generate_config_dict() + config_dict["pdftext_workers"] = 1 converter = PdfConverter( - config=config_parser.generate_config_dict(), + config=config_dict, artifact_dict=model_dict, processor_list=config_parser.get_processors(), renderer=config_parser.get_renderer() @@ -51,7 +52,6 @@ def img_to_html(img, img_alt): def markdown_insert_images(markdown, images): image_tags = re.findall(r'(!\[(?P[^\]]*)\]\((?P[^\)"\s]+)\s*([^\)]*)\))', markdown) - print(image_tags) for image in image_tags: image_markdown = image[0] image_alt = image[1] diff --git a/marker_server.py b/marker_server.py index d6e52278..af1031bb 100644 --- a/marker_server.py +++ b/marker_server.py @@ -1,10 +1,5 @@ -import argparse -import os - import click -os.environ["PDFTEXT_CPU_WORKERS"] = "1" - import uvicorn from pydantic import BaseModel, Field from starlette.responses import HTMLResponse @@ -83,8 +78,10 @@ async def convert_pdf( try: options = params.model_dump() config_parser = ConfigParser(options) + config_dict = config_parser.generate_config_dict() + config_dict["pdftext_workers"] = 1 converter = PdfConverter( - config=config_parser.generate_config_dict(), + config=config_dict, artifact_dict=app_data["models"], processor_list=config_parser.get_processors(), renderer=config_parser.get_renderer() diff --git a/run_marker_app.py b/run_marker_app.py index fe027a22..03f32d7f 100644 --- a/run_marker_app.py +++ b/run_marker_app.py @@ -6,7 +6,7 @@ def run(): cur_dir = os.path.dirname(os.path.abspath(__file__)) app_path = os.path.join(cur_dir, "marker_app.py") cmd = ["streamlit", "run", app_path] - subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true", "PDFTEXT_CPU_WORKERS": "1"}) + subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true"}) if __name__ == "__main__":