Skip to content

Commit

Permalink
Fix pdftext workers config
Browse files (browse the repository at this point in the history)
  • Loading branch information
VikParuchuri committed Nov 21, 2024
1 parent e461cb6 commit fdb5564
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 10 deletions.
6 changes: 3 additions & 3 deletions marker_app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,6 @@

os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["IN_STREAMLIT"] = "true"
os.environ["PDFTEXT_CPU_WORKERS"] = "1"

import base64
import io
Expand All @@ -25,8 +24,10 @@ def load_models():

def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict):
config_parser = ConfigParser(kwargs)
config_dict = config_parser.generate_config_dict()
config_dict["pdftext_workers"] = 1
converter = PdfConverter(
config=config_parser.generate_config_dict(),
config=config_dict,
artifact_dict=model_dict,
processor_list=config_parser.get_processors(),
renderer=config_parser.get_renderer()
Expand All @@ -51,7 +52,6 @@ def img_to_html(img, img_alt):
def markdown_insert_images(markdown, images):
image_tags = re.findall(r'(!\[(?P<image_title>[^\]]*)\]\((?P<image_path>[^\)"\s]+)\s*([^\)]*)\))', markdown)

print(image_tags)
for image in image_tags:
image_markdown = image[0]
image_alt = image[1]
Expand Down
9 changes: 3 additions & 6 deletions marker_server.py
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,5 @@
import argparse
import os

import click

os.environ["PDFTEXT_CPU_WORKERS"] = "1"

import uvicorn
from pydantic import BaseModel, Field
from starlette.responses import HTMLResponse
Expand Down Expand Up @@ -83,8 +78,10 @@ async def convert_pdf(
try:
options = params.model_dump()
config_parser = ConfigParser(options)
config_dict = config_parser.generate_config_dict()
config_dict["pdftext_workers"] = 1
converter = PdfConverter(
config=config_parser.generate_config_dict(),
config=config_dict,
artifact_dict=app_data["models"],
processor_list=config_parser.get_processors(),
renderer=config_parser.get_renderer()
Expand Down
2 changes: 1 addition & 1 deletion run_marker_app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@ def run():
cur_dir = os.path.dirname(os.path.abspath(__file__))
app_path = os.path.join(cur_dir, "marker_app.py")
cmd = ["streamlit", "run", app_path]
subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true", "PDFTEXT_CPU_WORKERS": "1"})
subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true"})


if __name__ == "__main__":
Expand Down

0 comments on commit fdb5564

Please sign in to comment.