From b372dfba4ebe3e0d71ad313e757a4c93dbdf890d Mon Sep 17 00:00:00 2001 From: Ryan Scott Brown Date: Sat, 27 Jan 2024 09:32:21 -0500 Subject: [PATCH] Move benchmark.py -> marker_benchmark.py Marker produces the same top-level `benchmark.py` as one of its deps, `texify`. This PR adds a `marker_` prefix to avoid the collision. Below, see an excerpted message from my install process with the conflicting versions. ``` .../lib/python3.10/site-packages/benchmark.py was provided by: ...marker_pdf-0.1.3-py3-none-any.whl/benchmark.py ...texify-0.1.8-py3-none-any.whl/benchmark.py ``` --- README.md | 4 ++-- benchmark.py => marker_benchmark.py | 0 pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename benchmark.py => marker_benchmark.py (100%) diff --git a/README.md b/README.md index 67456b45..f969c278 100644 --- a/README.md +++ b/README.md @@ -185,10 +185,10 @@ Marker takes about 2GB of VRAM on average per task, so you can convert 24 docume You can benchmark the performance of marker on your machine. First, download the benchmark data [here](https://drive.google.com/file/d/1WiN4K2-jQfwyQMe4wSSurbpz3hxo2fG9/view?usp=drive_link) and unzip. -Then run `benchmark.py` like this: +Then run `marker_benchmark.py` like this: ``` -python benchmark.py data/pdfs data/references report.json --nougat +python marker_benchmark.py data/pdfs data/references report.json --nougat ``` This will benchmark marker against other text extraction methods. It sets up batch sizes for nougat and marker to use a similar amount of GPU RAM for each. diff --git a/benchmark.py b/marker_benchmark.py similarity index 100% rename from benchmark.py rename to marker_benchmark.py diff --git a/pyproject.toml b/pyproject.toml index fe96fd38..1d3f5754 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ include = [ "convert.py", "convert_single.py", "chunk_convert.sh", - "benchmark.py", + "marker_benchmark.py", "chunk_convert.py", ]