-
Notifications
You must be signed in to change notification settings - Fork 1.1k
39 lines (33 loc) · 1.29 KB
/
tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# CI workflow: on every push, install the project on a CPU-only runner,
# download the benchmark data set, run the benchmark, and verify the scores
# written to report.json.
name: Integration test with benchmark
on: [push]

env:
  # Tesseract data directory used by the job; the "Show tessdata folders"
  # step lists the parent directory so a path mismatch is visible in the log.
  TESSDATA_PREFIX: "/usr/share/tesseract-ocr/4.00/tessdata"
  TORCH_DEVICE: "cpu"
  # So we don't have to install ghostscript, which takes a long time
  OCR_ENGINE: "tesseract"

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a float (3.10 would
          # otherwise be read as 3.1).
          python-version: "3.11"

      - name: Install system dependencies
        # Refresh the package index first so the install does not fail on
        # stale package lists baked into the runner image.
        run: |
          sudo apt-get update
          cat scripts/install/apt-requirements.txt | xargs sudo apt-get install -y

      - name: Show tessdata folders
        run: ls /usr/share/tesseract-ocr/

      - name: Install python dependencies
        # After the regular poetry install, torch is removed and reinstalled
        # from the PyTorch CPU wheel index.
        run: |
          pip install poetry
          poetry install
          poetry remove torch
          poetry run pip install torch --index-url https://download.pytorch.org/whl/cpu

      - name: Download benchmark data
        # NOTE(review): the archive is presumably expected to unpack into
        # benchmark_data/ (the next step reads benchmark_data/pdfs and
        # benchmark_data/references) - confirm against the archive contents.
        run: |
          wget -O benchmark_data.zip "https://drive.google.com/uc?export=download&id=1ktVDYPEeyHlKLaF56FnHjI5VjVnYa1xL"
          unzip benchmark_data.zip

      - name: Run benchmark test
        # The benchmark writes report.json, which the verification script
        # then reads.
        run: |
          poetry run python benchmark.py benchmark_data/pdfs benchmark_data/references report.json
          poetry run python scripts/verify_benchmark_scores.py report.json