Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Remove tessdata directory and replace it by a submodule #2459

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "test"]
path = test
url = https://github.com/tesseract-ocr/test
[submodule "tessdata"]
path = tessdata
url = https://github.com/tesseract-ocr/tessconfigs
34 changes: 32 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
## run autogen.sh to create Makefile.in from this file

# Default location for tessdata directory.
# It can be overridden with configure option --datarootdir=DIR.
datadir = @datarootdir@/tessdata

ACLOCAL_AMFLAGS = -I m4

if ENABLE_TRAINING
Expand All @@ -18,17 +23,34 @@ training:
@echo "Need to reconfigure project, so there are no errors"
endif

.PHONY: doc install-langs ScrollView.jar install-jars training
.PHONY: doc install-langs ScrollView.jar install-jars install-tessdata training

SUBDIRS = src/arch src/ccutil src/viewer src/cutil src/opencl src/ccstruct
SUBDIRS += src/dict src/classify src/wordrec src/textord src/lstm
SUBDIRS += src/ccmain src/api . tessdata doc unittest
SUBDIRS += src/ccmain src/api . doc unittest

EXTRA_DIST = README.md LICENSE
EXTRA_DIST += aclocal.m4 config configure.ac autogen.sh
EXTRA_DIST += tesseract.pc.in $(TRAINING_SUBDIR) java doc
EXTRA_DIST += CMakeLists.txt tesseract.pc.cmake cmake VERSION src/vs2010 cppan.yml

# Files installed directly into the tessdata directory.
TESSDATA_FILES = $(top_srcdir)/tessdata/pdf.ttf

# Files for tessdata/configs (bare names; the source directory prefix is
# added in TESSDATA_CONFIG_FILES below).
# CONFIG_FILES is assigned exactly once with '=' and extended with '+='.
# pdf.ttf is not a config file; it is listed in TESSDATA_FILES only.
CONFIG_FILES = inter makebox box.train unlv ambigs.train lstm.train lstmdebug
CONFIG_FILES += api_config kannada box.train.stderr quiet logfile digits get.images
CONFIG_FILES += lstmbox wordstrbox
# Configurations for OCR output.
CONFIG_FILES += alto hocr pdf tsv txt
CONFIG_FILES += linebox rebox strokewidth bigram
TESSDATA_CONFIG_FILES = ${CONFIG_FILES:%=$(top_srcdir)/tessdata/configs/%}

# Files for tessdata/tessconfigs (bare names, prefixed the same way).
TESSCONFIG_FILES = batch batch.nochop nobatch matdemo segdemo msdemo
TESSDATA_TESSCONFIG_FILES = ${TESSCONFIG_FILES:%=$(top_srcdir)/tessdata/tessconfigs/%}

DIST_SUBDIRS = $(SUBDIRS) $(TRAINING_SUBDIR)

uninstall-hook:
Expand All @@ -50,6 +72,14 @@ ScrollView.jar:
install-jars:
@cd "$(top_builddir)/java" && $(MAKE) $@

# Install pdf.ttf and the configs/ and tessconfigs/ files from
# $(top_srcdir)/tessdata into $(DESTDIR)$(datadir).
# Destination paths are quoted so a DESTDIR or datadir containing spaces
# does not split into several shell words.
# $(MKDIR_P) and $(INSTALL_DATA) are the configure-detected equivalents of
# "mkdir -p" and "$(INSTALL) -m 644".
# NOTE(review): nothing visible in this file hooks this target into
# "make install"; it apparently has to be invoked explicitly -- confirm.
install-tessdata:
	$(MKDIR_P) "$(DESTDIR)$(datadir)"
	$(INSTALL_DATA) $(TESSDATA_FILES) "$(DESTDIR)$(datadir)"
	$(MKDIR_P) "$(DESTDIR)$(datadir)/configs"
	$(INSTALL_DATA) $(TESSDATA_CONFIG_FILES) "$(DESTDIR)$(datadir)/configs"
	$(MKDIR_P) "$(DESTDIR)$(datadir)/tessconfigs"
	$(INSTALL_DATA) $(TESSDATA_TESSCONFIG_FILES) "$(DESTDIR)$(datadir)/tessconfigs"

doc:
-srcdir="$(top_srcdir)" builddir="$(top_builddir)" \
version="@PACKAGE_VERSION@" name="@PACKAGE_NAME@" \
Expand Down
3 changes: 0 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,6 @@ AC_CONFIG_FILES([src/lstm/Makefile])
AC_CONFIG_FILES([src/textord/Makefile])
AC_CONFIG_FILES([src/viewer/Makefile])
AC_CONFIG_FILES([src/wordrec/Makefile])
AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
AC_CONFIG_FILES([unittest/Makefile])
AC_CONFIG_FILES([java/Makefile])
AC_CONFIG_FILES([java/com/Makefile])
Expand Down
1 change: 1 addition & 0 deletions tessdata
Submodule tessdata added at 3c86eb
12 changes: 0 additions & 12 deletions tessdata/Makefile.am

This file was deleted.

8 changes: 0 additions & 8 deletions tessdata/configs/Makefile.am

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/alto

This file was deleted.

7 changes: 0 additions & 7 deletions tessdata/configs/ambigs.train

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/api_config

This file was deleted.

4 changes: 0 additions & 4 deletions tessdata/configs/bazaar

This file was deleted.

5 changes: 0 additions & 5 deletions tessdata/configs/bigram

This file was deleted.

13 changes: 0 additions & 13 deletions tessdata/configs/box.train

This file was deleted.

14 changes: 0 additions & 14 deletions tessdata/configs/box.train.stderr

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/digits

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/get.images

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/configs/hocr

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/configs/inter

This file was deleted.

4 changes: 0 additions & 4 deletions tessdata/configs/kannada

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/configs/linebox

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/logfile

This file was deleted.

12 changes: 0 additions & 12 deletions tessdata/configs/lstm.train

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/lstmbox

This file was deleted.

4 changes: 0 additions & 4 deletions tessdata/configs/lstmdebug

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/makebox

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/pdf

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/quiet

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/configs/rebox

This file was deleted.

12 changes: 0 additions & 12 deletions tessdata/configs/strokewidth

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/tsv

This file was deleted.

3 changes: 0 additions & 3 deletions tessdata/configs/txt

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/configs/unlv

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/configs/wordstrbox

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/eng.user-patterns

This file was deleted.

5 changes: 0 additions & 5 deletions tessdata/eng.user-words

This file was deleted.

Binary file removed tessdata/pdf.ttf
Binary file not shown.
3 changes: 0 additions & 3 deletions tessdata/tessconfigs/Makefile.am

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/tessconfigs/batch

This file was deleted.

2 changes: 0 additions & 2 deletions tessdata/tessconfigs/batch.nochop

This file was deleted.

7 changes: 0 additions & 7 deletions tessdata/tessconfigs/matdemo

This file was deleted.

13 changes: 0 additions & 13 deletions tessdata/tessconfigs/msdemo

This file was deleted.

1 change: 0 additions & 1 deletion tessdata/tessconfigs/nobatch

This file was deleted.

10 changes: 0 additions & 10 deletions tessdata/tessconfigs/segdemo

This file was deleted.