Skip to content

Commit

Permalink
Explicitly encode output as UTF-8
Browse files Browse the repository at this point in the history
I think I've covered all the cases we need to, but there could be more.

This is needed because, if the locale isn't set to UTF-8, an error of
this form will result:

  UnicodeEncodeError: 'ascii' codec can't encode character u'\u0113' in position 60: ordinal not in range(128)

While the ideal solution is for the user to set their locale to UTF-8,
it is better that we print debug output which may not be displayed
correctly than that we output a fatal (and non-obvious) error,
potentially some time into processing.

This also fixes some cases of implicitly combining a str and *objs
when printing debug output, which fails with some Python versions, by
explicitly joining the arguments with " ".join(objs).
  • Loading branch information
nickjwhite committed Oct 24, 2017
1 parent ebd8235 commit 2b97ad9
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 6 deletions.
9 changes: 7 additions & 2 deletions ocropus-gpageseg
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import os
import os.path
import sys
import traceback
import codecs
from multiprocessing import Pool

import numpy as np
Expand All @@ -31,6 +32,10 @@ from ocrolib import psegutils,morph,sl
from ocrolib.exceptions import OcropusException
from ocrolib.toplevel import *

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

parser = argparse.ArgumentParser(add_help=False)

# error checking
Expand Down Expand Up @@ -131,11 +136,11 @@ def check_page(image):


def print_info(*objs):
print("INFO: ", *objs, file=sys.stdout)
print("INFO: ", " ".join(objs))


def print_error(*objs):
print("ERROR: ", *objs, file=sys.stderr)
print("ERROR: ", " ".join(objs), file=sys.stderr)


if len(args.files)<1:
Expand Down
9 changes: 7 additions & 2 deletions ocropus-nlbin
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import argparse
import os
import multiprocessing
import sys
import codecs

import numpy as np
import matplotlib.pyplot as plt
Expand All @@ -14,6 +15,10 @@ from scipy import stats

import ocrolib

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)


parser = argparse.ArgumentParser("""
Image binarization using non-linear processing.
Expand Down Expand Up @@ -51,11 +56,11 @@ if len(args.files)<1:


def print_info(*objs):
print("INFO: ", *objs, file=sys.stdout)
print("INFO: ", " ".join(objs))


def print_error(*objs):
print("ERROR: ", *objs, file=sys.stderr)
print("ERROR: ", " ".join(objs), file=sys.stderr)


def check_page(image):
Expand Down
9 changes: 7 additions & 2 deletions ocropus-rpred
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import print_function

import sys
import traceback
import codecs
import os.path
Expand All @@ -19,6 +20,10 @@ from ocrolib import lstm
from ocrolib import edist
from ocrolib.exceptions import FileNotFound, OcropusException

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

parser = argparse.ArgumentParser("apply an RNN recognizer")

# error checking
Expand Down Expand Up @@ -72,11 +77,11 @@ args = parser.parse_args()


def print_info(*objs):
print("INFO: ", *objs, file=sys.stdout)
print("INFO: ", " ".join(objs))


def print_error(*objs):
print("ERROR: ", *objs, file=sys.stderr)
print("ERROR: ", " ".join(objs), file=sys.stderr)


def check_line(image):
Expand Down
4 changes: 4 additions & 0 deletions ocropus-rtrain
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import os.path
import traceback
import argparse
import sys
import codecs

import numpy as np
import matplotlib.pyplot as plt
Expand All @@ -16,6 +17,9 @@ import ocrolib
import ocrolib.lstm as lstm
from ocrolib import lineest

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)

np.seterr(divide='raise',over='raise',invalid='raise',under='ignore')

parser = argparse.ArgumentParser("train an RNN recognizer")
Expand Down

0 comments on commit 2b97ad9

Please sign in to comment.