tesseract-ocr · JKamlah · Apr 4, 2022 · Apr 20, 2022 · Apr 25, 2022 · Jul 1, 2022
diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h
@@ -326,6 +326,17 @@ class TESS_API TessBaseAPI {
    */
   void SetImage(Pix *pix);
 
+  /**
+   * Preprocess the input image (call this after SetImage()) with the
+   * grayscale normalization based on nlbin (Thomas Breuel). Returns true if applied.
+   * Current modes:
+   *  - 0 = No normalization (nothing is changed, returns false)
+   *  - 1 = Thresholding+Recognition (thresholder image and input image)
+   *  - 2 = Thresholding (thresholder image only)
+   *  - 3 = Recognition (input image only)
+   */
+  bool NormalizeImage(int mode);
+
   /**
    * Set the resolution of the source image in pixels per inch so font size
    * information can be calculated in results.  Call this after SetImage().

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
@@ -925,6 +925,25 @@ Pix *TessBaseAPI::GetInputImage() {
   return tesseract_->pix_original();
 }
 
+// Grayscale normalization (preprocessing); returns true if it was applied.
+bool TessBaseAPI::NormalizeImage(int mode) {
+  if (!GetInputImage() || thresholder_ == nullptr) {
+    tprintf("Please use SetImage before applying the image pre-processing steps.\n");
+    return false;
+  }
+  Image norm = (mode < 1 || mode > 3) ? Image(nullptr) : thresholder_->GetPixNormRectGrey();
+  if (norm == nullptr) return false;  // Unknown mode or failed normalization.
+  if (mode != 3) {  // Modes 1 and 2: the thresholder stores its own copy.
+    thresholder_->SetImage(norm);
+  }
+  if (mode != 2) {  // Modes 1 and 3: SetInputImage() takes ownership of norm.
+    SetInputImage(norm);
+  } else {  // Mode 2: nothing else owns norm, so destroy it instead of leaking.
+    norm.destroy();
+  }
+  return true;
+}
+
 const char *TessBaseAPI::GetInputName() {
   if (!input_file_.empty()) {
     return input_file_.c_str();
@@ -1253,8 +1272,31 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
 bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
                               const char *retry_config, int timeout_millisec,
                               TessResultRenderer *renderer) {
+
   SetInputName(filename);
+
   SetImage(pix);
+
+  // Image preprocessing on image
+  // Grayscale normalization
+  int graynorm_mode;
+  GetIntVariable("preprocess_graynorm_mode", &graynorm_mode);
+  if (graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) {
+    // Write normalized image 
+    std::string output_filename = output_file_ + ".preprocessed";
+    if (page_index > 0) {
+      output_filename += std::to_string(page_index);
+    }
+    output_filename += ".tif";
+    if (graynorm_mode == 2) {
+      pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4);
+    } else {
+      pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4);
+    }
+  }
+
+  // Recognition
+
   bool failed = false;
 
   if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
@@ -1301,6 +1343,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
     // Switch to alternate mode for retry.
     ReadConfigFile(retry_config);
     SetImage(pix);
+
+    // Apply image preprocessing
+    NormalizeImage(graynorm_mode);
+
+    //if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
     Recognize(nullptr);
     // Restore saved config variables.
     ReadConfigFile(kOldVarsFile);
@@ -1309,7 +1356,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
   if (renderer && !failed) {
     failed = !renderer->AddImage(this);
   }
-
+  //pixDestroy(&pixs);
   return !failed;
 }
 

diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
@@ -75,6 +75,11 @@ Tesseract::Tesseract()
                "11=sparse_text, 12=sparse_text+osd, 13=raw_line"
                " (Values from PageSegMode enum in tesseract/publictypes.h)",
                this->params())
+    , INT_MEMBER(preprocess_graynorm_mode, 0,
+                 "Grayscale normalization mode: 0=no normalization, 1=thresholding+recognition, "
+                 "2=thresholding_only, 3=recognition_only. "
+                 "The modes 1-3 are applied on the full image.",
+                 this->params())
     , INT_MEMBER(thresholding_method,
                  static_cast<int>(ThresholdMethod::Otsu),
                  "Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = "

diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
@@ -757,6 +757,7 @@ class TESS_API Tesseract : public Wordrec {
   BOOL_VAR_H(tessedit_dump_pageseg_images);
   BOOL_VAR_H(tessedit_do_invert);
   INT_VAR_H(tessedit_pageseg_mode);
+  INT_VAR_H(preprocess_graynorm_mode);
   INT_VAR_H(thresholding_method);
   BOOL_VAR_H(thresholding_debug);
   double_VAR_H(thresholding_window_size);

diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp
@@ -187,6 +187,103 @@ void ImageThresholder::SetImage(const Image pix) {
   Init();
 }
 
+/*----------------------------------------------------------------------*
+ *                  Non-linear contrast normalization                   *
+ *----------------------------------------------------------------------*/
+/*!
+ * \brief   pixNLNorm()
+ *
+ * \param[in]    pixs          at least 8 bpp; 32 bpp is converted as RGB
+ * \param[out]   pthresh       [optional] l_int32 global threshold value,
+ *                             mapped to the gray values of pixd
+ * \return       pixd          new 8 bpp grayscale that the caller has to
+ *                             destroy, or nullptr on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This composite operation is good for adaptively removing
+ *          dark background. Adaptation of Thomas Breuel's nlbin version
+ *          from ocropus.
+ *      (2) A good thresholder to use together with NLNorm is WAN.
+ *      (3) An image without separable foreground and background is
+ *          returned without the final contrast stretch.
+ * </pre>
+ */
+Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) {
+  if (!pixs || pixGetDepth(pixs) < 8) {
+    return static_cast<Pix *>(ERROR_PTR("pixs undefined or d < 8 bpp", "pixNLNorm", nullptr));
+  }
+  // Reduce to 8 bpp gray; RGB is weighted with the ITU-R 601-2 luma factors.
+  Pix *gray = pixGetDepth(pixs) == 32 ? pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114)
+                                      : pixConvertTo8(pixs, 0);
+  if (gray == nullptr) {
+    return nullptr;
+  }
+
+  // Normalize contrast: stretch the gray values linearly to the full range.
+  Pix *norm = pixMaxDynamicRange(gray, L_LINEAR_SCALE);
+  pixDestroy(&gray);
+  if (norm == nullptr) {
+    return nullptr;
+  }
+
+  // Calculate the flat version: a background estimate from two rank filters
+  // on a half-size copy is scaled back to full size, inverted and added.
+  l_int32 w = 0, h = 0, sw = 0, sh = 0;
+  pixGetDimensions(norm, &w, &h, nullptr);
+  Pix *half = pixScaleGeneral(norm, 0.5, 0.5, 0.0, 0);
+  Pix *wide = half ? pixRankFilter(half, 20, 2, 0.8) : nullptr;
+  pixDestroy(&half);
+  Pix *tall = wide ? pixRankFilter(wide, 2, 20, 0.8) : nullptr;
+  pixDestroy(&wide);
+  Pix *flat = nullptr;
+  if (tall && pixGetDimensions(tall, &sw, &sh, nullptr) == 0 && sw > 0 && sh > 0) {
+    flat = pixScaleGrayLI(tall, static_cast<l_float32>(w) / sw, static_cast<l_float32>(h) / sh);
+  }
+  pixDestroy(&tall);
+  if (flat == nullptr) {
+    pixDestroy(&norm);  // Do not leak the intermediate image on failure.
+    return nullptr;
+  }
+  pixInvert(flat, flat);
+  pixAddGray(norm, norm, flat);
+  pixDestroy(&flat);
+
+  // Local contrast enhancement: ignore a border of 10 % on every side and get
+  // a mean threshold, background and foreground value from the rest.
+  BOX *inner = boxCreate(w * 0.1, h * 0.1, w * 0.8, h * 0.8);
+  NUMA *histo = pixGetGrayHistogramInRect(norm, inner, 1);
+  l_int32 thresh = 0;
+  l_float32 avefg = 0, avebg = 0;
+  const bool split = histo && numaSplitDistribution(histo, 0.1, &thresh, &avefg, &avebg,
+                                                    nullptr, nullptr, nullptr) == 0;
+  boxDestroy(&inner);
+  numaDestroy(&histo);
+
+  // Subtract the foreground value and multiply by a factor that maps a value
+  // between the background value and white (at most 255) to 255.
+  const l_int32 fgval = static_cast<l_int32>(avefg + 0.5);
+  l_int32 bgval = static_cast<l_int32>(avebg + 0.5);
+  bgval += std::min(static_cast<l_int32>((bgval - thresh) * .5), 255 - bgval);
+  if (!split || bgval <= fgval) {  // Nothing to stretch; avoids a division by zero.
+    if (pthresh) {
+      *pthresh = thresh;
+    }
+    return norm;
+  }
+  const l_float32 factor = 255. / (bgval - fgval);
+  if (pthresh) {  // The threshold is mapped exactly like the pixel values below.
+    *pthresh = std::max(0, std::min(255, static_cast<l_int32>((thresh - fgval) * factor)));
+  }
+  pixAddConstantGray(norm, -1 * fgval);
+  pixMultConstantGray(norm, factor);
+  return norm;
+}
+
+/*----------------------------------------------------------------------*
+ *                          Thresholding                                *
+ *----------------------------------------------------------------------*/
+
 std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
                                                       TessBaseAPI *api,
                                                       ThresholdMethod method) {
@@ -207,7 +304,7 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
   int r;
 
   l_int32 pix_w, pix_h;
-  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
+  pixGetDimensions(pix_, &pix_w, &pix_h, nullptr);
 
   bool thresholding_debug;
   api->GetBoolVariable("thresholding_debug", &thresholding_debug);
@@ -370,6 +467,17 @@ Image ImageThresholder::GetPixRectGrey() {
   return pix;
 }
 
+// Get a normalized greyscale version of the source image rectangle.
+// The rectangle is taken from GetPixRect() and normalized with pixNLNorm();
+// the temporary rectangle image is released here.
+// The returned Pix must be pixDestroyed.
+Image ImageThresholder::GetPixNormRectGrey() {
+  Image rect = GetPixRect();
+  Pix *normalized = pixNLNorm(rect, nullptr);
+  rect.destroy();
+  return normalized;
+}
+
 // Otsu thresholds the rectangle, taking the rectangle from *this.
 void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
   std::vector<int> thresholds;

diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h
@@ -154,6 +154,12 @@ class TESS_API ImageThresholder {
   // Provided to the classifier to extract features from the greyscale image.
   virtual Image GetPixRectGrey();
 
+  // Get the source image rectangle (as returned by GetPixRect) as a new
+  // greyscale image that was normalized with pixNLNorm.
+  // The returned Pix must be pixDestroyed.
+  // It is null if the normalization fails. Used by TessBaseAPI::NormalizeImage.
+  virtual Image GetPixNormRectGrey();
+
 protected:
   // ----------------------------------------------------------------------
   // Utility functions that may be useful components for other thresholders.
@@ -170,6 +176,9 @@ class TESS_API ImageThresholder {
   // Otsu thresholds the rectangle, taking the rectangle from *this.
   void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
 
+  // Return non-linear normalized grayscale (a new Pix, null on error); *pthresh, if given, gets a threshold.
+  Pix *pixNLNorm(Pix *pixs, int *pthresh);
+
   /// Threshold the rectangle, taking everything except the src_pix
   /// from the class, using thresholds/hi_values to the output pix.
   /// NOTE that num_channels is the size of the thresholds and hi_values